Diffstat (limited to 'compiler/optimizing')
50 files changed, 3333 insertions, 1851 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 4c3f66aa4f..dc75ff1abc 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1590,15 +1590,18 @@ class BCEVisitor : public HGraphVisitor {
     HGraph* graph = GetGraph();
     HInstruction* zero;
     switch (type) {
-      case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break;
-      case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break;
-      case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
+      case Primitive::kPrimNot: zero = graph->GetNullConstant(); break;
+      case Primitive::kPrimFloat: zero = graph->GetFloatConstant(0); break;
+      case Primitive::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
       default: zero = graph->GetConstant(type, 0); break;
     }
     HPhi* phi = new (graph->GetArena())
         HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type));
     phi->SetRawInputAt(0, instruction);
     phi->SetRawInputAt(1, zero);
+    if (type == Primitive::kPrimNot) {
+      phi->SetReferenceTypeInfo(instruction->GetReferenceTypeInfo());
+    }
     new_preheader->AddPhi(phi);
     return phi;
   }
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index e1404ce59c..1178d0fb25 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -2841,15 +2841,21 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32
     }
 
     case Instruction::CONST_STRING: {
+      uint32_t string_index = instruction.VRegB_21c();
+      bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache(
+          *dex_file_, string_index);
       current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_21c(), dex_pc));
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
       break;
     }
 
    case Instruction::CONST_STRING_JUMBO: {
+      uint32_t string_index = instruction.VRegB_31c();
+      bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache(
+          *dex_file_, string_index);
       current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_31c(), dex_pc));
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache));
       UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction(), dex_pc);
       break;
     }
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 0a26786a87..9fda83840c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3234,6 +3234,147 @@ void InstructionCodeGeneratorARM::VisitDivZeroCheck(HDivZeroCheck* instruction)
   }
 }
 
+void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations) {
+  Register in = locations->InAt(0).AsRegister<Register>();
+  Location rhs = locations->InAt(1);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (rhs.IsConstant()) {
+    // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
+    // so map all rotations to a +ve. equivalent in that range.
+    // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
+    uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
+    if (rot) {
+      // Rotate, mapping left rotations to right equivalents if necessary.
+      // (e.g. left by 2 bits == right by 30.)
+      __ Ror(out, in, rot);
+    } else if (out != in) {
+      __ Mov(out, in);
+    }
+  } else {
+    __ Ror(out, in, rhs.AsRegister<Register>());
+  }
+}
+
+// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
+// rotates by swapping input regs (effectively rotating by the first 32-bits of
+// a larger rotation) or flipping direction (thus treating larger right/left
+// rotations as sub-word sized rotations in the other direction) as appropriate.
+void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
+  Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+  Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Location rhs = locations->InAt(1);
+  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
+  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+  if (rhs.IsConstant()) {
+    uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
+    // Map all rotations to +ve. equivalents on the interval [0,63].
+    rot &= kMaxLongShiftValue;
+    // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
+    // logic below to a simple pair of binary orr.
+    // (e.g. 34 bits == in_reg swap + 2 bits right.)
+    if (rot >= kArmBitsPerWord) {
+      rot -= kArmBitsPerWord;
+      std::swap(in_reg_hi, in_reg_lo);
+    }
+    // Rotate, or mov to out for zero or word size rotations.
+    if (rot != 0u) {
+      __ Lsr(out_reg_hi, in_reg_hi, rot);
+      __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, kArmBitsPerWord - rot));
+      __ Lsr(out_reg_lo, in_reg_lo, rot);
+      __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, kArmBitsPerWord - rot));
+    } else {
+      __ Mov(out_reg_lo, in_reg_lo);
+      __ Mov(out_reg_hi, in_reg_hi);
+    }
+  } else {
+    Register shift_right = locations->GetTemp(0).AsRegister<Register>();
+    Register shift_left = locations->GetTemp(1).AsRegister<Register>();
+    Label end;
+    Label shift_by_32_plus_shift_right;
+
+    __ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
+    __ Lsrs(shift_left, rhs.AsRegister<Register>(), 6);
+    __ rsb(shift_left, shift_right, ShifterOperand(kArmBitsPerWord), AL, kCcKeep);
+    __ b(&shift_by_32_plus_shift_right, CC);
+
+    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
+    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
+    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ Lsr(shift_left, in_reg_hi, shift_right);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
+    __ b(&end);
+
+    __ Bind(&shift_by_32_plus_shift_right);  // Shift by 32+shift_right.
+    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
+    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
+    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ Lsl(shift_right, in_reg_hi, shift_left);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
+
+    __ Bind(&end);
+  }
+}
+
+void LocationsBuilderARM::HandleRotate(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      if (ror->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM::HandleRotate(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  Primitive::Type type = ror->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      HandleIntegerRotate(locations);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      HandleLongRotate(locations);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM::VisitRor(HRor* op) {
+  HandleRotate(op);
+}
+
+void InstructionCodeGeneratorARM::VisitRor(HRor* op) {
+  HandleRotate(op);
+}
+
 void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
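A note on the rotate mapping used above: the ARM paths rely on two identities. Rotating left by n equals rotating right by (width - n), and a 64-bit rotation by n >= 32 equals swapping the two 32-bit halves and then rotating by n - 32. A minimal, self-contained C++ sketch of that reduction (illustrative names, not ART's API):

#include <cstdint>
#include <utility>

// 32-bit rotate right; the amount is masked to [0,31] just as the codegen does.
uint32_t Ror32(uint32_t x, uint32_t amount) {
  uint32_t rot = amount & 0x1F;
  if (rot == 0u) return x;  // Avoid the undefined shift-by-32.
  return (x >> rot) | (x << (32 - rot));
}

// 64-bit rotate right over two 32-bit words, mirroring HandleLongRotate's
// constant path: swap the halves for amounts >= 32, then do one sub-word
// rotate with a pair of shift/or combinations.
void Ror64Halves(uint32_t* hi, uint32_t* lo, uint32_t amount) {
  uint32_t rot = amount & 0x3F;
  if (rot >= 32u) {  // 'Pre-rotate' by a whole word.
    rot -= 32u;
    std::swap(*hi, *lo);
  }
  if (rot != 0u) {
    uint32_t new_hi = (*hi >> rot) | (*lo << (32 - rot));
    uint32_t new_lo = (*lo >> rot) | (*hi << (32 - rot));
    *hi = new_hi;
    *lo = new_lo;
  }
}

This is why HandleLongRotate only ever needs shifts in [1,31] plus an optional register swap.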
@@ -5067,16 +5208,15 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck(
 }
 
 void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
@@ -5107,8 +5247,12 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
     __ LoadFromOffset(kLoadWord, out, out, cache_offset);
   }
 
-  __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+  if (!load->IsInDexCache()) {
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
+    codegen_->AddSlowPath(slow_path);
+    __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 static int32_t GetExceptionTlsOffset() {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 193add2541..8193c2808c 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -170,6 +170,9 @@ class LocationsBuilderARM : public HGraphVisitor {
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
+  void HandleIntegerRotate(LocationSummary* locations);
+  void HandleLongRotate(LocationSummary* locations);
+  void HandleRotate(HRor* ror);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -213,6 +216,9 @@ class InstructionCodeGeneratorARM : public HGraphVisitor {
   void GenerateOrrConst(Register out, Register first, uint32_t value);
   void GenerateEorConst(Register out, Register first, uint32_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleIntegerRotate(LocationSummary* locations);
+  void HandleLongRotate(LocationSummary* locations);
+  void HandleRotate(HRor* ror);
   void HandleShift(HBinaryOperation* operation);
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateWideAtomicStore(Register addr, uint32_t offset,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 227f4be642..52058302be 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1791,6 +1791,17 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
     __ Orr(dst, lhs, rhs);
   } else if (instr->IsSub()) {
     __ Sub(dst, lhs, rhs);
+  } else if (instr->IsRor()) {
+    if (rhs.IsImmediate()) {
+      uint32_t shift = rhs.immediate() & (lhs.SizeInBits() - 1);
+      __ Ror(dst, lhs, shift);
+    } else {
+      // Ensure shift distance is in the same size register as the result. If
+      // we are rotating a long and the shift comes in a w register originally,
+      // we don't need to sxtw for use as an x since the shift distances are
+      // all & reg_bits - 1.
+      __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
+    }
   } else {
     DCHECK(instr->IsXor());
     __ Eor(dst, lhs, rhs);
@@ -3850,16 +3861,15 @@ void InstructionCodeGeneratorARM64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UN
 }
 
 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
-  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
-  codegen_->AddSlowPath(slow_path);
-
   Location out_loc = load->GetLocations()->Out();
   Register out = OutputRegister(load);
   Register current_method = InputRegisterAt(load, 0);
@@ -3889,8 +3899,12 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
     __ Ldr(out, MemOperand(out.X(), cache_offset));
   }
 
-  __ Cbz(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+  if (!load->IsInDexCache()) {
+    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
+    codegen_->AddSlowPath(slow_path);
+    __ Cbz(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void LocationsBuilderARM64::VisitLocal(HLocal* local) {
@@ -4229,6 +4243,7 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
 
     default:
      LOG(FATAL) << "Unexpected rem type " << type;
+      UNREACHABLE();
   }
 }
 
@@ -4258,6 +4273,14 @@ void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATT
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderARM64::VisitRor(HRor* ror) {
+  HandleBinaryOp(ror);
+}
+
+void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
+  HandleBinaryOp(ror);
+}
+
 void LocationsBuilderARM64::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
@@ -4295,6 +4318,7 @@ void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) {
 
     default:
       LOG(FATAL) << "Unimplemented local type " << field_type;
+      UNREACHABLE();
   }
 }
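The arm64 side needs no dedicated helper: HRor is folded into HandleBinaryOp, and the comment in that hunk leans on the fact that only the low log2(width) bits of the shift distance participate, so the w-versus-x width of the distance register cannot matter. A small hedged C++ illustration of that masking invariant (illustrative, not ART code):

#include <cstdint>

// Rotating a 64-bit value only consumes amount & 63, so any sign- or
// zero-extension of the distance register leaves the result unchanged.
uint64_t RotateRight64(uint64_t x, uint64_t amount) {
  uint64_t rot = amount & 63;
  return rot == 0 ? x : (x >> rot) | (x << (64 - rot));
}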
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index d092de9421..ce7cbcd9d6 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3413,24 +3413,28 @@ void InstructionCodeGeneratorMIPS::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNU
 }
 
 void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
-  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   Register out = locations->Out().AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
   __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-  __ Beqz(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+
+  if (!load->IsInDexCache()) {
+    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
+    codegen_->AddSlowPath(slow_path);
+    __ Beqz(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void LocationsBuilderMIPS::VisitLocal(HLocal* local) {
@@ -3913,6 +3917,16 @@ void InstructionCodeGeneratorMIPS::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UN
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
 void LocationsBuilderMIPS::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 78f5644cfb..1a9de15c6f 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3105,16 +3105,15 @@ void InstructionCodeGeneratorMIPS64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_U
 }
 
 void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
-  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
   GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
@@ -3123,8 +3122,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
   __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
   __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
   // TODO: We will need a read barrier here.
-  __ Beqzc(out, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+
+  if (!load->IsInDexCache()) {
+    SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
+    codegen_->AddSlowPath(slow_path);
+    __ Beqzc(out, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void LocationsBuilderMIPS64::VisitLocal(HLocal* local) {
@@ -3519,6 +3523,16 @@ void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
 void LocationsBuilderMIPS64::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
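All five backends change VisitLoadString in the same shape: the dex-cache load is emitted unconditionally, while the null check and the runtime slow path are emitted only when the builder could not prove, via CanAssumeStringIsPresentInDexCache, that the string is already resolved. A hedged C++ sketch of the emitted control flow (the names are illustrative stand-ins, not ART's):

#include <cstdint>

struct MirrorString;  // Stand-in for mirror::String.

// Shape of the code each backend now emits; `in_dex_cache` is the fact the
// builder records on HLoadString at compile time.
MirrorString* LoadStringShape(MirrorString** dex_cache_strings,
                              uint32_t string_index,
                              bool in_dex_cache,
                              MirrorString* (*runtime_resolve)(uint32_t)) {
  MirrorString* s = dex_cache_strings[string_index];  // Always emitted.
  if (!in_dex_cache && s == nullptr) {
    // Null check plus slow path: only emitted when presence is not guaranteed.
    s = runtime_resolve(string_index);
  }
  return s;
}

When in_dex_cache is true the LocationSummary is also downgraded from kCallOnSlowPath to kNoCall (unless read barriers force otherwise), which frees the register allocator from slow-path constraints.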
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 19f03df2a0..469dd49a8e 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -433,6 +433,56 @@ class ArraySetSlowPathX86 : public SlowPathCode {
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
 };
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
+ public:
+  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj)
+      : instruction_(instruction), out_(out), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
  public:
@@ -454,7 +504,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
   // to be instrumented, e.g.:
   //
   //   __ movl(out, Address(out, offset));
-  //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+  //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
   //
   // In that case, we have lost the information about the original
   // object, and the emitted read barrier cannot work properly.
@@ -470,7 +520,9 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()));
+            instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -612,14 +664,18 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
  public:
   ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
-      : instruction_(instruction), out_(out), root_(root) {}
+      : instruction_(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     Register reg_out = out_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -1831,7 +1887,7 @@ void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
 }
 
 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
@@ -3759,6 +3815,92 @@ void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register
   __ Bind(&done);
 }
 
+void LocationsBuilderX86::VisitRor(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimLong:
+      // Add the temporary needed.
+      locations->AddTemp(Location::RequiresRegister());
+      FALLTHROUGH_INTENDED;
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL (unless it is a constant).
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  if (ror->GetResultType() == Primitive::kPrimInt) {
+    Register first_reg = first.AsRegister<Register>();
+    if (second.IsRegister()) {
+      Register second_reg = second.AsRegister<Register>();
+      __ rorl(first_reg, second_reg);
+    } else {
+      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+      __ rorl(first_reg, imm);
+    }
+    return;
+  }
+
+  DCHECK_EQ(ror->GetResultType(), Primitive::kPrimLong);
+  Register first_reg_lo = first.AsRegisterPairLow<Register>();
+  Register first_reg_hi = first.AsRegisterPairHigh<Register>();
+  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
+  if (second.IsRegister()) {
+    Register second_reg = second.AsRegister<Register>();
+    DCHECK_EQ(second_reg, ECX);
+    __ movl(temp_reg, first_reg_hi);
+    __ shrd(first_reg_hi, first_reg_lo, second_reg);
+    __ shrd(first_reg_lo, temp_reg, second_reg);
+    __ movl(temp_reg, first_reg_hi);
+    __ testl(second_reg, Immediate(32));
+    __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
+    __ cmovl(kNotEqual, first_reg_lo, temp_reg);
+  } else {
+    int32_t shift_amt =
+        CodeGenerator::GetInt64ValueOf(second.GetConstant()) & kMaxLongShiftValue;
+    if (shift_amt == 0) {
+      // Already fine.
+      return;
+    }
+    if (shift_amt == 32) {
+      // Just swap.
+      __ movl(temp_reg, first_reg_lo);
+      __ movl(first_reg_lo, first_reg_hi);
+      __ movl(first_reg_hi, temp_reg);
+      return;
+    }
+
+    Immediate imm(shift_amt);
+    // Save the contents of the low value.
+    __ movl(temp_reg, first_reg_lo);
+
+    // Shift right into low, feeding bits from high.
+    __ shrd(first_reg_lo, first_reg_hi, imm);
+
+    // Shift right into high, feeding bits from the original low.
+    __ shrd(first_reg_hi, temp_reg, imm);
+
+    // Swap if needed.
+    if (shift_amt > 32) {
+      __ movl(temp_reg, first_reg_lo);
+      __ movl(first_reg_lo, first_reg_hi);
+      __ movl(first_reg_hi, temp_reg);
+    }
+  }
+}
+
 void LocationsBuilderX86::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
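The x86 long rotate above leans on SHRD, which shifts a 32-bit destination right while feeding vacated bits in from a second register (the hardware masks the count mod 32). A hedged C++ model of the constant-amount path (illustrative, not the ART assembler API):

#include <cstdint>
#include <utility>

// Model of SHRD with a count in [1,31]: shift `dst` right, filling the
// vacated high bits from `src`.
static uint32_t Shrd(uint32_t dst, uint32_t src, uint32_t amt) {
  return (dst >> amt) | (src << (32 - amt));
}

void RotateRight64InPlace(uint32_t* lo, uint32_t* hi, uint32_t amount) {
  uint32_t amt = amount & 63;
  if (amt == 0) return;                            // Already fine.
  if (amt == 32) { std::swap(*lo, *hi); return; }  // Just swap.
  uint32_t shift = amt & 31;         // What the hardware actually uses.
  uint32_t saved_lo = *lo;           // Save the contents of the low word.
  *lo = Shrd(*lo, *hi, shift);       // Shift right into low, feeding bits from high.
  *hi = Shrd(*hi, saved_lo, shift);  // Shift right into high, feeding bits from the original low.
  if (amt > 32) std::swap(*lo, *hi); // Swap if needed.
}

The register-amount path does the same thing branch-free: the testl against 32 plus the two cmovl instructions perform the conditional swap.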
@@ -4006,7 +4148,7 @@ void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unreachable";
 }
 
-void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
   /*
    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
@@ -4260,9 +4402,14 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
   if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
     // Long values can be loaded atomically into an XMM using movsd.
-    // So we use an XMM register as a temp to achieve atomicity (first load the temp into the XMM
-    // and then copy the XMM into the output 32bits at a time).
+    // So we use an XMM register as a temp to achieve atomicity (first
+    // load the temp into the XMM and then copy the XMM into the
+    // output, 32 bits at a time).
     locations->AddTemp(Location::RequiresFpuRegister());
+  } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -4300,9 +4447,32 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
     }
 
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
       __ movl(out.AsRegister<Register>(), Address(base, offset));
       break;
+
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ movl(out.AsRegister<Register>(), Address(base, offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+      }
+      break;
     }
 
     case Primitive::kPrimLong: {
@@ -4337,17 +4507,20 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
       UNREACHABLE();
   }
 
-  // Longs are handled in the switch.
-  if (field_type != Primitive::kPrimLong) {
+  if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimLong) {
+    // Potential implicit null checks, in the case of reference or
+    // long fields, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
-  }
-
-  if (field_type == Primitive::kPrimNot) {
-    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
   }
 }
@@ -4412,7 +4585,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
   bool maybe_record_implicit_null_check_done = false;
@@ -4517,7 +4690,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 }
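On the atomicity trick in HandleFieldGet's comment above: on 32-bit x86 a pair of 32-bit loads of a volatile long can tear, while a single 8-byte SSE load of a naturally aligned location does not. A hedged intrinsics sketch of the same idea (illustrative; the real code keeps the value in an XMM temp and copies it out 32 bits at a time):

#include <cstdint>
#include <cstring>
#include <emmintrin.h>  // SSE2

// Illustrative only: one movsd load of an aligned 8-byte location cannot
// tear, unlike two separate 32-bit loads.
int64_t AtomicLoadLong(const int64_t* addr) {
  __m128d xmm = _mm_load_sd(reinterpret_cast<const double*>(addr));  // movsd
  int64_t result;
  std::memcpy(&result, &xmm, sizeof(result));  // Copy the low 64 bits back out.
  return result;
}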
@@ -4698,6 +4871,11 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
                                          Location::kOutputOverlap :
                                          Location::kNoOutputOverlap);
   }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
@@ -4705,12 +4883,13 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
+  Location out_loc = locations->Out();
 
   Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4722,7 +4901,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
 
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4734,7 +4913,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
 
     case Primitive::kPrimShort: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4746,7 +4925,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
 
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4756,13 +4935,9 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      static_assert(
-          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4772,20 +4947,56 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+      } else {
+        Register out = out_loc.AsRegister<Register>();
+        if (index.IsConstant()) {
+          uint32_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ movl(out, Address(obj, offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      Location out = locations->Out();
-      DCHECK_NE(obj, out.AsRegisterPairLow<Register>());
+      DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset));
+        __ movl(out_loc.AsRegisterPairLow<Register>(), Address(obj, offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
+        __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
       } else {
-        __ movl(out.AsRegisterPairLow<Register>(),
+        __ movl(out_loc.AsRegisterPairLow<Register>(),
                 Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ movl(out.AsRegisterPairHigh<Register>(),
+        __ movl(out_loc.AsRegisterPairHigh<Register>(),
                 Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize));
       }
       break;
@@ -4793,7 +5004,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4805,7 +5016,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4820,23 +5031,12 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
       UNREACHABLE();
   }
 
-  if (type != Primitive::kPrimLong) {
+  if (type == Primitive::kPrimNot || type == Primitive::kPrimLong) {
+    // Potential implicit null checks, in the case of reference or
+    // long arrays, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
-
-  if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-    Location out = locations->Out();
-    if (index.IsConstant()) {
-      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
-    } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
-    }
-  }
 }
 
 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
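The kPrimNot paths in both HandleFieldGet and VisitArrayGet above follow one dispatch pattern: a combined Baker fast-path call when both read barriers and the Baker flavour are compiled in, otherwise a plain movl followed, only if read barriers are on at all, by a slow-path barrier. A hedged C++ sketch of just that dispatch (the flag names mirror the build flags; the callables stand in for the emitted machine-code sequences):

// Stand-ins for kEmitCompilerReadBarrier / kUseBakerReadBarrier.
constexpr bool kEmitReadBarrier = true;
constexpr bool kUseBaker = true;

void EmitReferenceLoad(void (*emit_baker_load)(),
                       void (*emit_plain_load)(),
                       void (*emit_slow_path_barrier)()) {
  if (kEmitReadBarrier && kUseBaker) {
    emit_baker_load();  // Load + gray-bit test + out-of-line mark, in one sequence.
  } else {
    emit_plain_load();  // Plain movl of the heap reference.
    if (kEmitReadBarrier) {
      emit_slow_path_barrier();  // Non-Baker barriers run as a slow path after the load.
    }
  }
}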
@@ -4968,12 +5168,12 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
         //   __ movl(temp2, temp);
         //   // /* HeapReference<Class> */ temp = temp->component_type_
         //   __ movl(temp, Address(temp, component_offset));
-        //   codegen_->GenerateReadBarrier(
+        //   codegen_->GenerateReadBarrierSlow(
         //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
         //
         //   // /* HeapReference<Class> */ temp2 = register_value->klass_
         //   __ movl(temp2, Address(register_value, class_offset));
-        //   codegen_->GenerateReadBarrier(
+        //   codegen_->GenerateReadBarrierSlow(
         //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
         //
         //   __ cmpl(temp, temp2);
@@ -5254,8 +5454,8 @@ void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instructio
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ fs()->cmpw(Address::Absolute(
-      Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
+  __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()),
+                Immediate(0));
   if (successor == nullptr) {
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -5536,32 +5736,16 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-      __ leal(out, Address(current_method, declaring_class_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      __ movl(out, Address(current_method, declaring_class_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    GenerateGcRootFieldLoad(
+        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ movl(out, Address(current_method,
                          ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
-
-    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
-      __ leal(out, Address(out, cache_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      __ movl(out, Address(out, cache_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = out[type_index]
+    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -5611,49 +5795,36 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
 }
 
 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
-  codegen_->AddSlowPath(slow_path);
-
   LocationSummary* locations = load->GetLocations();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
 
-  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-    __ leal(out, Address(current_method, declaring_class_offset));
-    // /* mirror::Class* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    __ movl(out, Address(current_method, declaring_class_offset));
-  }
-
+  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+  GenerateGcRootFieldLoad(
+      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
+  // /* GcRoot<mirror::String> */ out = out[string_index]
+  GenerateGcRootFieldLoad(
+      load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
 
-  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::String>* */ out = &out[string_index]
-    __ leal(out, Address(out, cache_offset));
-    // /* mirror::String* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::String> */ out = out[string_index]
-    __ movl(out, Address(out, cache_offset));
+  if (!load->IsInDexCache()) {
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
+    codegen_->AddSlowPath(slow_path);
+    __ testl(out, out);
+    __ j(kEqual, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetExitLabel());
   }
-
-  __ testl(out, out);
-  __ j(kEqual, slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
 }
 
 static Address GetExceptionTlsAddress() {
@@ -5693,6 +5864,14 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5718,21 +5897,22 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
+  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5748,10 +5928,9 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  __ movl(out, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<Register>());
@@ -5772,17 +5951,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
       // object to avoid doing a comparison we know will fail.
       NearLabel loop;
       __ Bind(&loop);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5811,17 +5981,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &success);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5845,17 +6006,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
       }
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->component_type_
-      __ movl(out, Address(out, component_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5899,6 +6051,13 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
       // HInstanceOf instruction (following the runtime calling
      // convention), which might be cluttered by the potential first
      // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
                                                                     /* is_fatal */ false);
@@ -5951,27 +6110,27 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
   locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
+  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
 
   bool is_type_check_slow_path_fatal =
      (type_check_kind == TypeCheckKind::kExactCheck ||
       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -5991,8 +6150,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  __ movl(temp, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -6014,18 +6172,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, compare_classes;
       __ Bind(&loop);
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -6038,8 +6186,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
      // meantime.
      // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -6065,18 +6212,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       }
       __ j(kEqual, &done);
 
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -6088,8 +6225,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6106,19 +6242,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       __ j(kEqual, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ movl(temp, Address(temp, component_offset));
-      codegen_->MaybeGenerateReadBarrier(
-          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -6132,8 +6257,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -6141,8 +6265,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6159,6 +6282,13 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
       // instruction (following the runtime calling convention), which
      // might be cluttered by the potential first read barrier
      // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6320,14 +6450,226 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr
   }
 }
 
-void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction,
-                                           Location out,
-                                           Location ref,
-                                           Location obj,
-                                           uint32_t offset,
-                                           Location index) {
+void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                   Location out,
+                                                                   uint32_t offset,
+                                                                   Location temp) {
+  Register out_reg = out.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ movl(temp.AsRegister<Register>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ movl(out_reg, Address(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ movl(out_reg, Address(out_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                    Location out,
+                                                                    Location obj,
+                                                                    uint32_t offset,
+                                                                    Location temp) {
+  Register out_reg = out.AsRegister<Register>();
+  Register obj_reg = obj.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ movl(out_reg, Address(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ movl(out_reg, Address(obj_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                          Location root,
+                                                          Register obj,
+                                                          uint32_t offset) {
+  Register root_reg = root.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root);
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ movl(root_reg, Address(obj, offset));
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root, root);
+      codegen_->AddSlowPath(slow_path);
+
+      __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86WordSize>().Int32Value()),
+                    Immediate(0));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ leal(root_reg, Address(obj, offset));
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ movl(root_reg, Address(obj, offset));
+  }
+}
+
+void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t offset,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Address src(obj, offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t data_offset,
+                                                             Location index,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  Address src = index.IsConstant() ?
+      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+      Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                 Location ref,
+                                                                 Register obj,
+                                                                 const Address& src,
+                                                                 Location temp,
+                                                                 bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as:
+  // - it implements the load-load fence using a data dependency on
+  //   the high-bits of rb_state, which are expected to be all zeroes;
+  // - it performs additional checks that we do not do here for
+  //   performance reasons.
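Spelled out as straight-line C++, the fast path described in the comment above looks roughly like the sketch below. This is illustrative only: it assumes the ART types and LockWord bit-field constants used by the emitted shrl/andl pair, and it stands in for, rather than reproduces, ReadBarrier::Barrier.

    uint32_t monitor = obj->monitor_;  // must be loaded before the reference
    uint32_t rb_state = (monitor >> LockWord::kReadBarrierStateShift) &
                        LockWord::kReadBarrierStateMask;       // shrl + andl
    std::atomic_thread_fence(std::memory_order_acquire);       // no instruction on x86
    mirror::Object* ref = *src;                                // original reference load
    if (rb_state == ReadBarrier::gray_ptr_) {
      ref = ReadBarrier::Mark(ref);                            // runtime slow path
    }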
+ + Register ref_reg = ref.AsRegister<Register>(); + Register temp_reg = temp.AsRegister<Register>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ movl(temp_reg, Address(obj, monitor_offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + // /* uint32_t */ rb_state = lock_word.ReadBarrierState() + __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); + __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); + static_assert( + LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, + "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); + + // Load fence to prevent load-load reordering. + // Note that this is a no-op, thanks to the x86 memory model. + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + + // The actual reference load. + // /* HeapReference<Object> */ ref = *src + __ movl(ref_reg, src); + + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + + // Slow path used to mark the object `ref` when it is gray. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref, ref); + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // Insert a slow path based read barrier *after* the reference load. + // // If heap poisoning is enabled, the unpoisoning of the loaded // reference will be carried out by the runtime within the slow // path. @@ -6341,57 +6683,41 @@ void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction, ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); - // TODO: When read barrier has a fast path, add it here. - /* Currently the read barrier call is inserted after the original load. - * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the - * original load. This load-load ordering is required by the read barrier. - * The fast path/slow path (for Baker's algorithm) should look like: - * - * bool isGray = obj.LockWord & kReadBarrierMask; - * lfence; // load fence or artificial data dependence to prevent load-load reordering - * ref = obj.field; // this is the original load - * if (isGray) { - * ref = Mark(ref); // ideally the slow path just does Mark(ref) - * } - */ - __ jmp(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } -void CodeGeneratorX86::MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier). 
+ DCHECK(!kUseBakerReadBarrier); // If heap poisoning is enabled, unpoisoning will be taken care of // by the runtime within the slow path. - GenerateReadBarrier(instruction, out, ref, obj, offset, index); + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); } else if (kPoisonHeapReferences) { __ UnpoisonHeapReference(out.AsRegister<Register>()); } } -void CodeGeneratorX86::GenerateReadBarrierForRoot(HInstruction* instruction, - Location out, - Location root) { +void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the GC root load. + // // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root); AddSlowPath(slow_path); - // TODO: Implement a fast path for ReadBarrierForRoot, performing - // the following operation (for Baker's algorithm): - // - // if (thread.tls32_.is_gc_marking) { - // root = Mark(root); - // } - __ jmp(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -6750,7 +7076,7 @@ Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, // TODO: target as memory. void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) { if (!target.IsValid()) { - DCHECK(type == Primitive::kPrimVoid); + DCHECK_EQ(type, Primitive::kPrimVoid); return; } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f9403a67c0..712179920b 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -219,11 +219,44 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, int shift); void GenerateShrLong(const Location& loc, int shift); void GenerateUShrLong(const Location& loc, int shift); - void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a read barrier. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a Baker's read barrier. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset); + // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not. // `is_wide` specifies whether it is long/double or not. 
void PushOntoFPStack(Location source, uint32_t temp_offset, @@ -364,6 +397,8 @@ class CodeGeneratorX86 : public CodeGenerator { Register value, bool value_can_be_null); + void GenerateMemoryBarrier(MemBarrierKind kind); + Label* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<Label>(block_labels_, block); } @@ -405,7 +440,26 @@ class CodeGeneratorX86 : public CodeGenerator { void Finalize(CodeAllocator* allocator) OVERRIDE; - // Generate a read barrier for a heap reference within `instruction`. + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. // // A read barrier for an object reference read from the heap is // implemented as a call to the artReadBarrierSlow runtime entry @@ -422,23 +476,25 @@ class CodeGeneratorX86 : public CodeGenerator { // When `index` is provided (i.e. for array accesses), the offset // value passed to artReadBarrierSlow is adjusted to take `index` // into account. - void GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap reference. - // If heap poisoning is enabled, also unpoison the reference in `out`. - void MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction`. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. // // A read barrier for an object reference GC root is implemented as // a call to the artReadBarrierForRootSlow runtime entry point, @@ -448,9 +504,18 @@ class CodeGeneratorX86 : public CodeGenerator { // // The `out` location contains the value returned by // artReadBarrierForRootSlow. - void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); private: + // Factored implementation of GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. 
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + const Address& src, + Location temp, + bool needs_null_check); + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); struct PcRelativeDexCacheAccessInfo { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 44a51ea6e2..2c5fbc78bf 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -456,6 +456,56 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; +// Slow path marking an object during a read barrier. +class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj) + : instruction_(instruction), out_(out), obj_(obj) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast()) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>(); + x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + private: + HInstruction* const instruction_; + const Location out_; + const Location obj_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { public: @@ -477,7 +527,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { // reference load to be instrumented, e.g.: // // __ movl(out, Address(out, offset)); - // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. 
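The comment above describes how a reference load whose output register aliases the holder object defeats the slow-path barrier. Concretely, as a hedged sketch of the two patterns (the register and location names are illustrative):

    // Broken: after the load, the original object is gone, so the slow path
    // cannot recompute the reference from (object, offset):
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // What GenerateReferenceLoadOneRegister emits instead: preserve the holder.
    //   __ movl(temp, out);                  // save the original object
    //   __ movl(out, Address(out, offset));  // clobber `out` with the field
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, temp_loc, offset);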
@@ -493,7 +543,9 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_; DCHECK(!instruction_->IsInvoke() || (instruction_->IsInvokeStaticOrDirect() && - instruction_->GetLocations()->Intrinsified())); + instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -634,13 +686,17 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) {} + : instruction_(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -731,7 +787,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: // temp = thread->string_init_entrypoint __ gs()->movl(temp.AsRegister<CpuRegister>(), - Address::Absolute(invoke->GetStringInitOffset(), true)); + Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true)); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); @@ -748,7 +804,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, invoke->GetDexCacheArrayOffset()); __ movq(temp.AsRegister<CpuRegister>(), - Address::Absolute(kDummy32BitOffset, false /* no_rip */)); + Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); // Bind the label at the end of the "movl" insn. 
__ Bind(&pc_relative_dex_cache_patches_.back().label); break; @@ -907,7 +963,7 @@ void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset, uint32_t dex_pc, SlowPathCode* slow_path) { ValidateInvokeRuntime(instruction, slow_path); - __ gs()->call(Address::Absolute(entry_point_offset, true)); + __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true)); RecordPcInfo(instruction, dex_pc, slow_path); } @@ -1939,7 +1995,7 @@ void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) } void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); + codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); } void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { @@ -2667,7 +2723,8 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } else { DCHECK(in.GetConstant()->IsIntConstant()); __ movl(out.AsRegister<CpuRegister>(), - Immediate(static_cast<uint16_t>(in.GetConstant()->AsIntConstant()->GetValue()))); + Immediate(static_cast<uint16_t>( + in.GetConstant()->AsIntConstant()->GetValue()))); } break; @@ -2911,7 +2968,8 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ addss(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + codegen_->LiteralFloatAddress( + second.GetConstant()->AsFloatConstant()->GetValue())); } else { DCHECK(second.IsStackSlot()); __ addss(first.AsFpuRegister<XmmRegister>(), @@ -2925,7 +2983,8 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ addsd(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + codegen_->LiteralDoubleAddress( + second.GetConstant()->AsDoubleConstant()->GetValue())); } else { DCHECK(second.IsDoubleStackSlot()); __ addsd(first.AsFpuRegister<XmmRegister>(), @@ -3000,7 +3059,8 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ subss(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + codegen_->LiteralFloatAddress( + second.GetConstant()->AsFloatConstant()->GetValue())); } else { DCHECK(second.IsStackSlot()); __ subss(first.AsFpuRegister<XmmRegister>(), @@ -3014,7 +3074,8 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ subsd(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + codegen_->LiteralDoubleAddress( + second.GetConstant()->AsDoubleConstant()->GetValue())); } else { DCHECK(second.IsDoubleStackSlot()); __ subsd(first.AsFpuRegister<XmmRegister>(), @@ -3121,7 +3182,8 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ mulss(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); 
+ codegen_->LiteralFloatAddress( + second.GetConstant()->AsFloatConstant()->GetValue())); } else { DCHECK(second.IsStackSlot()); __ mulss(first.AsFpuRegister<XmmRegister>(), @@ -3136,7 +3198,8 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ mulsd(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + codegen_->LiteralDoubleAddress( + second.GetConstant()->AsDoubleConstant()->GetValue())); } else { DCHECK(second.IsDoubleStackSlot()); __ mulsd(first.AsFpuRegister<XmmRegister>(), @@ -3542,7 +3605,8 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ divss(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + codegen_->LiteralFloatAddress( + second.GetConstant()->AsFloatConstant()->GetValue())); } else { DCHECK(second.IsStackSlot()); __ divss(first.AsFpuRegister<XmmRegister>(), @@ -3556,7 +3620,8 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ divsd(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + codegen_->LiteralDoubleAddress( + second.GetConstant()->AsDoubleConstant()->GetValue())); } else { DCHECK(second.IsDoubleStackSlot()); __ divsd(first.AsFpuRegister<XmmRegister>(), @@ -3755,6 +3820,56 @@ void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) { } default: LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitRor(HRor* ror) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall); + + switch (ror->GetResultType()) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + // The shift count needs to be in CL (unless it is a constant). 
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
+  Location second = locations->InAt(1);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt:
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
+        __ rorl(first_reg, second_reg);
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+        __ rorl(first_reg, imm);
+      }
+      break;
+    case Primitive::kPrimLong:
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
+        __ rorq(first_reg, second_reg);
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
+        __ rorq(first_reg, imm);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
   }
 }
@@ -3910,10 +4025,10 @@ void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED
   LOG(FATAL) << "Unimplemented";
 }
 
-void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
   /*
    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
-   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
+   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
    */
   switch (kind) {
@@ -3953,6 +4068,11 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
         Location::RequiresRegister(),
         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -3988,12 +4108,36 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
     }
 
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } else { + __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); + } + break; + } + case Primitive::kPrimLong: { __ movq(out.AsRegister<CpuRegister>(), Address(base, offset)); break; @@ -4014,14 +4158,20 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); - - if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + if (field_type == Primitive::kPrimNot) { + // Potential implicit null checks, in the case of reference + // fields, are handled in the previous switch statement. + } else { + codegen_->MaybeRecordImplicitNullCheck(instruction); } - if (field_type == Primitive::kPrimNot) { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); + if (is_volatile) { + if (field_type == Primitive::kPrimNot) { + // Memory barriers, in the case of references, are also handled + // in the previous switch statement. + } else { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } } @@ -4075,7 +4225,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, uint32_t offset = field_info.GetFieldOffset().Uint32Value(); if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } bool maybe_record_implicit_null_check_done = false; @@ -4181,7 +4331,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } @@ -4358,6 +4508,11 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier. 
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->AddTemp(Location::RequiresRegister()); + } } void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { @@ -4365,12 +4520,13 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { Location obj_loc = locations->InAt(0); CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location index = locations->InAt(1); - Primitive::Type type = instruction->GetType(); + Location out_loc = locations->Out(); + Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movzxb(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); @@ -4382,7 +4538,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movsxb(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); @@ -4394,7 +4550,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimShort: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movsxw(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); @@ -4406,7 +4562,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movzxw(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); @@ -4416,13 +4572,9 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + case Primitive::kPrimInt: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movl(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); @@ -4432,9 +4584,46 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { break; } + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if 
(kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+      } else {
+        CpuRegister out = out_loc.AsRegister<CpuRegister>();
+        if (index.IsConstant()) {
+          uint32_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ movl(out, Address(obj, offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movq(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4446,7 +4635,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4458,7 +4647,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4472,20 +4661,12 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
       LOG(FATAL) << "Unreachable type " << type;
       UNREACHABLE();
   }
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
 
   if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-    Location out = locations->Out();
-    if (index.IsConstant()) {
-      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
-    } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
-    }
+    // Potential implicit null checks, in the case
of reference + // arrays, are handled in the previous switch statement. + } else { + codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -4609,12 +4790,12 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { // __ movl(temp2, temp); // // /* HeapReference<Class> */ temp = temp->component_type_ // __ movl(temp, Address(temp, component_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp_loc, temp_loc, temp2_loc, component_offset); // // // /* HeapReference<Class> */ temp2 = register_value->klass_ // __ movl(temp2, Address(register_value, class_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc); // // __ cmpl(temp, temp2); @@ -4840,8 +5021,8 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, __ testl(value, value); __ j(kEqual, &is_null); } - __ gs()->movq(card, Address::Absolute( - Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true)); + __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), + /* no_rip */ true)); __ movq(temp, object); __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift)); __ movb(Address(temp, card, TIMES_1, 0), card); @@ -4900,8 +5081,9 @@ void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruc DCHECK_EQ(slow_path->GetSuccessor(), successor); } - __ gs()->cmpw(Address::Absolute( - Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0)); + __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), + /* no_rip */ true), + Immediate(0)); if (successor == nullptr) { __ j(kNotEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetReturnLabel()); @@ -5125,7 +5307,7 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( Immediate(mirror::Class::kStatusInitialized)); __ j(kLess, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); - // No need for memory fence, thanks to the X86_64 memory model. + // No need for memory fence, thanks to the x86-64 memory model. 
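The "no memory fence" remarks here and in CodeGeneratorX86_64::GenerateMemoryBarrier above both follow the JSR-133 cookbook rule quoted in that function: on x86's TSO memory model only StoreLoad needs an instruction. As a sketch of the mapping (illustrative; only the kAnyAny case emits code):

    switch (kind) {
      case MemBarrierKind::kAnyAny:      // StoreLoad: the one case needing a fence.
        // mfence (or an equivalent locked read-modify-write) is emitted here.
        break;
      case MemBarrierKind::kLoadAny:     // acquire
      case MemBarrierKind::kAnyStore:    // release
      case MemBarrierKind::kStoreStore:
        // Nothing: the hardware already forbids these reorderings, so only a
        // compiler scheduling barrier is required.
        break;
    }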
} void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { @@ -5156,32 +5338,16 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ leaq(out, Address(current_method, declaring_class_offset)); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ movl(out, Address(current_method, declaring_class_offset)); - } + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ movq(out, Address(current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value())); - - size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &out[type_index] - __ leaq(out, Address(out, cache_offset)); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = out[type_index] - __ movl(out, Address(out, cache_offset)); - } + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); @@ -5220,53 +5386,41 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { } void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); + LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier) + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); - codegen_->AddSlowPath(slow_path); - LocationSummary* locations = load->GetLocations(); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ leaq(out, Address(current_method, declaring_class_offset)); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ movl(out, Address(current_method, declaring_class_offset)); - } - + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); + // /* GcRoot<mirror::String> */ out = out[string_index] + GenerateGcRootFieldLoad( + load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::String>* */ out = &out[string_index] - __ leaq(out, Address(out, cache_offset)); - // /* mirror::String* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::String> */ out = out[string_index] - __ movl(out, Address(out, cache_offset)); + if (!load->IsInDexCache()) { + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); + codegen_->AddSlowPath(slow_path); + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } - - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); } static Address GetExceptionTlsAddress() { - return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true); + return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), + /* no_rip */ true); } void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) { @@ -5302,6 +5456,14 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } +static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + return kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); +} + void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); @@ -5327,21 +5489,22 @@ void 
LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for // some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location cls = locations->InAt(1); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); + Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(0) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5357,10 +5520,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - __ movl(out, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: { if (cls.IsRegister()) { __ cmpl(out, cls.AsRegister<CpuRegister>()); @@ -5386,17 +5548,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { // object to avoid doing a comparison we know will fail. NearLabel loop, success; __ Bind(&loop); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - __ movl(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ movl(out, Address(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5425,17 +5578,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &success); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. 
- CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - __ movl(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ movl(out, Address(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); __ testl(out, out); __ j(kNotEqual, &loop); // If `out` is null, we use it for the result, and jump to `done`. @@ -5459,17 +5603,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } __ j(kEqual, &exact_check); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - __ movl(temp, out); - } // /* HeapReference<Class> */ out = out->component_type_ - __ movl(out, Address(out, component_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5513,6 +5648,13 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { // HInstanceOf instruction (following the runtime calling // convention), which might be cluttered by the potential first // read barrier emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, /* is_fatal */ false); @@ -5565,27 +5707,27 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? 
+ locations->GetTemp(1) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); bool is_type_check_slow_path_fatal = (type_check_kind == TypeCheckKind::kExactCheck || type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -5597,7 +5739,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { is_type_check_slow_path_fatal); codegen_->AddSlowPath(type_check_slow_path); - NearLabel done; + Label done; // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { __ testl(obj, obj); @@ -5605,8 +5747,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -5628,18 +5769,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // object to avoid doing a comparison we know will fail. NearLabel loop, compare_classes; __ Bind(&loop); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); - __ movl(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ movl(temp, Address(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // to the `compare_classes` label to compare it with the checked @@ -5652,8 +5783,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -5679,18 +5809,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } __ j(kEqual, &done); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. 
- CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); - __ movl(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ movl(temp, Address(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. @@ -5702,8 +5822,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -5720,19 +5839,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); - __ movl(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->component_type_ - __ movl(temp, Address(temp, component_offset)); - codegen_->MaybeGenerateReadBarrier( - instruction, temp_loc, temp_loc, temp2_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc); // If the component type is not null (i.e. the object is indeed // an array), jump to label `check_non_primitive_component_type` @@ -5746,8 +5854,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -5755,8 +5862,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -5773,6 +5879,13 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. 
This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -5916,14 +6029,227 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in } } -void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp) { + CpuRegister out_reg = out.AsRegister<CpuRegister>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, out_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ movl(temp.AsRegister<CpuRegister>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ movl(out_reg, Address(out_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + __ movl(out_reg, Address(out_reg, offset)); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp) { + CpuRegister out_reg = out.AsRegister<CpuRegister>(); + CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, obj_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ movl(out_reg, Address(obj_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. 
+ // /* HeapReference<Object> */ out = *(obj + offset) + __ movl(out_reg, Address(obj_reg, offset)); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + CpuRegister obj, + uint32_t offset) { + CpuRegister root_reg = root.AsRegister<CpuRegister>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barrier are used: + // + // root = obj.field; + // if (Thread::Current()->GetIsGcMarking()) { + // root = ReadBarrier::Mark(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ movl(root_reg, Address(obj, offset)); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path used to mark the GC root `root`. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root); + codegen_->AddSlowPath(slow_path); + + __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(), + /* no_rip */ true), + Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ leaq(root_reg, Address(obj, offset)); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ movl(root_reg, Address(obj, offset)); + } +} + +void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + CpuRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Address src(obj, offset); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); +} + +void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + CpuRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + Address src = index.IsConstant() ? + Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) : + Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); +} + +void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + CpuRegister obj, + const Address& src, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. 
However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as: + // - it implements the load-load fence using a data dependency on + // the high-bits of rb_state, which are expected to be all zeroes; + // - it performs additional checks that we do not do here for + // performance reasons. + + CpuRegister ref_reg = ref.AsRegister<CpuRegister>(); + CpuRegister temp_reg = temp.AsRegister<CpuRegister>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ movl(temp_reg, Address(obj, monitor_offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + // /* uint32_t */ rb_state = lock_word.ReadBarrierState() + __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); + __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); + static_assert( + LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, + "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); + + // Load fence to prevent load-load reordering. + // Note that this is a no-op, thanks to the x86-64 memory model. + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + + // The actual reference load. + // /* HeapReference<Object> */ ref = *src + __ movl(ref_reg, src); + + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + + // Slow path used to mark the object `ref` when it is gray. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref); + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the reference load. + // // If heap poisoning is enabled, the unpoisoning of the loaded // reference will be carried out by the runtime within the slow // path. @@ -5937,57 +6263,41 @@ void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction, ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); - // TODO: When read barrier has a fast path, add it here. - /* Currently the read barrier call is inserted after the original load. - * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the - * original load. 
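For reference, the gray check that the Baker fast path performs is a plain shift-and-mask on the monitor word. A self-contained C++ sketch; the shift, mask, and gray values below are placeholders chosen for illustration (the real constants live in art::LockWord and art::ReadBarrier):

    #include <cstdint>

    constexpr uint32_t kReadBarrierStateShift = 28;   // placeholder layout
    constexpr uint32_t kReadBarrierStateMask  = 0x1;  // placeholder layout
    constexpr uint32_t kGrayPtr               = 0x1;  // placeholder value

    // Mirrors the emitted shrl/andl/cmpl sequence: isolate the read
    // barrier state bits of the lock word, then test for gray.
    bool IsGray(uint32_t monitor_word) {
      uint32_t rb_state =
          (monitor_word >> kReadBarrierStateShift) & kReadBarrierStateMask;
      return rb_state == kGrayPtr;
    }

When the test succeeds, the generated code branches to ReadBarrierMarkSlowPathX86_64, which calls into the runtime to mark the just-loaded reference.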
This load-load ordering is required by the read barrier. - * The fast path/slow path (for Baker's algorithm) should look like: - * - * bool isGray = obj.LockWord & kReadBarrierMask; - * lfence; // load fence or artificial data dependence to prevent load-load reordering - * ref = obj.field; // this is the original load - * if (isGray) { - * ref = Mark(ref); // ideally the slow path just does Mark(ref) - * } - */ - __ jmp(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } -void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); // If heap poisoning is enabled, unpoisoning will be taken care of // by the runtime within the slow path. - GenerateReadBarrier(instruction, out, ref, obj, offset, index); + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); } else if (kPoisonHeapReferences) { __ UnpoisonHeapReference(out.AsRegister<CpuRegister>()); } } -void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction, - Location out, - Location root) { +void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the GC root load. + // // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root); AddSlowPath(slow_path); - // TODO: Implement a fast path for ReadBarrierForRoot, performing - // the following operation (for Baker's algorithm): - // - // if (thread.tls32_.is_gc_marking) { - // root = Mark(root); - // } - __ jmp(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -6236,7 +6546,7 @@ Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) { // TODO: trg as memory. void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) { if (!trg.IsValid()) { - DCHECK(type == Primitive::kPrimVoid); + DCHECK_EQ(type, Primitive::kPrimVoid); return; } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 145b1f33b4..dda9ea22d9 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -47,6 +47,12 @@ static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +// These XMM registers are non-volatile in ART ABI, but volatile in native ABI. +// If the ART ABI changes, this list must be updated. It is used to ensure that +// these are not clobbered by any direct call to native code (such as math intrinsics). 
+static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 }; + + class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { public: InvokeRuntimeCallingConvention() @@ -207,11 +213,44 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleShift(HBinaryOperation* operation); - void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a read barrier. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a Baker's read barrier. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + CpuRegister obj, + uint32_t offset); + void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void PushOntoFPStack(Location source, uint32_t temp_offset, @@ -318,6 +357,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { CpuRegister value, bool value_can_be_null); + void GenerateMemoryBarrier(MemBarrierKind kind); + // Helper method to move a value between two locations. void Move(Location destination, Location source); @@ -350,7 +391,26 @@ class CodeGeneratorX86_64 : public CodeGenerator { return isa_features_; } - // Generate a read barrier for a heap reference within `instruction`. + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + CpuRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + CpuRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. // // A read barrier for an object reference read from the heap is // implemented as a call to the artReadBarrierSlow runtime entry @@ -367,23 +427,25 @@ class CodeGeneratorX86_64 : public CodeGenerator { // When `index` provided (i.e., when it is different from // Location::NoLocation()), the offset value passed to // artReadBarrierSlow is adjusted to take `index` into account. 
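To make the `index` adjustment described above concrete: it is the usual array-element address computation, with the element size being that of a compressed heap reference. A tiny sketch, assuming 4-byte compressed references (the actual scale is sizeof(mirror::HeapReference<mirror::Object>)):

    #include <cstdint>

    constexpr uint32_t kHeapRefSize = 4;  // assumption: 32-bit compressed refs

    // Offset handed to artReadBarrierSlow for an array element:
    // the fixed data offset plus the scaled index.
    uint32_t AdjustedOffset(uint32_t data_offset, uint32_t index) {
      return data_offset + index * kHeapRefSize;
    }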
- void GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap reference. - // If heap poisoning is enabled, also unpoison the reference in `out`. - void MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction`. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. // // A read barrier for an object reference GC root is implemented as // a call to the artReadBarrierForRootSlow runtime entry point, @@ -393,7 +455,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // // The `out` location contains the value returned by // artReadBarrierForRootSlow. - void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); int ConstantAreaStart() const { return constant_area_start_; @@ -418,6 +480,15 @@ class CodeGeneratorX86_64 : public CodeGenerator { HInstruction* instruction); private: + // Factored implementation of GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + CpuRegister obj, + const Address& src, + Location temp, + bool needs_null_check); + struct PcRelativeDexCacheAccessInfo { PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) : target_dex_file(dex_file), element_offset(element_off), label() { } diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index af8b8b562a..10d83439fd 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -202,6 +202,11 @@ static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst return true; } + // Our code generator ensures shift distances are within an encodable range. + if (instr->IsRor()) { + return true; + } + int64_t value = CodeGenerator::GetInt64ValueOf(constant); if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) { diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index e469c8d6d0..a8f65bf516 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -32,7 +32,7 @@ namespace art { /** * Fixture class for the constant folding and dce tests. 
*/ -class ConstantFoldingTest : public testing::Test { +class ConstantFoldingTest : public CommonCompilerTest { public: ConstantFoldingTest() : pool_(), allocator_(&pool_) { graph_ = CreateGraph(&allocator_); @@ -56,7 +56,7 @@ class ConstantFoldingTest : public testing::Test { const std::string& expected_after_dce, std::function<void(HGraph*)> check_after_cf) { ASSERT_NE(graph_, nullptr); - graph_->TryBuildingSsa(); + TransformToSsa(graph_); StringPrettyPrinter printer_before(graph_); printer_before.VisitInsertionOrder(); diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index 2c6a1ef63d..f0f98efadb 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -26,6 +26,8 @@ namespace art { +class DeadCodeEliminationTest : public CommonCompilerTest {}; + static void TestCode(const uint16_t* data, const std::string& expected_before, const std::string& expected_after) { @@ -34,7 +36,7 @@ static void TestCode(const uint16_t* data, HGraph* graph = CreateCFG(&allocator, data); ASSERT_NE(graph, nullptr); - graph->TryBuildingSsa(); + TransformToSsa(graph); StringPrettyPrinter printer_before(graph); printer_before.VisitInsertionOrder(); @@ -55,7 +57,6 @@ static void TestCode(const uint16_t* data, ASSERT_EQ(actual_after, expected_after); } - /** * Small three-register program. * @@ -69,7 +70,7 @@ static void TestCode(const uint16_t* data, * L1: v2 <- v0 + v1 5. add-int v2, v0, v1 * return-void 7. return */ -TEST(DeadCodeElimination, AdditionAndConditionalJump) { +TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::CONST_4 | 0 << 8 | 0 << 12, @@ -131,7 +132,7 @@ TEST(DeadCodeElimination, AdditionAndConditionalJump) { * L3: v2 <- v1 + 4 11. add-int/lit16 v2, v1, #+4 * return 13. return-void */ -TEST(DeadCodeElimination, AdditionsAndInconditionalJumps) { +TEST_F(DeadCodeEliminationTest, AdditionsAndInconditionalJumps) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::CONST_4 | 1 << 8 | 1 << 12, diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index c16b872466..f3c1dbe3f5 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -24,6 +24,7 @@ #include "base/arena_containers.h" #include "base/bit_vector-inl.h" #include "base/stringprintf.h" +#include "handle_scope-inl.h" namespace art { @@ -594,6 +595,17 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { } } } + + // Ensure that reference type instructions have reference type info. 
+ if (instruction->GetType() == Primitive::kPrimNot) { + ScopedObjectAccess soa(Thread::Current()); + if (!instruction->GetReferenceTypeInfo().IsValid()) { + AddError(StringPrintf("Reference type instruction %s:%d does not have " + "valid reference type information.", + instruction->DebugName(), + instruction->GetId())); + } + } } static Primitive::Type PrimitiveKind(Primitive::Type type) { @@ -850,7 +862,7 @@ void SSAChecker::VisitCondition(HCondition* op) { void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) { VisitInstruction(op); - if (op->IsUShr() || op->IsShr() || op->IsShl()) { + if (op->IsUShr() || op->IsShr() || op->IsShl() || op->IsRor()) { if (PrimitiveKind(op->InputAt(1)->GetType()) != Primitive::kPrimInt) { AddError(StringPrintf( "Shift operation %s %d has a non-int kind second input: " diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index fee56c7f9e..d10df4ce3f 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -17,8 +17,6 @@ #include "graph_checker.h" #include "optimizing_unit_test.h" -#include "gtest/gtest.h" - namespace art { /** @@ -43,7 +41,6 @@ HGraph* CreateSimpleCFG(ArenaAllocator* allocator) { return graph; } - static void TestCode(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -61,8 +58,7 @@ static void TestCodeSSA(const uint16_t* data) { HGraph* graph = CreateCFG(&allocator, data); ASSERT_NE(graph, nullptr); - graph->BuildDominatorTree(); - graph->TransformToSsa(); + TransformToSsa(graph); SSAChecker ssa_checker(graph); ssa_checker.Run(); @@ -145,7 +141,9 @@ TEST(GraphChecker, BlockEndingWithNonBranchInstruction) { ASSERT_FALSE(graph_checker.IsValid()); } -TEST(SSAChecker, SSAPhi) { +class SSACheckerTest : public CommonCompilerTest {}; + +TEST_F(SSACheckerTest, SSAPhi) { // This code creates one Phi function during the conversion to SSA form. const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index e9fdb84d1e..5f1328f545 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -30,6 +30,7 @@ #include "optimization.h" #include "reference_type_propagation.h" #include "register_allocator.h" +#include "ssa_builder.h" #include "ssa_liveness_analysis.h" #include "utils/assembler.h" @@ -505,7 +506,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } else { StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId(); } - } else if ((IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName) + } else if ((IsPass(SsaBuilder::kSsaBuilderPassName) || IsPass(HInliner::kInlinerPassName)) && (instruction->GetType() == Primitive::kPrimNot)) { ReferenceTypeInfo info = instruction->IsLoadClass() @@ -519,21 +520,15 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; } else if (instruction->IsLoadClass()) { StartAttributeStream("klass") << "unresolved"; - } else if (instruction->IsNullConstant()) { + } else { // The NullConstant may be added to the graph during other passes that happen between // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner // doesn't run or doesn't inline anything, the NullConstant remains untyped. // So we should check NullConstants for validity only after reference type propagation. 
- // - // Note: The infrastructure to properly type NullConstants everywhere is to complex to add - // for the benefits. - StartAttributeStream("klass") << "not_set"; - DCHECK(!is_after_pass_ - || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)) - << " Expected a valid rti after reference type propagation"; - } else { - DCHECK(!is_after_pass_) - << "Expected a valid rti after reference type propagation"; + DCHECK(graph_in_bad_state_ || + (!is_after_pass_ && IsPass(SsaBuilder::kSsaBuilderPassName))) + << instruction->DebugName() << instruction->GetId() << " has invalid rti " + << (is_after_pass_ ? "after" : "before") << " pass " << pass_name_; } } if (disasm_info_ != nullptr) { diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index de60cf21aa..9929696ded 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -21,11 +21,11 @@ #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -#include "gtest/gtest.h" - namespace art { -TEST(GVNTest, LocalFieldElimination) { +class GVNTest : public CommonCompilerTest {}; + +TEST_F(GVNTest, LocalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); NullHandle<mirror::DexCache> dex_cache; @@ -100,7 +100,7 @@ TEST(GVNTest, LocalFieldElimination) { ASSERT_EQ(different_offset->GetBlock(), block); ASSERT_EQ(use_after_kill->GetBlock(), block); - graph->TryBuildingSsa(); + TransformToSsa(graph); SideEffectsAnalysis side_effects(graph); side_effects.Run(); GVNOptimization(graph, side_effects).Run(); @@ -110,7 +110,7 @@ TEST(GVNTest, LocalFieldElimination) { ASSERT_EQ(use_after_kill->GetBlock(), block); } -TEST(GVNTest, GlobalFieldElimination) { +TEST_F(GVNTest, GlobalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); NullHandle<mirror::DexCache> dex_cache; @@ -182,7 +182,7 @@ TEST(GVNTest, GlobalFieldElimination) { 0)); join->AddInstruction(new (&allocator) HExit()); - graph->TryBuildingSsa(); + TransformToSsa(graph); SideEffectsAnalysis side_effects(graph); side_effects.Run(); GVNOptimization(graph, side_effects).Run(); @@ -193,7 +193,7 @@ TEST(GVNTest, GlobalFieldElimination) { ASSERT_TRUE(join->GetFirstInstruction()->IsExit()); } -TEST(GVNTest, LoopFieldElimination) { +TEST_F(GVNTest, LoopFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); NullHandle<mirror::DexCache> dex_cache; @@ -288,7 +288,7 @@ TEST(GVNTest, LoopFieldElimination) { ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body); ASSERT_EQ(field_get_in_exit->GetBlock(), exit); - graph->TryBuildingSsa(); + TransformToSsa(graph); { SideEffectsAnalysis side_effects(graph); side_effects.Run(); @@ -316,7 +316,7 @@ TEST(GVNTest, LoopFieldElimination) { } // Test that inner loops affect the side effects of the outer loop. 
-TEST(GVNTest, LoopSideEffects) { +TEST_F(GVNTest, LoopSideEffects) { ArenaPool pool; ArenaAllocator allocator(&pool); NullHandle<mirror::DexCache> dex_cache; @@ -364,7 +364,7 @@ TEST(GVNTest, LoopSideEffects) { inner_loop_exit->AddInstruction(new (&allocator) HGoto()); outer_loop_exit->AddInstruction(new (&allocator) HExit()); - graph->TryBuildingSsa(); + TransformToSsa(graph); ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn( *outer_loop_header->GetLoopInformation())); diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index 5de94f43c9..776c115e9d 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -18,7 +18,6 @@ #include "base/arena_allocator.h" #include "builder.h" -#include "gtest/gtest.h" #include "induction_var_analysis.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -28,7 +27,7 @@ namespace art { /** * Fixture class for the InductionVarAnalysis tests. */ -class InductionVarAnalysisTest : public testing::Test { +class InductionVarAnalysisTest : public CommonCompilerTest { public: InductionVarAnalysisTest() : pool_(), allocator_(&pool_) { graph_ = CreateGraph(&allocator_); @@ -102,6 +101,7 @@ class InductionVarAnalysisTest : public testing::Test { basic_[d] = new (&allocator_) HLocal(d); entry_->AddInstruction(basic_[d]); loop_preheader_[d]->AddInstruction(new (&allocator_) HStoreLocal(basic_[d], constant0_)); + loop_preheader_[d]->AddInstruction(new (&allocator_) HGoto()); HInstruction* load = new (&allocator_) HLoadLocal(basic_[d], Primitive::kPrimInt); loop_header_[d]->AddInstruction(load); HInstruction* compare = new (&allocator_) HLessThan(load, constant100_); @@ -168,7 +168,7 @@ class InductionVarAnalysisTest : public testing::Test { // Performs InductionVarAnalysis (after proper set up). void PerformInductionVarAnalysis() { - ASSERT_TRUE(graph_->TryBuildingSsa()); + TransformToSsa(graph_); iva_ = new (&allocator_) HInductionVarAnalysis(graph_); iva_->Run(); } @@ -212,7 +212,7 @@ TEST_F(InductionVarAnalysisTest, ProperLoopSetup) { // .. // } BuildLoopNest(10); - ASSERT_TRUE(graph_->TryBuildingSsa()); + TransformToSsa(graph_); ASSERT_EQ(entry_->GetLoopInformation(), nullptr); for (int d = 0; d < 1; d++) { ASSERT_EQ(loop_preheader_[d]->GetLoopInformation(), diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index 128b5bb811..a1c797a80a 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -16,7 +16,6 @@ #include "base/arena_allocator.h" #include "builder.h" -#include "gtest/gtest.h" #include "induction_var_analysis.h" #include "induction_var_range.h" #include "nodes.h" @@ -29,7 +28,7 @@ using Value = InductionVarRange::Value; /** * Fixture class for the InductionVarRange tests. */ -class InductionVarRangeTest : public testing::Test { +class InductionVarRangeTest : public CommonCompilerTest { public: InductionVarRangeTest() : pool_(), allocator_(&pool_) { graph_ = CreateGraph(&allocator_); @@ -105,7 +104,7 @@ class InductionVarRangeTest : public testing::Test { /** Performs induction variable analysis. 
*/ void PerformInductionVarAnalysis() { - ASSERT_TRUE(graph_->TryBuildingSsa()); + TransformToSsa(graph_); iva_->Run(); } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index a4dcb3aeba..db1170909f 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -33,6 +33,7 @@ #include "reference_type_propagation.h" #include "register_allocator.h" #include "sharpening.h" +#include "ssa_builder.h" #include "ssa_phi_elimination.h" #include "scoped_thread_state_change.h" #include "thread.h" @@ -514,7 +515,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, return false; } - if (!callee_graph->TryBuildingSsa()) { + if (callee_graph->TryBuildingSsa(handles_) != kBuildSsaSuccess) { VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be transformed to SSA"; return false; @@ -549,14 +550,12 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, // Run simple optimizations on the graph. HDeadCodeElimination dce(callee_graph, stats_); HConstantFolding fold(callee_graph); - ReferenceTypePropagation type_propagation(callee_graph, handles_); HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_); InstructionSimplifier simplify(callee_graph, stats_); IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_); HOptimization* optimizations[] = { &intrinsics, - &type_propagation, &sharpening, &simplify, &fold, @@ -677,42 +676,36 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, DCHECK_EQ(graph_, return_replacement->GetBlock()->GetGraph()); } - // When merging the graph we might create a new NullConstant in the caller graph which does - // not have the chance to be typed. We assign the correct type here so that we can keep the - // assertion that every reference has a valid type. This also simplifies checks along the way. - HNullConstant* null_constant = graph_->GetNullConstant(); - if (!null_constant->GetReferenceTypeInfo().IsValid()) { - ReferenceTypeInfo::TypeHandle obj_handle = - handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject)); - null_constant->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(obj_handle, false /* is_exact */)); - } - // Check the integrity of reference types and run another type propagation if needed. - if ((return_replacement != nullptr) - && (return_replacement->GetType() == Primitive::kPrimNot)) { - if (!return_replacement->GetReferenceTypeInfo().IsValid()) { - // Make sure that we have a valid type for the return. We may get an invalid one when - // we inline invokes with multiple branches and create a Phi for the result. - // TODO: we could be more precise by merging the phi inputs but that requires - // some functionality from the reference type propagation. - DCHECK(return_replacement->IsPhi()); - size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); - ReferenceTypeInfo::TypeHandle return_handle = - handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size)); - return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create( - return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */)); - } + if (return_replacement != nullptr) { + if (return_replacement->GetType() == Primitive::kPrimNot) { + if (!return_replacement->GetReferenceTypeInfo().IsValid()) { + // Make sure that we have a valid type for the return. We may get an invalid one when + // we inline invokes with multiple branches and create a Phi for the result. 
+ // TODO: we could be more precise by merging the phi inputs but that requires + // some functionality from the reference type propagation. + DCHECK(return_replacement->IsPhi()); + size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); + ReferenceTypeInfo::TypeHandle return_handle = + handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size)); + return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create( + return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */)); + } - if (do_rtp) { - // If the return type is a refinement of the declared type run the type propagation again. - ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo(); - ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo(); - if (invoke_rti.IsStrictSupertypeOf(return_rti) - || (return_rti.IsExact() && !invoke_rti.IsExact()) - || !return_replacement->CanBeNull()) { - ReferenceTypePropagation rtp_fixup(graph_, handles_); - rtp_fixup.Run(); + if (do_rtp) { + // If the return type is a refinement of the declared type run the type propagation again. + ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo(); + ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo(); + if (invoke_rti.IsStrictSupertypeOf(return_rti) + || (return_rti.IsExact() && !invoke_rti.IsExact()) + || !return_replacement->CanBeNull()) { + ReferenceTypePropagation(graph_, handles_).Run(); + } + } + } else if (return_replacement->IsInstanceOf()) { + if (do_rtp) { + // Inlining InstanceOf into an If may put a tighter bound on reference types. + ReferenceTypePropagation(graph_, handles_).Run(); } } } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2f3df7fc68..e1b13c5087 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -39,6 +39,12 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { } } + bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl); + bool TryReplaceWithRotate(HBinaryOperation* instruction); + bool TryReplaceWithRotateConstantPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl); + bool TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl); + bool TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl); + bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop); void VisitShift(HBinaryOperation* shift); @@ -77,6 +83,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; + void SimplifyRotate(HInvoke* invoke, bool is_left); void SimplifySystemArrayCopy(HInvoke* invoke); void SimplifyStringEquals(HInvoke* invoke); @@ -173,6 +180,161 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { } } +static bool IsSubRegBitsMinusOther(HSub* sub, size_t reg_bits, HInstruction* other) { + return (sub->GetRight() == other && + sub->GetLeft()->IsConstant() && + (Int64FromConstant(sub->GetLeft()->AsConstant()) & (reg_bits - 1)) == 0); +} + +bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op, + HUShr* ushr, + HShl* shl) { + DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); + HRor* ror = new (GetGraph()->GetArena()) HRor(ushr->GetType(), + ushr->GetLeft(), + ushr->GetRight()); + op->GetBlock()->ReplaceAndRemoveInstructionWith(op, ror); + if 
(!ushr->HasUses()) { + ushr->GetBlock()->RemoveInstruction(ushr); + } + if (!ushr->GetRight()->HasUses()) { + ushr->GetRight()->GetBlock()->RemoveInstruction(ushr->GetRight()); + } + if (!shl->HasUses()) { + shl->GetBlock()->RemoveInstruction(shl); + } + if (!shl->GetRight()->HasUses()) { + shl->GetRight()->GetBlock()->RemoveInstruction(shl->GetRight()); + } + return true; +} + +// Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation. +bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) { + // This simplification is currently supported on x86, x86_64, ARM and ARM64. + // TODO: Implement it for MIPS/64. + const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); + switch (instruction_set) { + case kArm: + case kArm64: + case kThumb2: + case kX86: + case kX86_64: + break; + default: + return false; + } + DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); + HInstruction* left = op->GetLeft(); + HInstruction* right = op->GetRight(); + // If we have an UShr and a Shl (in either order). + if ((left->IsUShr() && right->IsShl()) || (left->IsShl() && right->IsUShr())) { + HUShr* ushr = left->IsUShr() ? left->AsUShr() : right->AsUShr(); + HShl* shl = left->IsShl() ? left->AsShl() : right->AsShl(); + DCHECK(Primitive::IsIntOrLongType(ushr->GetType())); + if (ushr->GetType() == shl->GetType() && + ushr->GetLeft() == shl->GetLeft()) { + if (ushr->GetRight()->IsConstant() && shl->GetRight()->IsConstant()) { + // Shift distances are both constant, try replacing with Ror if they + // add up to the register size. + return TryReplaceWithRotateConstantPattern(op, ushr, shl); + } else if (ushr->GetRight()->IsSub() || shl->GetRight()->IsSub()) { + // Shift distances are potentially of the form x and (reg_size - x). + return TryReplaceWithRotateRegisterSubPattern(op, ushr, shl); + } else if (ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg()) { + // Shift distances are potentially of the form d and -d. + return TryReplaceWithRotateRegisterNegPattern(op, ushr, shl); + } + } + } + return false; +} + +// Try replacing code looking like (x >>> #rdist OP x << #ldist): +// UShr dst, x, #rdist +// Shl tmp, x, #ldist +// OP dst, dst, tmp +// or like (x >>> #rdist OP x << #-ldist): +// UShr dst, x, #rdist +// Shl tmp, x, #-ldist +// OP dst, dst, tmp +// with +// Ror dst, x, #rdist +bool InstructionSimplifierVisitor::TryReplaceWithRotateConstantPattern(HBinaryOperation* op, + HUShr* ushr, + HShl* shl) { + DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); + size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte; + size_t rdist = Int64FromConstant(ushr->GetRight()->AsConstant()); + size_t ldist = Int64FromConstant(shl->GetRight()->AsConstant()); + if (((ldist + rdist) & (reg_bits - 1)) == 0) { + ReplaceRotateWithRor(op, ushr, shl); + return true; + } + return false; +} + +// Replace code looking like (x >>> -d OP x << d): +// Neg neg, d +// UShr dst, x, neg +// Shl tmp, x, d +// OP dst, dst, tmp +// with +// Neg neg, d +// Ror dst, x, neg +// *** OR *** +// Replace code looking like (x >>> d OP x << -d): +// UShr dst, x, d +// Neg neg, d +// Shl tmp, x, neg +// OP dst, dst, tmp +// with +// Ror dst, x, d +bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op, + HUShr* ushr, + HShl* shl) { + DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); + DCHECK(ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg()); + bool neg_is_left = shl->GetRight()->IsNeg(); + HNeg* neg = neg_is_left ? 
shl->GetRight()->AsNeg() : ushr->GetRight()->AsNeg(); + // And the shift distance being negated is the distance being shifted the other way. + if (neg->InputAt(0) == (neg_is_left ? ushr->GetRight() : shl->GetRight())) { + ReplaceRotateWithRor(op, ushr, shl); + } + return false; +} + +// Try replacing code looking like (x >>> d OP x << (#bits - d)): +// UShr dst, x, d +// Sub ld, #bits, d +// Shl tmp, x, ld +// OP dst, dst, tmp +// with +// Ror dst, x, d +// *** OR *** +// Replace code looking like (x >>> (#bits - d) OP x << d): +// Sub rd, #bits, d +// UShr dst, x, rd +// Shl tmp, x, d +// OP dst, dst, tmp +// with +// Neg neg, d +// Ror dst, x, neg +bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op, + HUShr* ushr, + HShl* shl) { + DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); + DCHECK(ushr->GetRight()->IsSub() || shl->GetRight()->IsSub()); + size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte; + HInstruction* shl_shift = shl->GetRight(); + HInstruction* ushr_shift = ushr->GetRight(); + if ((shl_shift->IsSub() && IsSubRegBitsMinusOther(shl_shift->AsSub(), reg_bits, ushr_shift)) || + (ushr_shift->IsSub() && IsSubRegBitsMinusOther(ushr_shift->AsSub(), reg_bits, shl_shift))) { + return ReplaceRotateWithRor(op, ushr, shl); + } + return false; +} + void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { HInstruction* obj = null_check->InputAt(0); if (!obj->CanBeNull()) { @@ -530,7 +692,10 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub); RecordSimplification(); neg->GetBlock()->RemoveInstruction(neg); + return; } + + TryReplaceWithRotate(instruction); } void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { @@ -906,7 +1071,10 @@ void InstructionSimplifierVisitor::VisitOr(HOr* instruction) { // src instruction->ReplaceWith(instruction->GetLeft()); instruction->GetBlock()->RemoveInstruction(instruction); + return; } + + TryReplaceWithRotate(instruction); } void InstructionSimplifierVisitor::VisitShl(HShl* instruction) { @@ -1027,6 +1195,8 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { RecordSimplification(); return; } + + TryReplaceWithRotate(instruction); } void InstructionSimplifierVisitor::VisitFakeString(HFakeString* instruction) { @@ -1095,6 +1265,42 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { } } +void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic); + // This simplification is currently supported on x86, x86_64, ARM and ARM64. + // TODO: Implement it for MIPS/64. + const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); + switch (instruction_set) { + case kArm: + case kArm64: + case kThumb2: + case kX86: + case kX86_64: + break; + default: + return; + } + HInstruction* value = invoke->InputAt(0); + HInstruction* distance = invoke->InputAt(1); + // Replace the invoke with an HRor. + if (is_left) { + distance = new (GetGraph()->GetArena()) HNeg(distance->GetType(), distance); + invoke->GetBlock()->InsertInstructionBefore(distance, invoke); + } + HRor* ror = new (GetGraph()->GetArena()) HRor(value->GetType(), value, distance); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, ror); + // Remove ClinitCheck and LoadClass, if possible. 
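Aside: all the rotate rewrites in this simplifier rest on one bit identity: for a w-bit value, (x >>> d) | (x << (w - d)) is a rotation right by d, and a left rotation by d equals a right rotation by -d mod w. A self-contained check of both facts for the 32-bit case:

    #include <cassert>
    #include <cstdint>

    // Rotate right/left on 32 bits; distances are taken mod 32,
    // matching the semantics HRor relies on.
    uint32_t Rotr32(uint32_t x, uint32_t d) {
      d &= 31;
      return d == 0 ? x : (x >> d) | (x << (32 - d));
    }
    uint32_t Rotl32(uint32_t x, uint32_t d) {
      d &= 31;
      return d == 0 ? x : (x << d) | (x >> (32 - d));
    }

    int main() {
      uint32_t x = 0x80000001u;
      for (uint32_t d = 1; d < 32; ++d) {
        // Ground truth: rotate via a doubled 64-bit copy of x.
        uint64_t doubled = (static_cast<uint64_t>(x) << 32) | x;
        uint32_t expected = static_cast<uint32_t>(doubled >> d);
        // The UShr/Shl pair the simplifier matches is exactly a rotate...
        assert(((x >> d) | (x << (32 - d))) == expected);
        assert(Rotr32(x, d) == expected);
        // ...and rotating left by d is rotating right by -d mod 32, which
        // is why SimplifyRotate lowers the left-rotate intrinsics as a
        // Neg of the distance followed by an HRor.
        assert(Rotl32(x, d) == Rotr32(x, 32 - d));
      }
      return 0;
    }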
+ HInstruction* clinit = invoke->InputAt(invoke->InputCount() - 1); + if (clinit->IsClinitCheck() && !clinit->HasUses()) { + clinit->GetBlock()->RemoveInstruction(clinit); + HInstruction* ldclass = clinit->InputAt(0); + if (ldclass->IsLoadClass() && !ldclass->HasUses()) { + ldclass->GetBlock()->RemoveInstruction(ldclass); + } + } +} + static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potential_array) { if (potential_length->IsArrayLength()) { return potential_length->InputAt(0) == potential_array; @@ -1165,6 +1371,12 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { SimplifyStringEquals(instruction); } else if (instruction->GetIntrinsic() == Intrinsics::kSystemArrayCopy) { SimplifySystemArrayCopy(instruction); + } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateRight || + instruction->GetIntrinsic() == Intrinsics::kLongRotateRight) { + SimplifyRotate(instruction, false); + } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateLeft || + instruction->GetIntrinsic() == Intrinsics::kLongRotateLeft) { + SimplifyRotate(instruction, true); } } diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 6a34b13320..6bbc751bee 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -49,6 +49,7 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio GetGraph()->GetIntConstant(mirror::Array::DataOffset(access_size).Uint32Value()); HArm64IntermediateAddress* address = new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc); + address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); access->GetBlock()->InsertInstructionBefore(address, access); access->ReplaceInput(address, 0); // Both instructions must depend on GC to prevent any instruction that can diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 834081188b..7127215c51 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -189,6 +189,42 @@ static Intrinsics GetIntrinsic(InlineMethod method) { return ((method.d.data & kIntrinsicFlagMin) == 0) ? Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong; + // More math builtins. + case kIntrinsicCos: + return Intrinsics::kMathCos; + case kIntrinsicSin: + return Intrinsics::kMathSin; + case kIntrinsicAcos: + return Intrinsics::kMathAcos; + case kIntrinsicAsin: + return Intrinsics::kMathAsin; + case kIntrinsicAtan: + return Intrinsics::kMathAtan; + case kIntrinsicAtan2: + return Intrinsics::kMathAtan2; + case kIntrinsicCbrt: + return Intrinsics::kMathCbrt; + case kIntrinsicCosh: + return Intrinsics::kMathCosh; + case kIntrinsicExp: + return Intrinsics::kMathExp; + case kIntrinsicExpm1: + return Intrinsics::kMathExpm1; + case kIntrinsicHypot: + return Intrinsics::kMathHypot; + case kIntrinsicLog: + return Intrinsics::kMathLog; + case kIntrinsicLog10: + return Intrinsics::kMathLog10; + case kIntrinsicNextAfter: + return Intrinsics::kMathNextAfter; + case kIntrinsicSinh: + return Intrinsics::kMathSinh; + case kIntrinsicTan: + return Intrinsics::kMathTan; + case kIntrinsicTanh: + return Intrinsics::kMathTanh; + // Misc math. 
case kIntrinsicSqrt: return Intrinsics::kMathSqrt; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 5329b5c1b7..e8181bbb06 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -240,178 +240,6 @@ void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); } -static void GenIntegerRotate(LocationSummary* locations, - ArmAssembler* assembler, - bool is_left) { - Register in = locations->InAt(0).AsRegister<Register>(); - Location rhs = locations->InAt(1); - Register out = locations->Out().AsRegister<Register>(); - - if (rhs.IsConstant()) { - // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31], - // so map all rotations to a +ve. equivalent in that range. - // (e.g. left *or* right by -2 bits == 30 bits in the same direction.) - uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue() & 0x1F; - if (rot) { - // Rotate, mapping left rotations to right equivalents if necessary. - // (e.g. left by 2 bits == right by 30.) - __ Ror(out, in, is_left ? (0x20 - rot) : rot); - } else if (out != in) { - __ Mov(out, in); - } - } else { - if (is_left) { - __ rsb(out, rhs.AsRegister<Register>(), ShifterOperand(0)); - __ Ror(out, in, out); - } else { - __ Ror(out, in, rhs.AsRegister<Register>()); - } - } -} - -// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer -// rotates by swapping input regs (effectively rotating by the first 32-bits of -// a larger rotation) or flipping direction (thus treating larger right/left -// rotations as sub-word sized rotations in the other direction) as appropriate. -static void GenLongRotate(LocationSummary* locations, - ArmAssembler* assembler, - bool is_left) { - Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Location rhs = locations->InAt(1); - Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>(); - - if (rhs.IsConstant()) { - uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue(); - // Map all left rotations to right equivalents. - if (is_left) { - rot = 0x40 - rot; - } - // Map all rotations to +ve. equivalents on the interval [0,63]. - rot &= 0x3F; - // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate - // logic below to a simple pair of binary orr. - // (e.g. 34 bits == in_reg swap + 2 bits right.) - if (rot >= 0x20) { - rot -= 0x20; - std::swap(in_reg_hi, in_reg_lo); - } - // Rotate, or mov to out for zero or word size rotations. 
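Aside: the "pre-rotate" trick in the (removed) GenLongRotate is easy to verify in isolation: a 64-bit right rotation by 32 or more is a word swap followed by a rotation of at most 31 bits, and each output word is built from two shifted pieces. A standalone model of the decomposition:

    #include <cstdint>

    // Rotate a 64-bit value (given as two 32-bit words) right by `rot`,
    // using only 32-bit shifts and ors, as the ARM code did.
    uint64_t Rotr64ViaWords(uint32_t lo, uint32_t hi, uint32_t rot) {
      rot &= 63;
      if (rot >= 32) {
        // "Pre-rotate" by 32 bits: swapping the words covers the
        // whole-word part, leaving a sub-word rotation of rot - 32.
        uint32_t tmp = lo; lo = hi; hi = tmp;
        rot -= 32;
      }
      if (rot == 0) {
        return (static_cast<uint64_t>(hi) << 32) | lo;  // plain move
      }
      // Each output word keeps its own bits shifted down and takes the
      // wrapped-in bits from the other word, mirroring the Lsr/orr pairs.
      uint32_t out_lo = (lo >> rot) | (hi << (32 - rot));
      uint32_t out_hi = (hi >> rot) | (lo << (32 - rot));
      return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
    }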
- if (rot) { - __ Lsr(out_reg_hi, in_reg_hi, rot); - __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, 0x20 - rot)); - __ Lsr(out_reg_lo, in_reg_lo, rot); - __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, 0x20 - rot)); - } else { - __ Mov(out_reg_lo, in_reg_lo); - __ Mov(out_reg_hi, in_reg_hi); - } - } else { - Register shift_left = locations->GetTemp(0).AsRegister<Register>(); - Register shift_right = locations->GetTemp(1).AsRegister<Register>(); - Label end; - Label right; - - __ and_(shift_left, rhs.AsRegister<Register>(), ShifterOperand(0x1F)); - __ Lsrs(shift_right, rhs.AsRegister<Register>(), 6); - __ rsb(shift_right, shift_left, ShifterOperand(0x20), AL, kCcKeep); - - if (is_left) { - __ b(&right, CS); - } else { - __ b(&right, CC); - std::swap(shift_left, shift_right); - } - - // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right). - // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right). - __ Lsl(out_reg_hi, in_reg_hi, shift_left); - __ Lsr(out_reg_lo, in_reg_lo, shift_right); - __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo)); - __ Lsl(out_reg_lo, in_reg_lo, shift_left); - __ Lsr(shift_left, in_reg_hi, shift_right); - __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left)); - __ b(&end); - - // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). - // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left). - __ Bind(&right); - __ Lsr(out_reg_hi, in_reg_hi, shift_right); - __ Lsl(out_reg_lo, in_reg_lo, shift_left); - __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo)); - __ Lsr(out_reg_lo, in_reg_lo, shift_right); - __ Lsl(shift_right, in_reg_hi, shift_left); - __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right)); - - __ Bind(&end); - } -} - -void IntrinsicLocationsBuilderARM::VisitIntegerRotateRight(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicCodeGeneratorARM::VisitIntegerRotateRight(HInvoke* invoke) { - GenIntegerRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ false); -} - -void IntrinsicLocationsBuilderARM::VisitLongRotateRight(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - if (invoke->InputAt(1)->IsConstant()) { - locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorARM::VisitLongRotateRight(HInvoke* invoke) { - GenLongRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ false); -} - -void IntrinsicLocationsBuilderARM::VisitIntegerRotateLeft(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void 
IntrinsicCodeGeneratorARM::VisitIntegerRotateLeft(HInvoke* invoke) { - GenIntegerRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ true); -} - -void IntrinsicLocationsBuilderARM::VisitLongRotateLeft(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - if (invoke->InputAt(1)->IsConstant()) { - locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorARM::VisitLongRotateLeft(HInvoke* invoke) { - GenLongRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ true); -} - static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { Location in = locations->InAt(0); Location out = locations->Out(); @@ -1700,8 +1528,12 @@ void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(IntegerReverse) UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) +UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) UNIMPLEMENTED_INTRINSIC(LongReverse) UNIMPLEMENTED_INTRINSIC(LongReverseBytes) +UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(LongRotateRight) UNIMPLEMENTED_INTRINSIC(ShortReverseBytes) UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble) UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat) @@ -1718,6 +1550,23 @@ UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) +UNIMPLEMENTED_INTRINSIC(MathCos) +UNIMPLEMENTED_INTRINSIC(MathSin) +UNIMPLEMENTED_INTRINSIC(MathAcos) +UNIMPLEMENTED_INTRINSIC(MathAsin) +UNIMPLEMENTED_INTRINSIC(MathAtan) +UNIMPLEMENTED_INTRINSIC(MathAtan2) +UNIMPLEMENTED_INTRINSIC(MathCbrt) +UNIMPLEMENTED_INTRINSIC(MathCosh) +UNIMPLEMENTED_INTRINSIC(MathExp) +UNIMPLEMENTED_INTRINSIC(MathExpm1) +UNIMPLEMENTED_INTRINSIC(MathHypot) +UNIMPLEMENTED_INTRINSIC(MathLog) +UNIMPLEMENTED_INTRINSIC(MathLog10) +UNIMPLEMENTED_INTRINSIC(MathNextAfter) +UNIMPLEMENTED_INTRINSIC(MathSinh) +UNIMPLEMENTED_INTRINSIC(MathTan) +UNIMPLEMENTED_INTRINSIC(MathTanh) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 962c4d5167..6b34daadf0 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -330,103 +330,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); } -static void GenRotateRight(LocationSummary* locations, - Primitive::Type type, - vixl::MacroAssembler* masm) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); - - Location in = locations->InAt(0); - Location out = locations->Out(); - Operand rhs = OperandFrom(locations->InAt(1), type); - - if (rhs.IsImmediate()) { - uint32_t shift = rhs.immediate() & (RegisterFrom(in, type).SizeInBits() - 1); - __ Ror(RegisterFrom(out, type), - RegisterFrom(in, type), - shift); - } else { - DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0); - __ Ror(RegisterFrom(out, type), - 
RegisterFrom(in, type), - rhs.reg()); - } -} - -void IntrinsicLocationsBuilderARM64::VisitIntegerRotateRight(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicCodeGeneratorARM64::VisitIntegerRotateRight(HInvoke* invoke) { - GenRotateRight(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitLongRotateRight(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicCodeGeneratorARM64::VisitLongRotateRight(HInvoke* invoke) { - GenRotateRight(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); -} - -static void GenRotateLeft(LocationSummary* locations, - Primitive::Type type, - vixl::MacroAssembler* masm) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); - - Location in = locations->InAt(0); - Location out = locations->Out(); - Operand rhs = OperandFrom(locations->InAt(1), type); - - if (rhs.IsImmediate()) { - uint32_t regsize = RegisterFrom(in, type).SizeInBits(); - uint32_t shift = (regsize - rhs.immediate()) & (regsize - 1); - __ Ror(RegisterFrom(out, type), RegisterFrom(in, type), shift); - } else { - DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0); - __ Neg(RegisterFrom(out, type), - Operand(RegisterFrom(locations->InAt(1), type))); - __ Ror(RegisterFrom(out, type), - RegisterFrom(in, type), - RegisterFrom(out, type)); - } -} - -void IntrinsicLocationsBuilderARM64::VisitIntegerRotateLeft(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorARM64::VisitIntegerRotateLeft(HInvoke* invoke) { - GenRotateLeft(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitLongRotateLeft(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorARM64::VisitLongRotateLeft(HInvoke* invoke) { - GenRotateLeft(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); -} - static void GenReverse(LocationSummary* locations, Primitive::Type type, vixl::MacroAssembler* masm) { @@ -1527,11 +1430,33 @@ void IntrinsicLocationsBuilderARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNU void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } +UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(LongRotateLeft) 
+UNIMPLEMENTED_INTRINSIC(LongRotateRight) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) +UNIMPLEMENTED_INTRINSIC(MathCos) +UNIMPLEMENTED_INTRINSIC(MathSin) +UNIMPLEMENTED_INTRINSIC(MathAcos) +UNIMPLEMENTED_INTRINSIC(MathAsin) +UNIMPLEMENTED_INTRINSIC(MathAtan) +UNIMPLEMENTED_INTRINSIC(MathAtan2) +UNIMPLEMENTED_INTRINSIC(MathCbrt) +UNIMPLEMENTED_INTRINSIC(MathCosh) +UNIMPLEMENTED_INTRINSIC(MathExp) +UNIMPLEMENTED_INTRINSIC(MathExpm1) +UNIMPLEMENTED_INTRINSIC(MathHypot) +UNIMPLEMENTED_INTRINSIC(MathLog) +UNIMPLEMENTED_INTRINSIC(MathLog10) +UNIMPLEMENTED_INTRINSIC(MathNextAfter) +UNIMPLEMENTED_INTRINSIC(MathSinh) +UNIMPLEMENTED_INTRINSIC(MathTan) +UNIMPLEMENTED_INTRINSIC(MathTanh) + #undef UNIMPLEMENTED_INTRINSIC #undef __ diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h index 8f1d5e1c4d..96f43a0f74 100644 --- a/compiler/optimizing/intrinsics_list.h +++ b/compiler/optimizing/intrinsics_list.h @@ -51,6 +51,23 @@ V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache) \ V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache) \ V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache) \ + V(MathCos, kStatic, kNeedsEnvironmentOrCache) \ + V(MathSin, kStatic, kNeedsEnvironmentOrCache) \ + V(MathAcos, kStatic, kNeedsEnvironmentOrCache) \ + V(MathAsin, kStatic, kNeedsEnvironmentOrCache) \ + V(MathAtan, kStatic, kNeedsEnvironmentOrCache) \ + V(MathAtan2, kStatic, kNeedsEnvironmentOrCache) \ + V(MathCbrt, kStatic, kNeedsEnvironmentOrCache) \ + V(MathCosh, kStatic, kNeedsEnvironmentOrCache) \ + V(MathExp, kStatic, kNeedsEnvironmentOrCache) \ + V(MathExpm1, kStatic, kNeedsEnvironmentOrCache) \ + V(MathHypot, kStatic, kNeedsEnvironmentOrCache) \ + V(MathLog, kStatic, kNeedsEnvironmentOrCache) \ + V(MathLog10, kStatic, kNeedsEnvironmentOrCache) \ + V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache) \ + V(MathSinh, kStatic, kNeedsEnvironmentOrCache) \ + V(MathTan, kStatic, kNeedsEnvironmentOrCache) \ + V(MathTanh, kStatic, kNeedsEnvironmentOrCache) \ V(MathSqrt, kStatic, kNeedsEnvironmentOrCache) \ V(MathCeil, kStatic, kNeedsEnvironmentOrCache) \ V(MathFloor, kStatic, kNeedsEnvironmentOrCache) \ diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 9ecce0e93a..06fab616ad 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -989,6 +989,23 @@ UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) +UNIMPLEMENTED_INTRINSIC(MathCos) +UNIMPLEMENTED_INTRINSIC(MathSin) +UNIMPLEMENTED_INTRINSIC(MathAcos) +UNIMPLEMENTED_INTRINSIC(MathAsin) +UNIMPLEMENTED_INTRINSIC(MathAtan) +UNIMPLEMENTED_INTRINSIC(MathAtan2) +UNIMPLEMENTED_INTRINSIC(MathCbrt) +UNIMPLEMENTED_INTRINSIC(MathCosh) +UNIMPLEMENTED_INTRINSIC(MathExp) +UNIMPLEMENTED_INTRINSIC(MathExpm1) +UNIMPLEMENTED_INTRINSIC(MathHypot) +UNIMPLEMENTED_INTRINSIC(MathLog) +UNIMPLEMENTED_INTRINSIC(MathLog10) +UNIMPLEMENTED_INTRINSIC(MathNextAfter) +UNIMPLEMENTED_INTRINSIC(MathSinh) +UNIMPLEMENTED_INTRINSIC(MathTan) +UNIMPLEMENTED_INTRINSIC(MathTanh) #undef UNIMPLEMENTED_INTRINSIC #undef __ diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 36e1b20e4e..8aa7d9ff6f 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1730,6 +1730,24 @@ 
UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) +UNIMPLEMENTED_INTRINSIC(MathCos) +UNIMPLEMENTED_INTRINSIC(MathSin) +UNIMPLEMENTED_INTRINSIC(MathAcos) +UNIMPLEMENTED_INTRINSIC(MathAsin) +UNIMPLEMENTED_INTRINSIC(MathAtan) +UNIMPLEMENTED_INTRINSIC(MathAtan2) +UNIMPLEMENTED_INTRINSIC(MathCbrt) +UNIMPLEMENTED_INTRINSIC(MathCosh) +UNIMPLEMENTED_INTRINSIC(MathExp) +UNIMPLEMENTED_INTRINSIC(MathExpm1) +UNIMPLEMENTED_INTRINSIC(MathHypot) +UNIMPLEMENTED_INTRINSIC(MathLog) +UNIMPLEMENTED_INTRINSIC(MathLog10) +UNIMPLEMENTED_INTRINSIC(MathNextAfter) +UNIMPLEMENTED_INTRINSIC(MathSinh) +UNIMPLEMENTED_INTRINSIC(MathTan) +UNIMPLEMENTED_INTRINSIC(MathTanh) + #undef UNIMPLEMENTED_INTRINSIC #undef __ diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 5b67cdefa3..fd454d8322 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -788,6 +788,195 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { __ Bind(&done); } +static void CreateFPToFPCallLocations(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); +} + +static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) { + LocationSummary* locations = invoke->GetLocations(); + DCHECK(locations->WillCall()); + DCHECK(invoke->IsInvokeStaticOrDirect()); + X86Assembler* assembler = codegen->GetAssembler(); + + // We need some place to pass the parameters. + __ subl(ESP, Immediate(16)); + __ cfi().AdjustCFAOffset(16); + + // Pass the parameters at the bottom of the stack. + __ movsd(Address(ESP, 0), XMM0); + + // If we have a second parameter, pass it next. + if (invoke->GetNumberOfArguments() == 2) { + __ movsd(Address(ESP, 8), XMM1); + } + + // Now do the actual call. + __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(entry))); + + // Extract the return value from the FP stack. + __ fstpl(Address(ESP, 0)); + __ movsd(XMM0, Address(ESP, 0)); + + // And clean up the stack. 
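// (The addl that follows releases the same 16 bytes reserved by the subl on
// entry.) As a hedged C analogue of the whole sequence: the quick entrypoint
// behaves like a cdecl libm routine, taking its double arguments on the stack
// and returning in x87 ST(0), so values have to be shuttled between XMM
// registers and memory on both sides of the call. QuickCos is a hypothetical
// stand-in for the entrypoint, not a real symbol:
//
//   extern "C" double QuickCos(double);   // assumed cdecl signature
//   double CosViaRuntime(double x) {      // x arrives in XMM0
//     return QuickCos(x);                 // same spill / call / fstp dance
//   }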
+ __ addl(ESP, Immediate(16)); + __ cfi().AdjustCFAOffset(-16); + + codegen->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCos); +} + +void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickSin); +} + +void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAcos); +} + +void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAsin); +} + +void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAtan); +} + +void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCbrt); +} + +void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCosh); +} + +void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickExp); +} + +void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickExpm1); +} + +void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickLog); +} + +void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickLog10); +} + +void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickSinh); +} + +void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickTan); +} + +void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickTanh); +} + +static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new 
(arena) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); +} + +void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAtan2); +} + +void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickHypot); +} + +void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickNextAfter); +} + void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { // The inputs plus one temp. LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -1601,12 +1790,27 @@ static void GenUnsafeGet(HInvoke* invoke, Location output_loc = locations->Out(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case Primitive::kPrimInt: { Register output = output_loc.AsRegister<Register>(); __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); - if (type == Primitive::kPrimNot) { - codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc); + break; + } + + case Primitive::kPrimNot: { + Register output = output_loc.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + codegen->GenerateArrayLoadWithBakerReadBarrier( + invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + } else { + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); + codegen->GenerateReadBarrierSlow( + invoke, output_loc, output_loc, base_loc, 0U, offset_loc); + } + } else { + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); + __ MaybeUnpoisonHeapReference(output); } break; } @@ -1634,8 +1838,10 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke, - bool is_long, bool is_volatile) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type, + bool is_volatile) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); @@ -1647,7 +1853,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); - if (is_long) { + if (type == Primitive::kPrimLong) { if (is_volatile) { // Need to use XMM to read volatile. 
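// Rationale for the FPU temp requested next: on 32-bit x86 a long read as two
// 4-byte movl loads can tear, and only a single 8-byte access (for instance a
// movsd through an XMM register) is single-copy atomic. The guarantee being
// implemented, as a portable C++ model (std::atomic from <atomic>), not the
// emitted code:
//
//   int64_t LoadVolatileLong(const std::atomic<int64_t>* addr) {
//     return addr->load(std::memory_order_seq_cst);  // one indivisible read
//   }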
locations->AddTemp(Location::RequiresFpuRegister()); @@ -1658,25 +1864,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke } else { locations->SetOut(Location::RequiresRegister()); } + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } } void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ false); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ true); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ true, /* is_volatile */ false); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ true, /* is_volatile */ true); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ false); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ true); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true); } @@ -2277,56 +2488,6 @@ void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) GenTrailingZeros(assembler, invoke, /* is_long */ true); } -static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL or a constant. 
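// (x86 encodes a variable rotate count only in CL, hence the ECX constraint
// below.) For reference, the operation these retired intrinsics implemented,
// per the Java definition of Integer.rotateLeft (sketch):
//
//   uint32_t RotateLeft32(uint32_t x, uint32_t n) {
//     n &= 31u;                                    // hardware masks the count
//     return (x << n) | (x >> ((32u - n) & 31u));  // n == 0 handled by mask
//   }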
- locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, invoke->InputAt(1))); - locations->SetOut(Location::SameAsFirstInput()); -} - -static void GenRotate(X86Assembler* assembler, HInvoke* invoke, bool is_left) { - LocationSummary* locations = invoke->GetLocations(); - Register first_reg = locations->InAt(0).AsRegister<Register>(); - Location second = locations->InAt(1); - - if (second.IsRegister()) { - Register second_reg = second.AsRegister<Register>(); - if (is_left) { - __ roll(first_reg, second_reg); - } else { - __ rorl(first_reg, second_reg); - } - } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); - if (is_left) { - __ roll(first_reg, imm); - } else { - __ rorl(first_reg, imm); - } - } -} - -void IntrinsicLocationsBuilderX86::VisitIntegerRotateLeft(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitIntegerRotateLeft(HInvoke* invoke) { - X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_left */ true); -} - -void IntrinsicLocationsBuilderX86::VisitIntegerRotateRight(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitIntegerRotateRight(HInvoke* invoke) { - X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_left */ false); -} - // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -2337,6 +2498,8 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index ecd129f31e..ce737e3f7e 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -703,6 +703,188 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { __ Bind(&done); } +static void CreateFPToFPCallLocations(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + + // We have to ensure that the native code doesn't clobber the XMM registers which are + // non-volatile for ART, but volatile for Native calls. This will ensure that they are + // saved in the prologue and properly restored. 
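// In ABI terms: the System V x86-64 native convention makes every XMM
// register caller-saved, while ART's managed convention keeps a subset alive
// across calls; declaring that subset as temps forces the register allocator
// to spill and reload it around the runtime call. A hypothetical shape for
// the list (the exact registers are an assumption, not taken from the patch):
//
//   static constexpr FloatRegister non_volatile_xmm_regs[] =
//       { XMM12, XMM13, XMM14, XMM15 };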
+ for (auto fp_reg : non_volatile_xmm_regs) { + locations->AddTemp(Location::FpuRegisterLocation(fp_reg)); + } +} + +static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen, + QuickEntrypointEnum entry) { + LocationSummary* locations = invoke->GetLocations(); + DCHECK(locations->WillCall()); + DCHECK(invoke->IsInvokeStaticOrDirect()); + X86_64Assembler* assembler = codegen->GetAssembler(); + + __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64WordSize>(entry), true)); + codegen->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCos); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickSin); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAcos); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAsin); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAtan); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCbrt); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCosh); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickExp); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickExpm1); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickLog); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickLog10); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickSinh); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) { + 
CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickTan); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickTanh); +} + +static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + + // We have to ensure that the native code doesn't clobber the XMM registers which are + // non-volatile for ART, but volatile for Native calls. This will ensure that they are + // saved in the prologue and properly restored. + for (auto fp_reg : non_volatile_xmm_regs) { + locations->AddTemp(Location::FpuRegisterLocation(fp_reg)); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAtan2); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickHypot); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickNextAfter); +} + void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) { // The inputs plus one temp. 
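// What the intrinsic inlines, as a sketch of String.charAt semantics; the
// field names are illustrative, not the actual runtime layout:
//
//   uint16_t CharAt(const String* s, int32_t index) {
//     if (static_cast<uint32_t>(index) >= static_cast<uint32_t>(s->count)) {
//       ThrowStringIndexOutOfBoundsException();   // hypothetical helper
//     }
//     return s->chars[index];
//   }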
LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -1735,16 +1917,30 @@ static void GenUnsafeGet(HInvoke* invoke, Location offset_loc = locations->InAt(2); CpuRegister offset = offset_loc.AsRegister<CpuRegister>(); Location output_loc = locations->Out(); - CpuRegister output = locations->Out().AsRegister<CpuRegister>(); + CpuRegister output = output_loc.AsRegister<CpuRegister>(); switch (type) { case Primitive::kPrimInt: - case Primitive::kPrimNot: __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); - if (type == Primitive::kPrimNot) { - codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc); + break; + + case Primitive::kPrimNot: { + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + codegen->GenerateArrayLoadWithBakerReadBarrier( + invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + } else { + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); + codegen->GenerateReadBarrierSlow( + invoke, output_loc, output_loc, base_loc, 0U, offset_loc); + } + } else { + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); + __ MaybeUnpoisonHeapReference(output); } break; + } case Primitive::kPrimLong: __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); @@ -1756,7 +1952,9 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); @@ -1769,25 +1967,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier. 
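// A deliberately simplified model of the Baker-style fast path that the temp
// supports; IsGcMarking and Mark are stand-ins, not ART APIs:
//
//   mirror::Object* BarrieredLoad(mirror::Object* const* slot) {
//     mirror::Object* ref = *slot;   // fast path: plain heap load
//     if (IsGcMarking()) {           // state check uses the temp register
//       ref = Mark(ref);             // slow path keeps the to-space invariant
//     }
//     return ref;
//   }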
+ locations->AddTemp(Location::RequiresRegister()); + } } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); } @@ -2295,92 +2498,6 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok GenTrailingZeros(assembler, invoke, /* is_long */ true); } -static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL or a constant. - locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, invoke->InputAt(1))); - locations->SetOut(Location::SameAsFirstInput()); -} - -static void GenRotate(X86_64Assembler* assembler, HInvoke* invoke, bool is_long, bool is_left) { - LocationSummary* locations = invoke->GetLocations(); - CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>(); - Location second = locations->InAt(1); - - if (is_long) { - if (second.IsRegister()) { - CpuRegister second_reg = second.AsRegister<CpuRegister>(); - if (is_left) { - __ rolq(first_reg, second_reg); - } else { - __ rorq(first_reg, second_reg); - } - } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue); - if (is_left) { - __ rolq(first_reg, imm); - } else { - __ rorq(first_reg, imm); - } - } - } else { - if (second.IsRegister()) { - CpuRegister second_reg = second.AsRegister<CpuRegister>(); - if (is_left) { - __ roll(first_reg, second_reg); - } else { - __ rorl(first_reg, second_reg); - } - } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); - if (is_left) { - __ roll(first_reg, imm); - } else { - __ rorl(first_reg, imm); - } - } - } -} - -void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateLeft(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateLeft(HInvoke* invoke) { - X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ true); -} - -void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateRight(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateRight(HInvoke* invoke) { - X86_64Assembler* assembler = 
down_cast<X86_64Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ false); -} - -void IntrinsicLocationsBuilderX86_64::VisitLongRotateLeft(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitLongRotateLeft(HInvoke* invoke) { - X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ true); -} - -void IntrinsicLocationsBuilderX86_64::VisitLongRotateRight(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitLongRotateRight(HInvoke* invoke) { - X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ false); -} - // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -2390,6 +2507,10 @@ void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE } UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(LongRotateRight) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index 2bb769a430..956de2cb8a 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -16,7 +16,6 @@ #include "base/arena_allocator.h" #include "builder.h" -#include "gtest/gtest.h" #include "licm.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -27,7 +26,7 @@ namespace art { /** * Fixture class for the LICM tests. */ -class LICMTest : public testing::Test { +class LICMTest : public CommonCompilerTest { public: LICMTest() : pool_(), allocator_(&pool_) { graph_ = CreateGraph(&allocator_); @@ -70,16 +69,16 @@ class LICMTest : public testing::Test { loop_preheader_->AddInstruction(new (&allocator_) HGoto()); loop_header_->AddInstruction(new (&allocator_) HIf(parameter_)); loop_body_->AddInstruction(new (&allocator_) HGoto()); + return_->AddInstruction(new (&allocator_) HReturnVoid()); exit_->AddInstruction(new (&allocator_) HExit()); } // Performs LICM optimizations (after proper set up). void PerformLICM() { - ASSERT_TRUE(graph_->TryBuildingSsa()); + TransformToSsa(graph_); SideEffectsAnalysis side_effects(graph_); side_effects.Run(); - LICM licm(graph_, side_effects); - licm.Run(); + LICM(graph_, side_effects).Run(); } // General building fields. @@ -169,10 +168,10 @@ TEST_F(LICMTest, ArrayHoisting) { // Populate the loop with instructions: set/get array with different types. HInstruction* get_array = new (&allocator_) HArrayGet( - parameter_, constant_, Primitive::kPrimLong, 0); + parameter_, constant_, Primitive::kPrimByte, 0); loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction()); HInstruction* set_array = new (&allocator_) HArraySet( - parameter_, constant_, constant_, Primitive::kPrimInt, 0); + parameter_, constant_, constant_, Primitive::kPrimShort, 0); loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); EXPECT_EQ(get_array->GetBlock(), loop_body_); @@ -187,10 +186,10 @@ TEST_F(LICMTest, NoArrayHoisting) { // Populate the loop with instructions: set/get array with same types. 
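// Together the two tests pin down the aliasing rule: an ArrayGet and an
// ArraySet of different primitive element types (byte vs. short above) are
// treated as non-aliasing, since no legal program can read and write the same
// array at both types, so those accesses may be hoisted; with matching types,
// as here, the set can overwrite the slot the get reads. In Java-like terms
// (sketch):
//
//   while (cond) { byte b = a[0]; a[0] = b; }   // a[0] must be re-read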
HInstruction* get_array = new (&allocator_) HArrayGet( - parameter_, constant_, Primitive::kPrimLong, 0); + parameter_, constant_, Primitive::kPrimByte, 0); loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction()); HInstruction* set_array = new (&allocator_) HArraySet( - parameter_, get_array, constant_, Primitive::kPrimLong, 0); + parameter_, get_array, constant_, Primitive::kPrimByte, 0); loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); EXPECT_EQ(get_array->GetBlock(), loop_body_); diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index a059766e00..ed275b1544 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -29,13 +29,12 @@ #include "nodes.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" -#include "ssa_builder.h" #include "ssa_liveness_analysis.h" -#include "gtest/gtest.h" - namespace art { +class LinearizeTest : public CommonCompilerTest {}; + template <size_t number_of_blocks> static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[number_of_blocks]) { ArenaPool pool; @@ -46,7 +45,7 @@ static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[numb bool graph_built = builder.BuildGraph(*item); ASSERT_TRUE(graph_built); - graph->TryBuildingSsa(); + TransformToSsa(graph); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); @@ -60,7 +59,7 @@ static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[numb } } -TEST(LinearizeTest, CFG1) { +TEST_F(LinearizeTest, CFG1) { // Structure of this graph (+ are back edges) // Block0 // | @@ -85,7 +84,7 @@ TEST(LinearizeTest, CFG1) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG2) { +TEST_F(LinearizeTest, CFG2) { // Structure of this graph (+ are back edges) // Block0 // | @@ -110,7 +109,7 @@ TEST(LinearizeTest, CFG2) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG3) { +TEST_F(LinearizeTest, CFG3) { // Structure of this graph (+ are back edges) // Block0 // | @@ -137,7 +136,7 @@ TEST(LinearizeTest, CFG3) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG4) { +TEST_F(LinearizeTest, CFG4) { /* Structure of this graph (+ are back edges) // Block0 // | @@ -167,7 +166,7 @@ TEST(LinearizeTest, CFG4) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG5) { +TEST_F(LinearizeTest, CFG5) { /* Structure of this graph (+ are back edges) // Block0 // | @@ -197,7 +196,7 @@ TEST(LinearizeTest, CFG5) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG6) { +TEST_F(LinearizeTest, CFG6) { // Block0 // | // Block1 @@ -223,7 +222,7 @@ TEST(LinearizeTest, CFG6) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG7) { +TEST_F(LinearizeTest, CFG7) { // Structure of this graph (+ are back edges) // Block0 // | diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 7f67560692..926f9399a5 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -27,10 +27,10 @@ #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -#include "gtest/gtest.h" - namespace art { +class LiveRangesTest : public CommonCompilerTest {}; + static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); @@ -39,13 +39,13 @@ static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { // Suspend checks 
implementation may change in the future, and this test relies // on how instructions are ordered. RemoveSuspendChecks(graph); - graph->TryBuildingSsa(); + TransformToSsa(graph); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); return graph; } -TEST(LiveRangesTest, CFG1) { +TEST_F(LiveRangesTest, CFG1) { /* * Test the following snippet: * return 0; @@ -83,7 +83,7 @@ TEST(LiveRangesTest, CFG1) { ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, CFG2) { +TEST_F(LiveRangesTest, CFG2) { /* * Test the following snippet: * var a = 0; @@ -131,7 +131,7 @@ TEST(LiveRangesTest, CFG2) { ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, CFG3) { +TEST_F(LiveRangesTest, CFG3) { /* * Test the following snippet: * var a = 0; @@ -204,7 +204,7 @@ TEST(LiveRangesTest, CFG3) { ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, Loop1) { +TEST_F(LiveRangesTest, Loop1) { /* * Test the following snippet: * var a = 0; @@ -284,7 +284,7 @@ TEST(LiveRangesTest, Loop1) { ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, Loop2) { +TEST_F(LiveRangesTest, Loop2) { /* * Test the following snippet: * var a = 0; @@ -360,7 +360,7 @@ TEST(LiveRangesTest, Loop2) { ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, CFG4) { +TEST_F(LiveRangesTest, CFG4) { /* * Test the following snippet: * var a = 0; diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 9d7d0b6c67..7736eedae1 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -27,10 +27,10 @@ #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -#include "gtest/gtest.h" - namespace art { +class LivenessTest : public CommonCompilerTest {}; + static void DumpBitVector(BitVector* vector, std::ostream& buffer, size_t count, @@ -51,7 +51,7 @@ static void TestCode(const uint16_t* data, const char* expected) { const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); ASSERT_TRUE(graph_built); - graph->TryBuildingSsa(); + TransformToSsa(graph); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); std::unique_ptr<const X86InstructionSetFeatures> features_x86( @@ -75,7 +75,7 @@ static void TestCode(const uint16_t* data, const char* expected) { ASSERT_STREQ(expected, buffer.str().c_str()); } -TEST(LivenessTest, CFG1) { +TEST_F(LivenessTest, CFG1) { const char* expected = "Block 0\n" " live in: (0)\n" @@ -98,7 +98,7 @@ TEST(LivenessTest, CFG1) { TestCode(data, expected); } -TEST(LivenessTest, CFG2) { +TEST_F(LivenessTest, CFG2) { const char* expected = "Block 0\n" " live in: (0)\n" @@ -120,7 +120,7 @@ TEST(LivenessTest, CFG2) { TestCode(data, expected); } -TEST(LivenessTest, CFG3) { +TEST_F(LivenessTest, CFG3) { const char* expected = "Block 0\n" // entry block " live in: (000)\n" @@ -149,7 +149,7 @@ TEST(LivenessTest, CFG3) { TestCode(data, expected); } -TEST(LivenessTest, CFG4) { +TEST_F(LivenessTest, CFG4) { // var a; // if (0 == 0) { // a = 5; @@ -197,7 +197,7 @@ TEST(LivenessTest, CFG4) { TestCode(data, expected); } -TEST(LivenessTest, CFG5) { +TEST_F(LivenessTest, CFG5) { // var a = 0; // if (0 == 0) { // } else { @@ -242,7 +242,7 @@ TEST(LivenessTest, CFG5) { TestCode(data, expected); } -TEST(LivenessTest, Loop1) { +TEST_F(LivenessTest, Loop1) { // Simple loop with one preheader and one back edge. 
// var a = 0; // while (a == a) { @@ -288,7 +288,7 @@ TEST(LivenessTest, Loop1) { TestCode(data, expected); } -TEST(LivenessTest, Loop3) { +TEST_F(LivenessTest, Loop3) { // Test that the returned value stays live in a preceding loop. // var a = 0; // while (a == a) { @@ -335,7 +335,7 @@ TEST(LivenessTest, Loop3) { } -TEST(LivenessTest, Loop4) { +TEST_F(LivenessTest, Loop4) { // Make sure we support a preheader of a loop not being the first predecessor // in the predecessor list of the header. // var a = 0; @@ -387,7 +387,7 @@ TEST(LivenessTest, Loop4) { TestCode(data, expected); } -TEST(LivenessTest, Loop5) { +TEST_F(LivenessTest, Loop5) { // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. // Bitsets are made of: @@ -443,7 +443,7 @@ TEST(LivenessTest, Loop5) { TestCode(data, expected); } -TEST(LivenessTest, Loop6) { +TEST_F(LivenessTest, Loop6) { // Bitsets are made of: // (constant0, constant4, constant5, phi in block 2) const char* expected = @@ -494,7 +494,7 @@ TEST(LivenessTest, Loop6) { } -TEST(LivenessTest, Loop7) { +TEST_F(LivenessTest, Loop7) { // Bitsets are made of: // (constant0, constant4, constant5, phi in block 2, phi in block 6) const char* expected = @@ -548,7 +548,7 @@ TEST(LivenessTest, Loop7) { TestCode(data, expected); } -TEST(LivenessTest, Loop8) { +TEST_F(LivenessTest, Loop8) { // var a = 0; // while (a == a) { // a = a + a; diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 389ada7504..adde00464b 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -335,16 +335,24 @@ class HeapLocationCollector : public HGraphVisitor { return true; } - ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* ref) { - ReferenceInfo* ref_info = FindReferenceInfoOf(ref); + ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) { + ReferenceInfo* ref_info = FindReferenceInfoOf(instruction); if (ref_info == nullptr) { size_t pos = ref_info_array_.size(); - ref_info = new (GetGraph()->GetArena()) ReferenceInfo(ref, pos); + ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos); ref_info_array_.push_back(ref_info); } return ref_info; } + void CreateReferenceInfoForReferenceType(HInstruction* instruction) { + if (instruction->GetType() != Primitive::kPrimNot) { + return; + } + DCHECK(FindReferenceInfoOf(instruction) == nullptr); + GetOrCreateReferenceInfo(instruction); + } + HeapLocation* GetOrCreateHeapLocation(HInstruction* ref, size_t offset, HInstruction* index, @@ -378,6 +386,7 @@ class HeapLocationCollector : public HGraphVisitor { void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + CreateReferenceInfoForReferenceType(instruction); } void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { @@ -387,6 +396,7 @@ class HeapLocationCollector : public HGraphVisitor { void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + CreateReferenceInfoForReferenceType(instruction); } void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { @@ -399,6 +409,7 @@ class HeapLocationCollector : public HGraphVisitor { void VisitArrayGet(HArrayGet* instruction) OVERRIDE { VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); + CreateReferenceInfoForReferenceType(instruction); } void 
VisitArraySet(HArraySet* instruction) OVERRIDE {
@@ -408,7 +419,23 @@ class HeapLocationCollector : public HGraphVisitor {
void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
// Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
- GetOrCreateReferenceInfo(new_instance);
+ CreateReferenceInfoForReferenceType(new_instance);
+ }
+
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
}
void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 926bc156cf..bb0b545c1e 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc
@@ -198,10 +198,38 @@ void HGraph::ComputeDominanceInformation() {
}
}
-void HGraph::TransformToSsa() {
- DCHECK(!reverse_post_order_.empty());
- SsaBuilder ssa_builder(this);
- ssa_builder.BuildSsa();
+BuildSsaResult HGraph::TryBuildingSsa(StackHandleScopeCollection* handles) {
+ BuildDominatorTree();
+
+ // The SSA builder requires loops to all be natural. Specifically, the dead phi
+ // elimination phase checks the consistency of the graph when doing a post-order
+ // visit for eliminating dead phis: a dead phi can only have loop header phi
+ // users remaining when being visited.
+ BuildSsaResult result = AnalyzeNaturalLoops();
+ if (result != kBuildSsaSuccess) {
+ return result;
+ }
+
+ // Precompute per-block try membership before entering the SSA builder,
+ // which needs the information to build catch block phis from values of
+ // locals at throwing instructions inside try blocks.
+ ComputeTryBlockInformation();
+
+ // Create the inexact Object reference type and store it in the HGraph.
+ ScopedObjectAccess soa(Thread::Current());
+ ClassLinker* linker = Runtime::Current()->GetClassLinker();
+ inexact_object_rti_ = ReferenceTypeInfo::Create(
+ handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)),
+ /* is_exact */ false);
+
+ // Transforms the graph to SSA form.
+ result = SsaBuilder(this, handles).BuildSsa();
+ if (result != kBuildSsaSuccess) {
+ return result;
+ }
+
+ in_ssa_form_ = true;
+ return kBuildSsaSuccess;
}
HBasicBlock* HGraph::SplitEdge(HBasicBlock* block, HBasicBlock* successor) {
@@ -410,7 +438,7 @@ void HGraph::SimplifyCFG() {
}
}
-bool HGraph::AnalyzeNaturalLoops() const {
+BuildSsaResult HGraph::AnalyzeNaturalLoops() const {
// Order does not matter.
for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
if (block->IsCatchBlock()) {
// TODO: Dealing with exceptional back edges could be tricky because
// they only approximate the real control flow. Bail out for now.
- return false;
+ return kBuildSsaFailThrowCatchLoop;
}
HLoopInformation* info = block->GetLoopInformation();
if (!info->Populate()) {
// Abort if the loop is non-natural. We currently bail out in such cases.
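// "Natural" has the classical meaning here: for every back edge a -> h, the
// header h dominates a, giving the loop a single entry. The condition that
// fails, stated directly (sketch reusing the existing dominator information):
//
//   bool IsNaturalBackEdge(HBasicBlock* header, HBasicBlock* back_edge) {
//     return header->Dominates(back_edge);  // false => Populate() rejects it
//   }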
- return false; + return kBuildSsaFailNonNaturalLoop; } } } - return true; + return kBuildSsaSuccess; } void HGraph::InsertConstant(HConstant* constant) { @@ -446,8 +474,13 @@ HNullConstant* HGraph::GetNullConstant(uint32_t dex_pc) { // id and/or any invariants the graph is assuming when adding new instructions. if ((cached_null_constant_ == nullptr) || (cached_null_constant_->GetBlock() == nullptr)) { cached_null_constant_ = new (arena_) HNullConstant(dex_pc); + cached_null_constant_->SetReferenceTypeInfo(inexact_object_rti_); InsertConstant(cached_null_constant_); } + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + DCHECK(cached_null_constant_->GetReferenceTypeInfo().IsValid()); + } return cached_null_constant_; } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 3e38e9f2a4..55e436f0b7 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -98,6 +98,13 @@ enum IfCondition { kCondAE, // >= }; +enum BuildSsaResult { + kBuildSsaFailNonNaturalLoop, + kBuildSsaFailThrowCatchLoop, + kBuildSsaFailAmbiguousArrayGet, + kBuildSsaSuccess, +}; + class HInstructionList : public ValueObject { public: HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {} @@ -143,6 +150,122 @@ class HInstructionList : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HInstructionList); }; +class ReferenceTypeInfo : ValueObject { + public: + typedef Handle<mirror::Class> TypeHandle; + + static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) { + // The constructor will check that the type_handle is valid. + return ReferenceTypeInfo(type_handle, is_exact); + } + + static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); } + + static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) { + return handle.GetReference() != nullptr; + } + + bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) { + return IsValidHandle(type_handle_); + } + + bool IsExact() const { return is_exact_; } + + bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsObjectClass(); + } + + bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsStringClass(); + } + + bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass(); + } + + bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsInterface(); + } + + bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsArrayClass(); + } + + bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsPrimitiveArray(); + } + + bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray(); + } + + bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + if (!IsExact()) return false; + if (!IsArrayClass()) return false; + return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get()); + } + + bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + if (!IsExact()) return false; + if (!IsArrayClass()) return 
false; + if (!rti.IsArrayClass()) return false; + return GetTypeHandle()->GetComponentType()->IsAssignableFrom( + rti.GetTypeHandle()->GetComponentType()); + } + + Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } + + bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + DCHECK(rti.IsValid()); + return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); + } + + bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + DCHECK(rti.IsValid()); + return GetTypeHandle().Get() != rti.GetTypeHandle().Get() && + GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); + } + + // Returns true if the type information provides the same amount of detail. + // Note that it does not mean that the instructions have the same actual type + // (because the type can be the result of a merge). + bool IsEqual(ReferenceTypeInfo rti) SHARED_REQUIRES(Locks::mutator_lock_) { + if (!IsValid() && !rti.IsValid()) { + // Invalid types are equal. + return true; + } + if (!IsValid() || !rti.IsValid()) { + // One is valid, the other not. + return false; + } + return IsExact() == rti.IsExact() + && GetTypeHandle().Get() == rti.GetTypeHandle().Get(); + } + + private: + ReferenceTypeInfo(); + ReferenceTypeInfo(TypeHandle type_handle, bool is_exact); + + // The class of the object. + TypeHandle type_handle_; + // Whether or not the type is exact or a superclass of the actual type. + bool is_exact_; +}; + +std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs); + // Control-flow graph of a method. Contains a list of basic blocks. class HGraph : public ArenaObject<kArenaAllocGraph> { public: @@ -179,7 +302,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { cached_float_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), - cached_current_method_(nullptr) { + cached_current_method_(nullptr), + inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) { blocks_.reserve(kDefaultNumberOfBlocks); } @@ -197,36 +321,23 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { void AddBlock(HBasicBlock* block); - // Try building the SSA form of this graph, with dominance computation and loop - // recognition. Returns whether it was successful in doing all these steps. - bool TryBuildingSsa() { - BuildDominatorTree(); - // The SSA builder requires loops to all be natural. Specifically, the dead phi - // elimination phase checks the consistency of the graph when doing a post-order - // visit for eliminating dead phis: a dead phi can only have loop header phi - // users remaining when being visited. - if (!AnalyzeNaturalLoops()) return false; - // Precompute per-block try membership before entering the SSA builder, - // which needs the information to build catch block phis from values of - // locals at throwing instructions inside try blocks. - ComputeTryBlockInformation(); - TransformToSsa(); - in_ssa_form_ = true; - return true; - } + // Try building the SSA form of this graph, with dominance computation and + // loop recognition. Returns a code indicating success or the reason for + // failure.
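+  // A minimal usage sketch (hypothetical caller; the real call site lives in
+  // optimizing_compiler.cc further down this diff):
+  //   ScopedObjectAccess soa(Thread::Current());
+  //   StackHandleScopeCollection handles(soa.Self());
+  //   if (graph->TryBuildingSsa(&handles) != kBuildSsaSuccess) {
+  //     // Record a per-reason stat and skip optimized compilation.
+  //   }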
+ BuildSsaResult TryBuildingSsa(StackHandleScopeCollection* handles); void ComputeDominanceInformation(); void ClearDominanceInformation(); void BuildDominatorTree(); - void TransformToSsa(); void SimplifyCFG(); void SimplifyCatchBlocks(); - // Analyze all natural loops in this graph. Returns false if one - // loop is not natural, that is the header does not dominate the - // back edge. - bool AnalyzeNaturalLoops() const; + // Analyze all natural loops in this graph. Returns a code indicating success + // or the reason for failure. The method fails if a loop is not natural, that + // is, its header does not dominate a back edge, or if it is a throw-catch + // loop, i.e. its header is a catch block. + BuildSsaResult AnalyzeNaturalLoops() const; // Iterate over blocks to compute try block membership. Needs reverse post // order and loop information. @@ -487,6 +598,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // (such as when the superclass could not be found). ArtMethod* art_method_; + // Keep the RTI of inexact Object to avoid having to pass a stack handle + // collection pointer to passes that may create a NullConstant. + ReferenceTypeInfo inexact_object_rti_; + friend class SsaBuilder; // For caching constants. friend class SsaLivenessAnalysis; // For the linear order. ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); @@ -1082,6 +1197,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Rem, BinaryOperation) \ M(Return, Instruction) \ M(ReturnVoid, Instruction) \ + M(Ror, BinaryOperation) \ M(Shl, BinaryOperation) \ M(Shr, BinaryOperation) \ M(StaticFieldGet, Instruction) \ @@ -1673,122 +1789,6 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { DISALLOW_COPY_AND_ASSIGN(HEnvironment); }; -class ReferenceTypeInfo : ValueObject { - public: - typedef Handle<mirror::Class> TypeHandle; - - static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) { - // The constructor will check that the type_handle is valid.
- return ReferenceTypeInfo(type_handle, is_exact); - } - - static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); } - - static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) { - return handle.GetReference() != nullptr; - } - - bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) { - return IsValidHandle(type_handle_); - } - - bool IsExact() const { return is_exact_; } - - bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsObjectClass(); - } - - bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsStringClass(); - } - - bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass(); - } - - bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsInterface(); - } - - bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsArrayClass(); - } - - bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsPrimitiveArray(); - } - - bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray(); - } - - bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - if (!IsExact()) return false; - if (!IsArrayClass()) return false; - return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get()); - } - - bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - if (!IsExact()) return false; - if (!IsArrayClass()) return false; - if (!rti.IsArrayClass()) return false; - return GetTypeHandle()->GetComponentType()->IsAssignableFrom( - rti.GetTypeHandle()->GetComponentType()); - } - - Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } - - bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - DCHECK(rti.IsValid()); - return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); - } - - bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - DCHECK(rti.IsValid()); - return GetTypeHandle().Get() != rti.GetTypeHandle().Get() && - GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); - } - - // Returns true if the type information provide the same amount of details. - // Note that it does not mean that the instructions have the same actual type - // (because the type can be the result of a merge). - bool IsEqual(ReferenceTypeInfo rti) SHARED_REQUIRES(Locks::mutator_lock_) { - if (!IsValid() && !rti.IsValid()) { - // Invalid types are equal. - return true; - } - if (!IsValid() || !rti.IsValid()) { - // One is valid, the other not. - return false; - } - return IsExact() == rti.IsExact() - && GetTypeHandle().Get() == rti.GetTypeHandle().Get(); - } - - private: - ReferenceTypeInfo(); - ReferenceTypeInfo(TypeHandle type_handle, bool is_exact); - - // The class of the object. - TypeHandle type_handle_; - // Whether or not the type is exact or a superclass of the actual type. - // Whether or not we have any information about this type. 
- bool is_exact_; -}; - -std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs); - class HInstruction : public ArenaObject<kArenaAllocInstruction> { public: HInstruction(SideEffects side_effects, uint32_t dex_pc) @@ -4198,6 +4198,44 @@ class HXor : public HBinaryOperation { DISALLOW_COPY_AND_ASSIGN(HXor); }; +class HRor : public HBinaryOperation { + public: + HRor(Primitive::Type result_type, HInstruction* value, HInstruction* distance) + : HBinaryOperation(result_type, value, distance) {} + + template <typename T, typename U, typename V> + T Compute(T x, U y, V max_shift_value) const { + static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value, + "V is not the unsigned integer type corresponding to T"); + V ux = static_cast<V>(x); + if ((y & max_shift_value) == 0) { + return static_cast<T>(ux); + } else { + const V reg_bits = sizeof(T) * 8; + return static_cast<T>(ux >> (y & max_shift_value)) | + (x << (reg_bits - (y & max_shift_value))); + } + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc()); + } + + DECLARE_INSTRUCTION(Ror); + + private: + DISALLOW_COPY_AND_ASSIGN(HRor); +}; + // The value of a parameter in this method. Its location depends on // the calling convention. class HParameterValue : public HExpression<0> { @@ -4378,7 +4416,16 @@ class HPhi : public HInstruction { void RemoveInputAt(size_t index); Primitive::Type GetType() const OVERRIDE { return type_; } - void SetType(Primitive::Type type) { type_ = type; } + void SetType(Primitive::Type new_type) { + // Make sure that only valid type changes occur. The following are allowed: + // (1) int -> float/ref (primitive type propagation), + // (2) long -> double (primitive type propagation). 
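+    // For illustration, a hedged sketch (example ours): a phi built for
+    // `v0 = 0; if (c) { v0 = 1.0f; }` starts as kPrimInt because the `const`
+    // bytecode is ambiguous; when type inference later resolves its inputs as
+    // float, it calls SetType(kPrimFloat), i.e. transition (1). Any other
+    // transition trips the DCHECK below.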
+ DCHECK(type_ == new_type || + (type_ == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) || + (type_ == Primitive::kPrimInt && new_type == Primitive::kPrimNot) || + (type_ == Primitive::kPrimLong && new_type == Primitive::kPrimDouble)); + type_ = new_type; + } bool CanBeNull() const OVERRIDE { return can_be_null_; } void SetCanBeNull(bool can_be_null) { can_be_null_ = can_be_null; } @@ -4618,7 +4665,21 @@ class HArrayGet : public HExpression<2> { return false; } - void SetType(Primitive::Type type) { type_ = type; } + bool IsEquivalentOf(HArrayGet* other) const { + bool result = (GetDexPc() == other->GetDexPc()); + if (kIsDebugBuild && result) { + DCHECK_EQ(GetBlock(), other->GetBlock()); + DCHECK_EQ(GetArray(), other->GetArray()); + DCHECK_EQ(GetIndex(), other->GetIndex()); + if (Primitive::IsIntOrLongType(GetType())) { + DCHECK(Primitive::IsFloatingPointType(other->GetType())); + } else { + DCHECK(Primitive::IsFloatingPointType(GetType())); + DCHECK(Primitive::IsIntOrLongType(other->GetType())); + } + } + return result; + } HInstruction* GetArray() const { return InputAt(0); } HInstruction* GetIndex() const { return InputAt(1); } @@ -4925,9 +4986,13 @@ class HLoadClass : public HExpression<1> { class HLoadString : public HExpression<1> { public: - HLoadString(HCurrentMethod* current_method, uint32_t string_index, uint32_t dex_pc) + HLoadString(HCurrentMethod* current_method, + uint32_t string_index, + uint32_t dex_pc, + bool is_in_dex_cache) : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc), - string_index_(string_index) { + string_index_(string_index), + is_in_dex_cache_(is_in_dex_cache) { SetRawInputAt(0, current_method); } @@ -4945,6 +5010,7 @@ class HLoadString : public HExpression<1> { bool NeedsEnvironment() const OVERRIDE { return false; } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return true; } bool CanBeNull() const OVERRIDE { return false; } + bool IsInDexCache() const { return is_in_dex_cache_; } static SideEffects SideEffectsForArchRuntimeCalls() { return SideEffects::CanTriggerGC(); @@ -4954,6 +5020,7 @@ class HLoadString : public HExpression<1> { private: const uint32_t string_index_; + const bool is_in_dex_cache_; DISALLOW_COPY_AND_ASSIGN(HLoadString); }; diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index e8439354af..18405f2623 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -17,6 +17,8 @@ #ifndef ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ #define ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ +#include "nodes.h" + namespace art { class HArm64DataProcWithShifterOp : public HExpression<2> { diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 831b626c4f..ba435180e5 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -501,11 +501,8 @@ static void RunOptimizations(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats, const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); - ScopedThreadSuspension sts(soa.Self(), kNative); - + PassObserver* pass_observer, + StackHandleScopeCollection* handles) { ArenaAllocator* arena = graph->GetArena(); HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); @@ -522,29 +519,23 @@ static void 
RunOptimizations(HGraph* graph, LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects); HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction); - ReferenceTypePropagation* type_propagation = - new (arena) ReferenceTypePropagation(graph, &handles); HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( - graph, stats, "instruction_simplifier_after_types"); - InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_after_bce"); - InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( + InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_before_codegen"); IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver); HOptimization* optimizations1[] = { intrinsics, + sharpening, fold1, simplify1, - type_propagation, - sharpening, dce1, - simplify2 }; RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer); - MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles); + MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles); HOptimization* optimizations2[] = { // BooleanSimplifier depends on the InstructionSimplifier removing @@ -557,13 +548,13 @@ static void RunOptimizations(HGraph* graph, induction, bce, fold3, // evaluates code generated by dynamic bce - simplify3, + simplify2, lse, dce2, // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. - simplify4, + simplify3, }; RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); @@ -768,14 +759,29 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, } VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName(); + if (run_optimizations_) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScopeCollection handles(soa.Self()); + ScopedThreadSuspension sts(soa.Self(), kNative); + { PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); - if (!graph->TryBuildingSsa()) { - // We could not transform the graph to SSA, bailout. 
- LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName() - << ": it contains a non natural loop"; - MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); + BuildSsaResult result = graph->TryBuildingSsa(&handles); + if (result != kBuildSsaSuccess) { + switch (result) { + case kBuildSsaFailNonNaturalLoop: + MaybeRecordStat(MethodCompilationStat::kNotCompiledNonNaturalLoop); + break; + case kBuildSsaFailThrowCatchLoop: + MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop); + break; + case kBuildSsaFailAmbiguousArrayGet: + MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayGet); + break; + case kBuildSsaSuccess: + UNREACHABLE(); + } pass_observer.SetGraphInBadState(); return nullptr; } @@ -786,7 +792,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, compiler_driver, compilation_stats_.get(), dex_compilation_unit, - &pass_observer); + &pass_observer, + &handles); codegen->CompileOptimized(code_allocator); } else { codegen->CompileBaseline(code_allocator); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 6296eedfb0..4713514bb2 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -38,7 +38,9 @@ enum MethodCompilationStat { kRemovedDeadInstruction, kRemovedNullCheck, kNotCompiledBranchOutsideMethodCode, - kNotCompiledCannotBuildSSA, + kNotCompiledNonNaturalLoop, + kNotCompiledThrowCatchLoop, + kNotCompiledAmbiguousArrayGet, kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, kNotCompiledMalformedOpcode, @@ -104,7 +106,9 @@ class OptimizingCompilerStats { case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break; case kRemovedNullCheck: name = "RemovedNullCheck"; break; case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break; - case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break; + case kNotCompiledNonNaturalLoop : name = "NotCompiledNonNaturalLoop"; break; + case kNotCompiledThrowCatchLoop : name = "NotCompiledThrowCatchLoop"; break; + case kNotCompiledAmbiguousArrayGet : name = "NotCompiledAmbiguousArrayGet"; break; case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break; case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break; case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break; diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 350f0b14ab..af3a005304 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -19,9 +19,13 @@ #include "nodes.h" #include "builder.h" +#include "common_compiler_test.h" #include "compiler/dex/pass_manager.h" #include "dex_file.h" #include "dex_instruction.h" +#include "handle_scope-inl.h" +#include "scoped_thread_state_change.h" +#include "ssa_builder.h" #include "ssa_liveness_analysis.h" #include "gtest/gtest.h" @@ -42,7 +46,6 @@ namespace art { #define FIVE_REGISTERS_CODE_ITEM(...) N_REGISTERS_CODE_ITEM(5, __VA_ARGS__) #define SIX_REGISTERS_CODE_ITEM(...) 
N_REGISTERS_CODE_ITEM(6, __VA_ARGS__) - LiveInterval* BuildInterval(const size_t ranges[][2], size_t number_of_ranges, ArenaAllocator* allocator, @@ -111,6 +114,12 @@ inline bool IsRemoved(HInstruction* instruction) { return instruction->GetBlock() == nullptr; } +inline void TransformToSsa(HGraph* graph) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScopeCollection handles(soa.Self()); + EXPECT_EQ(graph->TryBuildingSsa(&handles), kBuildSsaSuccess); +} + } // namespace art #endif // ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc deleted file mode 100644 index bde54ee977..0000000000 --- a/compiler/optimizing/primitive_type_propagation.cc +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "primitive_type_propagation.h" - -#include "nodes.h" -#include "ssa_builder.h" - -namespace art { - -static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) { - // We trust the verifier has already done the necessary checking. - switch (existing) { - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - case Primitive::kPrimNot: - return existing; - default: - // Phis are initialized with a void type, so if we are asked - // to merge with a void type, we should use the existing one. - return new_type == Primitive::kPrimVoid - ? existing - : HPhi::ToPhiType(new_type); - } -} - -// Re-compute and update the type of the instruction. Returns -// whether or not the type was changed. -bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { - DCHECK(phi->IsLive()); - Primitive::Type existing = phi->GetType(); - - Primitive::Type new_type = existing; - for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { - Primitive::Type input_type = phi->InputAt(i)->GetType(); - new_type = MergeTypes(new_type, input_type); - } - phi->SetType(new_type); - - if (new_type == Primitive::kPrimDouble - || new_type == Primitive::kPrimFloat - || new_type == Primitive::kPrimNot) { - // If the phi is of floating point type, we need to update its inputs to that - // type. For inputs that are phis, we need to recompute their types. - for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { - HInstruction* input = phi->InputAt(i); - if (input->GetType() != new_type) { - HInstruction* equivalent = (new_type == Primitive::kPrimNot) - ? SsaBuilder::GetReferenceTypeEquivalent(input) - : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); - phi->ReplaceInput(equivalent, i); - if (equivalent->IsPhi()) { - AddToWorklist(equivalent->AsPhi()); - } else if (equivalent == input) { - // The input has changed its type. It can be an input of other phis, - // so we need to put phi users in the work list. 
- AddDependentInstructionsToWorklist(equivalent); - } - } - } - } - - return existing != new_type; -} - -void PrimitiveTypePropagation::Run() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); - } - ProcessWorklist(); -} - -void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) { - if (block->IsLoopHeader()) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->AsPhi(); - if (phi->IsLive()) { - AddToWorklist(phi); - } - } - } else { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - // Eagerly compute the type of the phi, for quicker convergence. Note - // that we don't need to add users to the worklist because we are - // doing a reverse post-order visit, therefore either the phi users are - // non-loop phi and will be visited later in the visit, or are loop-phis, - // and they are already in the work list. - HPhi* phi = it.Current()->AsPhi(); - if (phi->IsLive()) { - UpdateType(phi); - } - } - } -} - -void PrimitiveTypePropagation::ProcessWorklist() { - while (!worklist_.empty()) { - HPhi* instruction = worklist_.back(); - worklist_.pop_back(); - if (UpdateType(instruction)) { - AddDependentInstructionsToWorklist(instruction); - } - } -} - -void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) { - DCHECK(instruction->IsLive()); - worklist_.push_back(instruction); -} - -void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { - for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->GetUser()->AsPhi(); - if (phi != nullptr && phi->IsLive() && phi->GetType() != instruction->GetType()) { - AddToWorklist(phi); - } - } -} - -} // namespace art diff --git a/compiler/optimizing/primitive_type_propagation.h b/compiler/optimizing/primitive_type_propagation.h deleted file mode 100644 index 212fcfc69f..0000000000 --- a/compiler/optimizing/primitive_type_propagation.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_ -#define ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_ - -#include "base/arena_containers.h" -#include "nodes.h" - -namespace art { - -// Compute and propagate primitive types of phis in the graph. 
-class PrimitiveTypePropagation : public ValueObject { - public: - explicit PrimitiveTypePropagation(HGraph* graph) - : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocPrimitiveTypePropagation)) { - worklist_.reserve(kDefaultWorklistSize); - } - - void Run(); - - private: - void VisitBasicBlock(HBasicBlock* block); - void ProcessWorklist(); - void AddToWorklist(HPhi* phi); - void AddDependentInstructionsToWorklist(HInstruction* instruction); - bool UpdateType(HPhi* phi); - - HGraph* const graph_; - ArenaVector<HPhi*> worklist_; - - static constexpr size_t kDefaultWorklistSize = 8; - - DISALLOW_COPY_AND_ASSIGN(PrimitiveTypePropagation); -}; - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_ diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index fea903d9cf..94a297c9e6 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -40,7 +40,6 @@ class RTPVisitor : public HGraphDelegateVisitor { throwable_class_handle_(throwable_class_handle), worklist_(worklist) {} - void VisitNullConstant(HNullConstant* null_constant) OVERRIDE; void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; void VisitLoadClass(HLoadClass* load_class) OVERRIDE; void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE; @@ -71,8 +70,6 @@ class RTPVisitor : public HGraphDelegateVisitor { ReferenceTypeInfo::TypeHandle string_class_handle_; ReferenceTypeInfo::TypeHandle throwable_class_handle_; ArenaVector<HInstruction*>* worklist_; - - static constexpr size_t kDefaultWorklistSize = 8; }; ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph, @@ -171,9 +168,13 @@ static void ForEachUntypedInstruction(HGraph* graph, Functor fn) { ScopedObjectAccess soa(Thread::Current()); for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) { for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) { - HInstruction* instr = it.Current(); - if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) { - fn(instr); + HPhi* phi = it.Current()->AsPhi(); + // Note that the graph may contain dead phis when run from the SsaBuilder. + // Skip those as they might have a type conflict and will be removed anyway. + if (phi->IsLive() && + phi->GetType() == Primitive::kPrimNot && + !phi->GetReferenceTypeInfo().IsValid()) { + fn(phi); } } for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) { @@ -376,6 +377,75 @@ void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { } } +// Returns true if one of the patterns below has been recognized. If so, the +// InstanceOf instruction together with the true branch of `ifInstruction` will +// be returned using the out parameters. 
+// Recognized patterns: +// (1) patterns equivalent to `if (obj instanceof X)` +// (a) InstanceOf -> Equal to 1 -> If +// (b) InstanceOf -> NotEqual to 0 -> If +// (c) InstanceOf -> If +// (2) patterns equivalent to `if (!(obj instanceof X))` +// (a) InstanceOf -> Equal to 0 -> If +// (b) InstanceOf -> NotEqual to 1 -> If +// (c) InstanceOf -> BooleanNot -> If +static bool MatchIfInstanceOf(HIf* ifInstruction, + /* out */ HInstanceOf** instanceOf, + /* out */ HBasicBlock** trueBranch) { + HInstruction* input = ifInstruction->InputAt(0); + + if (input->IsEqual()) { + HInstruction* rhs = input->AsEqual()->GetConstantRight(); + if (rhs != nullptr) { + HInstruction* lhs = input->AsEqual()->GetLeastConstantLeft(); + if (lhs->IsInstanceOf() && rhs->IsIntConstant()) { + if (rhs->AsIntConstant()->IsOne()) { + // Case (1a) + *trueBranch = ifInstruction->IfTrueSuccessor(); + } else { + // Case (2a) + DCHECK(rhs->AsIntConstant()->IsZero()); + *trueBranch = ifInstruction->IfFalseSuccessor(); + } + *instanceOf = lhs->AsInstanceOf(); + return true; + } + } + } else if (input->IsNotEqual()) { + HInstruction* rhs = input->AsNotEqual()->GetConstantRight(); + if (rhs != nullptr) { + HInstruction* lhs = input->AsNotEqual()->GetLeastConstantLeft(); + if (lhs->IsInstanceOf() && rhs->IsIntConstant()) { + if (rhs->AsIntConstant()->IsZero()) { + // Case (1b) + *trueBranch = ifInstruction->IfTrueSuccessor(); + } else { + // Case (2b) + DCHECK(rhs->AsIntConstant()->IsOne()); + *trueBranch = ifInstruction->IfFalseSuccessor(); + } + *instanceOf = lhs->AsInstanceOf(); + return true; + } + } + } else if (input->IsInstanceOf()) { + // Case (1c) + *instanceOf = input->AsInstanceOf(); + *trueBranch = ifInstruction->IfTrueSuccessor(); + return true; + } else if (input->IsBooleanNot()) { + HInstruction* not_input = input->InputAt(0); + if (not_input->IsInstanceOf()) { + // Case (2c) + *instanceOf = not_input->AsInstanceOf(); + *trueBranch = ifInstruction->IfFalseSuccessor(); + return true; + } + } + + return false; +} + // Detects if `block` is the True block for the pattern // `if (x instanceof ClassX) { }` // If that's the case insert an HBoundType instruction to bound the type of `x` @@ -385,22 +455,11 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { if (ifInstruction == nullptr) { return; } - HInstruction* ifInput = ifInstruction->InputAt(0); - HInstruction* instanceOf = nullptr; - HBasicBlock* instanceOfTrueBlock = nullptr; - // The instruction simplifier has transformed: - // - `if (a instanceof A)` into an HIf with an HInstanceOf input - // - `if (!(a instanceof A)` into an HIf with an HBooleanNot input (which in turn - // has an HInstanceOf input) - // So we should not see the usual HEqual here. - if (ifInput->IsInstanceOf()) { - instanceOf = ifInput; - instanceOfTrueBlock = ifInstruction->IfTrueSuccessor(); - } else if (ifInput->IsBooleanNot() && ifInput->InputAt(0)->IsInstanceOf()) { - instanceOf = ifInput->InputAt(0); - instanceOfTrueBlock = ifInstruction->IfFalseSuccessor(); - } else { + // Try to recognize common `if (instanceof)` and `if (!instanceof)` patterns. 
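+  // For example, a sketch of pattern (1a) as it arrives from the builder
+  // (instruction names ours):
+  //   i1: InstanceOf(obj, X)
+  //   i2: Equal(i1, IntConstant(1))
+  //   i3: If(i2)
+  // MatchIfInstanceOf then yields i1 and i3->IfTrueSuccessor(), the block in
+  // which `obj` is known to be an X and can be bound to that type.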
+ HInstanceOf* instanceOf = nullptr; + HBasicBlock* instanceOfTrueBlock = nullptr; + if (!MatchIfInstanceOf(ifInstruction, &instanceOf, &instanceOfTrueBlock)) { return; } @@ -505,13 +564,6 @@ void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr, SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact); } -void RTPVisitor::VisitNullConstant(HNullConstant* instr) { - // TODO: The null constant could be bound contextually (e.g. based on return statements) - // to a more precise type. - instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false)); -} - void RTPVisitor::VisitNewInstance(HNewInstance* instr) { UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } @@ -523,7 +575,11 @@ void RTPVisitor::VisitNewArray(HNewArray* instr) { static mirror::Class* GetClassFromDexCache(Thread* self, const DexFile& dex_file, uint16_t type_idx) SHARED_REQUIRES(Locks::mutator_lock_) { mirror::DexCache* dex_cache = - Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, false); + Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, /* allow_failure */ true); + if (dex_cache == nullptr) { + // Dex cache could not be found. This should only happen during gtests. + return nullptr; + } // Get type from dex cache assuming it was populated by the verifier. return dex_cache->GetResolvedType(type_idx); } @@ -540,17 +596,24 @@ void RTPVisitor::VisitParameterValue(HParameterValue* instr) { void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info) { - // The field index is unknown only during tests. - if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) { + if (instr->GetType() != Primitive::kPrimNot) { return; } ScopedObjectAccess soa(Thread::Current()); - ClassLinker* cl = Runtime::Current()->GetClassLinker(); - ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get()); - // TODO: There are certain cases where we can't resolve the field. - // b/21914925 is open to keep track of a repro case for this issue. - mirror::Class* klass = (field == nullptr) ? nullptr : field->GetType<false>(); + mirror::Class* klass = nullptr; + + // The field index is unknown only during tests. + if (info.GetFieldIndex() != kUnknownFieldIndex) { + ClassLinker* cl = Runtime::Current()->GetClassLinker(); + ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get()); + // TODO: There are certain cases where we can't resolve the field. + // b/21914925 is open to keep track of a repro case for this issue. + if (field != nullptr) { + klass = field->GetType<false>(); + } + } + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); } @@ -666,7 +729,7 @@ void RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { } void ReferenceTypePropagation::VisitPhi(HPhi* phi) { - if (phi->GetType() != Primitive::kPrimNot) { + if (phi->IsDead() || phi->GetType() != Primitive::kPrimNot) { return; } @@ -824,6 +887,8 @@ void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { // NullConstant inputs are ignored during merging as they do not provide any useful information. // If all the inputs are NullConstants then the type of the phi will be set to Object. 
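// Example (sketch, ours): Phi(NullConstant, s) where `s` carries RTI(String)
// keeps RTI(String) and merely remains nullable, while
// Phi(NullConstant, NullConstant) falls back to inexact java.lang.Object.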
void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { + DCHECK(instr->IsLive()); + size_t input_count = instr->InputCount(); size_t first_input_index_not_null = 0; while (first_input_index_not_null < input_count && @@ -868,7 +933,7 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { // Re-computes and updates the nullability of the instruction. Returns whether or // not the nullability was changed. bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) { - DCHECK(instr->IsPhi() + DCHECK((instr->IsPhi() && instr->AsPhi()->IsLive()) || instr->IsBoundType() || instr->IsNullCheck() || instr->IsArrayGet()); @@ -916,7 +981,7 @@ void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) { void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { HInstruction* user = it.Current()->GetUser(); - if (user->IsPhi() + if ((user->IsPhi() && user->AsPhi()->IsLive()) || user->IsBoundType() || user->IsNullCheck() || (user->IsArrayGet() && (user->GetType() == Primitive::kPrimNot))) { diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 080f970756..b900ed0966 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -28,13 +28,13 @@ #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" -#include "gtest/gtest.h" - namespace art { // Note: the register allocator tests rely on the fact that constants have live // intervals and registers get allocated to them. +class RegisterAllocatorTest : public CommonCompilerTest {}; + static bool Check(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -42,7 +42,7 @@ static bool Check(const uint16_t* data) { HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); - graph->TryBuildingSsa(); + TransformToSsa(graph); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); @@ -57,7 +57,7 @@ static bool Check(const uint16_t* data) { * Unit testing of RegisterAllocator::ValidateIntervals. Register allocator * tests are based on this validation method. 
*/ -TEST(RegisterAllocatorTest, ValidateIntervals) { +TEST_F(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = CreateGraph(&allocator); @@ -146,7 +146,7 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { } } -TEST(RegisterAllocatorTest, CFG1) { +TEST_F(RegisterAllocatorTest, CFG1) { /* * Test the following snippet: * return 0; @@ -166,7 +166,7 @@ TEST(RegisterAllocatorTest, CFG1) { ASSERT_TRUE(Check(data)); } -TEST(RegisterAllocatorTest, Loop1) { +TEST_F(RegisterAllocatorTest, Loop1) { /* * Test the following snippet: * int a = 0; @@ -205,7 +205,7 @@ TEST(RegisterAllocatorTest, Loop1) { ASSERT_TRUE(Check(data)); } -TEST(RegisterAllocatorTest, Loop2) { +TEST_F(RegisterAllocatorTest, Loop2) { /* * Test the following snippet: * int a = 0; @@ -259,11 +259,11 @@ static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) { HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); - graph->TryBuildingSsa(); + TransformToSsa(graph); return graph; } -TEST(RegisterAllocatorTest, Loop3) { +TEST_F(RegisterAllocatorTest, Loop3) { /* * Test the following snippet: * int a = 0 @@ -326,7 +326,7 @@ TEST(RegisterAllocatorTest, Loop3) { ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister()); } -TEST(RegisterAllocatorTest, FirstRegisterUse) { +TEST_F(RegisterAllocatorTest, FirstRegisterUse) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8, @@ -366,7 +366,7 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition()); } -TEST(RegisterAllocatorTest, DeadPhi) { +TEST_F(RegisterAllocatorTest, DeadPhi) { /* Test for a dead loop phi taking as back-edge input a phi that also has * this loop phi as input. Walking backwards in SsaDeadPhiElimination * does not solve the problem because the loop phi will be visited last. @@ -407,7 +407,7 @@ TEST(RegisterAllocatorTest, DeadPhi) { * that share the same register. It should split the interval it is currently * allocating for at the minimum lifetime position between the two inactive intervals. 
*/ -TEST(RegisterAllocatorTest, FreeUntil) { +TEST_F(RegisterAllocatorTest, FreeUntil) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -539,7 +539,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, return graph; } -TEST(RegisterAllocatorTest, PhiHint) { +TEST_F(RegisterAllocatorTest, PhiHint) { ArenaPool pool; ArenaAllocator allocator(&pool); HPhi *phi; @@ -658,7 +658,7 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator, return graph; } -TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { +TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) { ArenaPool pool; ArenaAllocator allocator(&pool); HInstruction *field, *ret; @@ -726,7 +726,7 @@ static HGraph* BuildTwoSubs(ArenaAllocator* allocator, return graph; } -TEST(RegisterAllocatorTest, SameAsFirstInputHint) { +TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) { ArenaPool pool; ArenaAllocator allocator(&pool); HInstruction *first_sub, *second_sub; @@ -795,7 +795,7 @@ static HGraph* BuildDiv(ArenaAllocator* allocator, return graph; } -TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { +TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { ArenaPool pool; ArenaAllocator allocator(&pool); HInstruction *div; @@ -819,7 +819,7 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { // Test a bug in the register allocator, where allocating a blocked // register would lead to spilling an inactive interval at the wrong // position. -TEST(RegisterAllocatorTest, SpillInactive) { +TEST_F(RegisterAllocatorTest, SpillInactive) { ArenaPool pool; // Create a synthesized graph to please the register_allocator and diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 9e6cfbe653..9e869e18e9 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -17,214 +17,11 @@ #include "ssa_builder.h" #include "nodes.h" -#include "primitive_type_propagation.h" +#include "reference_type_propagation.h" #include "ssa_phi_elimination.h" namespace art { -// Returns whether this is a loop header phi which was eagerly created but later -// found inconsistent due to the vreg being undefined in one of its predecessors. -// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination. -static bool IsUndefinedLoopHeaderPhi(HPhi* phi) { - return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size(); -} - -/** - * A debuggable application may require to reviving phis, to ensure their - * associated DEX register is available to a debugger. This class implements - * the logic for statement (c) of the SsaBuilder (see ssa_builder.h). It - * also makes sure that phis with incompatible input types are not revived - * (statement (b) of the SsaBuilder). - * - * This phase must be run after detecting dead phis through the - * DeadPhiElimination phase, and before deleting the dead phis. 
- */ -class DeadPhiHandling : public ValueObject { - public: - explicit DeadPhiHandling(HGraph* graph) - : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) { - worklist_.reserve(kDefaultWorklistSize); - } - - void Run(); - - private: - void VisitBasicBlock(HBasicBlock* block); - void ProcessWorklist(); - void AddToWorklist(HPhi* phi); - void AddDependentInstructionsToWorklist(HPhi* phi); - bool UpdateType(HPhi* phi); - - HGraph* const graph_; - ArenaVector<HPhi*> worklist_; - - static constexpr size_t kDefaultWorklistSize = 8; - - DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling); -}; - -static bool HasConflictingEquivalent(HPhi* phi) { - if (phi->GetNext() == nullptr) { - return false; - } - HPhi* next = phi->GetNext()->AsPhi(); - if (next->GetRegNumber() == phi->GetRegNumber()) { - if (next->GetType() == Primitive::kPrimVoid) { - // We only get a void type for an equivalent phi we processed and found out - // it was conflicting. - return true; - } else { - // Go to the next phi, in case it is also an equivalent. - return HasConflictingEquivalent(next); - } - } - return false; -} - -bool DeadPhiHandling::UpdateType(HPhi* phi) { - if (phi->IsDead()) { - // Phi was rendered dead while waiting in the worklist because it was replaced - // with an equivalent. - return false; - } - - Primitive::Type existing = phi->GetType(); - - bool conflict = false; - Primitive::Type new_type = existing; - for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { - HInstruction* input = phi->InputAt(i); - if (input->IsPhi() && input->AsPhi()->IsDead()) { - // We are doing a reverse post order visit of the graph, reviving - // phis that have environment uses and updating their types. If an - // input is a phi, and it is dead (because its input types are - // conflicting), this phi must be marked dead as well. - conflict = true; - break; - } - Primitive::Type input_type = HPhi::ToPhiType(input->GetType()); - - // The only acceptable transitions are: - // - From void to typed: first time we update the type of this phi. - // - From int to reference (or reference to int): the phi has to change - // to reference type. If the integer input cannot be converted to a - // reference input, the phi will remain dead. - if (new_type == Primitive::kPrimVoid) { - new_type = input_type; - } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) { - if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) { - // If we already asked for an equivalent of the input phi, but that equivalent - // ended up conflicting, make this phi conflicting too. - conflict = true; - break; - } - HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input); - if (equivalent == nullptr) { - conflict = true; - break; - } - phi->ReplaceInput(equivalent, i); - if (equivalent->IsPhi()) { - DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot); - // We created a new phi, but that phi has the same inputs as the old phi. We - // add it to the worklist to ensure its inputs can also be converted to reference. - // If not, it will remain dead, and the algorithm will make the current phi dead - // as well. - equivalent->AsPhi()->SetLive(); - AddToWorklist(equivalent->AsPhi()); - } - } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) { - new_type = Primitive::kPrimNot; - // Start over, we may request reference equivalents for the inputs of the phi. 
- i = -1; - } else if (new_type != input_type) { - conflict = true; - break; - } - } - - if (conflict) { - phi->SetType(Primitive::kPrimVoid); - phi->SetDead(); - return true; - } else if (existing == new_type) { - return false; - } - - DCHECK(phi->IsLive()); - phi->SetType(new_type); - - // There might exist a `new_type` equivalent of `phi` already. In that case, - // we replace the equivalent with the, now live, `phi`. - HPhi* equivalent = phi->GetNextEquivalentPhiWithSameType(); - if (equivalent != nullptr) { - // There cannot be more than two equivalents with the same type. - DCHECK(equivalent->GetNextEquivalentPhiWithSameType() == nullptr); - // If doing fix-point iteration, the equivalent might be in `worklist_`. - // Setting it dead will make UpdateType skip it. - equivalent->SetDead(); - equivalent->ReplaceWith(phi); - } - - return true; -} - -void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->AsPhi(); - if (IsUndefinedLoopHeaderPhi(phi)) { - DCHECK(phi->IsDead()); - continue; - } - if (phi->IsDead() && phi->HasEnvironmentUses()) { - phi->SetLive(); - if (block->IsLoopHeader()) { - // Loop phis must have a type to guarantee convergence of the algorithm. - DCHECK_NE(phi->GetType(), Primitive::kPrimVoid); - AddToWorklist(phi); - } else { - // Because we are doing a reverse post order visit, all inputs of - // this phi have been visited and therefore had their (initial) type set. - UpdateType(phi); - } - } - } -} - -void DeadPhiHandling::ProcessWorklist() { - while (!worklist_.empty()) { - HPhi* instruction = worklist_.back(); - worklist_.pop_back(); - // Note that the same equivalent phi can be added multiple times in the work list, if - // used by multiple phis. The first call to `UpdateType` will know whether the phi is - // dead or live. - if (instruction->IsLive() && UpdateType(instruction)) { - AddDependentInstructionsToWorklist(instruction); - } - } -} - -void DeadPhiHandling::AddToWorklist(HPhi* instruction) { - DCHECK(instruction->IsLive()); - worklist_.push_back(instruction); -} - -void DeadPhiHandling::AddDependentInstructionsToWorklist(HPhi* instruction) { - for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->GetUser()->AsPhi(); - if (phi != nullptr && !phi->IsDead()) { - AddToWorklist(phi); - } - } -} - -void DeadPhiHandling::Run() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); - } - ProcessWorklist(); -} - void SsaBuilder::SetLoopHeaderPhiInputs() { for (size_t i = loop_headers_.size(); i > 0; --i) { HBasicBlock* block = loop_headers_[i - 1]; @@ -285,10 +82,11 @@ void SsaBuilder::EquivalentPhisCleanup() { HPhi* phi = it.Current()->AsPhi(); HPhi* next = phi->GetNextEquivalentPhiWithSameType(); if (next != nullptr) { - // Make sure we do not replace a live phi with a dead phi. A live phi has been - // handled by the type propagation phase, unlike a dead phi. + // Make sure we do not replace a live phi with a dead phi. A live phi + // has been handled by the type propagation phase, unlike a dead phi. if (next->IsLive()) { phi->ReplaceWith(next); + phi->SetDead(); } else { next->ReplaceWith(phi); } @@ -300,64 +98,7 @@ void SsaBuilder::EquivalentPhisCleanup() { } } -void SsaBuilder::BuildSsa() { - // 1) Visit in reverse post order. 
We need to have all predecessors of a block visited - // (with the exception of loops) in order to create the right environment for that - // block. For loops, we create phis whose inputs will be set in 2). - for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); - } - - // 2) Set inputs of loop phis. - SetLoopHeaderPhiInputs(); - - // 3) Mark dead phis. This will mark phis that are only used by environments: - // at the DEX level, the type of these phis does not need to be consistent, but - // our code generator will complain if the inputs of a phi do not have the same - // type. The marking allows the type propagation to know which phis it needs - // to handle. We mark but do not eliminate: the elimination will be done in - // step 9). - SsaDeadPhiElimination dead_phis_for_type_propagation(GetGraph()); - dead_phis_for_type_propagation.MarkDeadPhis(); - - // 4) Propagate types of phis. At this point, phis are typed void in the general - // case, or float/double/reference when we created an equivalent phi. So we - // need to propagate the types across phis to give them a correct type. - PrimitiveTypePropagation type_propagation(GetGraph()); - type_propagation.Run(); - - // 5) When creating equivalent phis we copy the inputs of the original phi which - // may be improperly typed. This was fixed during the type propagation in 4) but - // as a result we may end up with two equivalent phis with the same type for - // the same dex register. This pass cleans them up. - EquivalentPhisCleanup(); - - // 6) Mark dead phis again. Step 4) may have introduced new phis. - // Step 5) might enable the death of new phis. - SsaDeadPhiElimination dead_phis(GetGraph()); - dead_phis.MarkDeadPhis(); - - // 7) Now that the graph is correctly typed, we can get rid of redundant phis. - // Note that we cannot do this phase before type propagation, otherwise - // we could get rid of phi equivalents, whose presence is a requirement for the - // type propagation phase. Note that this is to satisfy statement (a) of the - // SsaBuilder (see ssa_builder.h). - SsaRedundantPhiElimination redundant_phi(GetGraph()); - redundant_phi.Run(); - - // 8) Fix the type for null constants which are part of an equality comparison. - // We need to do this after redundant phi elimination, to ensure the only cases - // that we can see are reference comparison against 0. The redundant phi - // elimination ensures we do not see a phi taking two 0 constants in a HEqual - // or HNotEqual. - FixNullConstantType(); - - // 9) Make sure environments use the right phi "equivalent": a phi marked dead - // can have a phi equivalent that is not dead. We must therefore update - // all environment uses of the dead phi to use its equivalent. Note that there - // can be multiple phis for the same Dex register that are live (for example - // when merging constants), in which case it is OK for the environments - // to just reference one. +void SsaBuilder::FixEnvironmentPhis() { for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) { @@ -378,24 +119,345 @@ void SsaBuilder::BuildSsa() { phi->ReplaceWith(next); } } +} - // 10) Deal with phis to guarantee liveness of phis in case of a debuggable - // application. This is for satisfying statement (c) of the SsaBuilder - // (see ssa_builder.h). 
- if (GetGraph()->IsDebuggable()) { - DeadPhiHandling dead_phi_handler(GetGraph()); - dead_phi_handler.Run(); +static void AddDependentInstructionsToWorklist(HInstruction* instruction, + ArenaVector<HPhi*>* worklist) { + // If `instruction` is a dead phi, a type conflict has just been identified.
+ // All of its live phi users, and, transitively, the users of those users,
+ // therefore need to be marked dead/conflicting too, so we add them to the
+ // worklist. Otherwise we add the users whose types do not match and need to
+ // be updated. + bool add_all_live_phis = instruction->IsPhi() && instruction->AsPhi()->IsDead(); + for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + if (user->IsPhi() && user->AsPhi()->IsLive()) { + if (add_all_live_phis || user->GetType() != instruction->GetType()) { + worklist->push_back(user->AsPhi()); + } + } } +} + +// Find a candidate primitive type for `phi` by merging the types of its inputs. +// Return false if a conflict is identified. +static bool TypePhiFromInputs(HPhi* phi) { + Primitive::Type common_type = phi->GetType(); - } ... - // 11) Now that the right phis are used for the environments, and we - // have potentially revive dead phis in case of a debuggable application, - // we can eliminate phis we do not need. Regardless of the debuggable status, - // this phase is necessary for statement (b) of the SsaBuilder (see ssa_builder.h), - // as well as for the code generation, which does not deal with phis of conflicting + for (HInputIterator it(phi); !it.Done(); it.Advance()) { + HInstruction* input = it.Current(); + if (input->IsPhi() && input->AsPhi()->IsDead()) { + // Phis are constructed live, so if an input is a dead phi, it must have + // been made dead due to a type conflict. Mark this phi conflicting too. + return false; + } + + Primitive::Type input_type = HPhi::ToPhiType(input->GetType()); + if (common_type == input_type) { + // No change in type. + } else if (Primitive::ComponentSize(common_type) != Primitive::ComponentSize(input_type)) { + // Types are of different sizes, e.g. int vs. long. Must be a conflict. + return false; + } else if (Primitive::IsIntegralType(common_type)) { + // Previous inputs were integral, this one is not but is of the same size. + // This does not imply a conflict, since some bytecode instruction types are + // ambiguous. TypeInputsOfPhi will either type them or detect a conflict. + DCHECK(Primitive::IsFloatingPointType(input_type) || input_type == Primitive::kPrimNot); + common_type = input_type; + } else if (Primitive::IsIntegralType(input_type)) { + // Input is integral, common type is not. As in the previous case, if + // there is a conflict, it will be detected during TypeInputsOfPhi. + DCHECK(Primitive::IsFloatingPointType(common_type) || common_type == Primitive::kPrimNot); + } else { + // Combining float and reference types. Clearly a conflict. + DCHECK((common_type == Primitive::kPrimFloat && input_type == Primitive::kPrimNot) || + (common_type == Primitive::kPrimNot && input_type == Primitive::kPrimFloat)); + return false; + } + } + + // We have found a candidate type for the phi. Set it and return true. We may + // still discover a conflict while typing the individual inputs in TypeInputsOfPhi. + phi->SetType(common_type); + return true; +} + +// Replace the inputs of `phi` to match its type. Return false if a conflict is identified.
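+// Worked example (sketch, ours): `v0 = 0; if (c) { v0 = 1.0f; }` yields
+// Phi(IntConstant(0), FloatConstant(1.0f)). TypePhiFromInputs merges int and
+// float (same size) into the candidate kPrimFloat; TypeInputsOfPhi below then
+// swaps IntConstant(0) for its float equivalent (same bit pattern). An input
+// with no such equivalent marks the phi dead as conflicting.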
+bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) { + Primitive::Type common_type = phi->GetType(); + if (common_type == Primitive::kPrimVoid || Primitive::IsIntegralType(common_type)) { + // Phi either contains only other untyped phis (common_type == kPrimVoid), + // or `common_type` is integral and we do not need to retype ambiguous inputs + // because they are always constructed with the integral type candidate. + if (kIsDebugBuild) { + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + if (common_type == Primitive::kPrimVoid) { + DCHECK(input->IsPhi() && input->GetType() == Primitive::kPrimVoid); + } else { + DCHECK((input->IsPhi() && input->GetType() == Primitive::kPrimVoid) || + HPhi::ToPhiType(input->GetType()) == common_type); + } + } + } + // Inputs did not need to be replaced, hence no conflict. Report success. + return true; + } else { + DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type)); + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + if (input->GetType() != common_type) { + // Input type does not match phi's type. Try to retype the input or + // generate a suitably typed equivalent. + HInstruction* equivalent = (common_type == Primitive::kPrimNot) + ? GetReferenceTypeEquivalent(input) + : GetFloatOrDoubleEquivalent(input, common_type); + if (equivalent == nullptr) { + // Input could not be typed. Report conflict. + return false; + } + // Make sure the input did not change its type and we do not need to + // update its users. + DCHECK_NE(input, equivalent); + + phi->ReplaceInput(equivalent, i); + if (equivalent->IsPhi()) { + worklist->push_back(equivalent->AsPhi()); + } + } + } + // All inputs either matched the type of the phi or we successfully replaced + // them with a suitable equivalent. Report success. + return true; + } +} + +// Attempt to set the primitive type of `phi` to match its inputs. Return whether +// it was changed by the algorithm or not. +bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) { + DCHECK(phi->IsLive()); + Primitive::Type original_type = phi->GetType(); + + // Try to type the phi in two stages: + // (1) find a candidate type for the phi by merging types of all its inputs, + // (2) try to type the phi's inputs to that candidate type. + // Either of these stages may detect a type conflict and fail, in which case + // we immediately abort. + if (!TypePhiFromInputs(phi) || !TypeInputsOfPhi(phi, worklist)) { + // Conflict detected. Mark the phi dead and return true because it changed. + phi->SetDead(); + return true; + } + + // Return true if the type of the phi has changed. + return phi->GetType() != original_type; +} + +void SsaBuilder::RunPrimitiveTypePropagation() { + ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter()); + + for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + if (block->IsLoopHeader()) { + for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + HPhi* phi = phi_it.Current()->AsPhi(); + if (phi->IsLive()) { + worklist.push_back(phi); + } + } + } else { + for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + // Eagerly compute the type of the phi, for quicker convergence. 
Note
+        // that we don't need to add users to the worklist because we are
+        // doing a reverse post-order visit, therefore the phi's users are either
+        // non-loop phis, which will be visited later in this pass, or loop phis,
+        // which are already in the worklist.
+        HPhi* phi = phi_it.Current()->AsPhi();
+        if (phi->IsLive()) {
+          UpdatePrimitiveType(phi, &worklist);
+        }
+      }
+    }
+  }
+
+  ProcessPrimitiveTypePropagationWorklist(&worklist);
+  EquivalentPhisCleanup();
+}
+
+void SsaBuilder::ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist) {
+  // Process the worklist.
+  while (!worklist->empty()) {
+    HPhi* phi = worklist->back();
+    worklist->pop_back();
+    // The phi could have been made dead as a result of conflicts while in the
+    // worklist. If it is now dead, there is no point in updating its type.
+    if (phi->IsLive() && UpdatePrimitiveType(phi, worklist)) {
+      AddDependentInstructionsToWorklist(phi, worklist);
+    }
+  }
+}
+
+static HArrayGet* FindFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+  Primitive::Type type = aget->GetType();
+  DCHECK(Primitive::IsIntOrLongType(type));
+  HArrayGet* next = aget->GetNext()->AsArrayGet();
+  return (next != nullptr && next->IsEquivalentOf(aget)) ? next : nullptr;
+}
+
+static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+  Primitive::Type type = aget->GetType();
+  DCHECK(Primitive::IsIntOrLongType(type));
+  DCHECK(FindFloatOrDoubleEquivalentOfArrayGet(aget) == nullptr);
+
+  HArrayGet* equivalent = new (aget->GetBlock()->GetGraph()->GetArena()) HArrayGet(
+      aget->GetArray(),
+      aget->GetIndex(),
+      type == Primitive::kPrimInt ? Primitive::kPrimFloat : Primitive::kPrimDouble,
+      aget->GetDexPc());
+  aget->GetBlock()->InsertInstructionAfter(equivalent, aget);
+  return equivalent;
+}
+
+// Returns true if the array input of `aget` is either of type int[] or long[].
+// Should only be called on ArrayGets with ambiguous type (int/float, long/double)
+// on arrays which were typed to an array class by RTP.
+static bool IsArrayGetOnIntegralArray(HArrayGet* aget) SHARED_REQUIRES(Locks::mutator_lock_) {
+  ReferenceTypeInfo array_type = aget->GetArray()->GetReferenceTypeInfo();
+  DCHECK(array_type.IsPrimitiveArrayClass());
+  ReferenceTypeInfo::TypeHandle array_type_handle = array_type.GetTypeHandle();
+
+  bool is_integral_type;
+  if (Primitive::Is64BitType(aget->GetType())) {
+    is_integral_type = array_type_handle->GetComponentType()->IsPrimitiveLong();
+    DCHECK(is_integral_type || array_type_handle->GetComponentType()->IsPrimitiveDouble());
+  } else {
+    is_integral_type = array_type_handle->GetComponentType()->IsPrimitiveInt();
+    DCHECK(is_integral_type || array_type_handle->GetComponentType()->IsPrimitiveFloat());
+  }
+  return is_integral_type;
+}
+
+bool SsaBuilder::FixAmbiguousArrayGets() {
+  if (ambiguous_agets_.empty()) {
+    return true;
+  }
+
+  // The wrong ArrayGet equivalent may still have Phi uses coming from ArraySet
+  // uses (because they are untyped) and environment uses (if --debuggable).
+  // After resolving all ambiguous ArrayGets, we will re-run primitive type
+  // propagation on the Phis which need to be updated.
+  ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+
+  {
+    ScopedObjectAccess soa(Thread::Current());
+
+    for (HArrayGet* aget_int : ambiguous_agets_) {
+      if (!aget_int->GetArray()->GetReferenceTypeInfo().IsPrimitiveArrayClass()) {
+        // RTP did not type the input array. Bail.
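+        // (Aborting here is surfaced to the caller through the result code of
+        // BuildSsa; see step 7 of BuildSsa below.)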
+ return false; + } + + HArrayGet* aget_float = FindFloatOrDoubleEquivalentOfArrayGet(aget_int); + if (IsArrayGetOnIntegralArray(aget_int)) { + if (aget_float != nullptr) { + // There is a float/double equivalent. We must replace it and re-run + // primitive type propagation on all dependent instructions. + aget_float->ReplaceWith(aget_int); + aget_float->GetBlock()->RemoveInstruction(aget_float); + AddDependentInstructionsToWorklist(aget_int, &worklist); + } + } else { + if (aget_float == nullptr) { + // This is a float/double ArrayGet but there were no typed uses which + // would create the typed equivalent. Create it now. + aget_float = CreateFloatOrDoubleEquivalentOfArrayGet(aget_int); + } + // Replace the original int/long instruction. Note that it may have phi + // uses, environment uses, as well as real uses (from untyped ArraySets). + // We need to re-run primitive type propagation on its dependent instructions. + aget_int->ReplaceWith(aget_float); + aget_int->GetBlock()->RemoveInstruction(aget_int); + AddDependentInstructionsToWorklist(aget_float, &worklist); + } + } + } + + // Set a flag stating that types of ArrayGets have been resolved. This is used + // by GetFloatOrDoubleEquivalentOfArrayGet to report conflict. + agets_fixed_ = true; + + if (!worklist.empty()) { + ProcessPrimitiveTypePropagationWorklist(&worklist); + EquivalentPhisCleanup(); + } + + return true; +} + +BuildSsaResult SsaBuilder::BuildSsa() { + // 1) Visit in reverse post order. We need to have all predecessors of a block + // visited (with the exception of loops) in order to create the right environment + // for that block. For loops, we create phis whose inputs will be set in 2). + for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { + VisitBasicBlock(it.Current()); + } + + // 2) Set inputs of loop header phis. + SetLoopHeaderPhiInputs(); + + // 3) Propagate types of phis. At this point, phis are typed void in the general + // case, or float/double/reference if we created an equivalent phi. So we need + // to propagate the types across phis to give them a correct type. If a type + // conflict is detected in this stage, the phi is marked dead. + RunPrimitiveTypePropagation(); + + // 4) Now that the correct primitive types have been assigned, we can get rid + // of redundant phis. Note that we cannot do this phase before type propagation, + // otherwise we could get rid of phi equivalents, whose presence is a requirement + // for the type propagation phase. Note that this is to satisfy statement (a) + // of the SsaBuilder (see ssa_builder.h). + SsaRedundantPhiElimination(GetGraph()).Run(); + + // 5) Fix the type for null constants which are part of an equality comparison. + // We need to do this after redundant phi elimination, to ensure the only cases + // that we can see are reference comparison against 0. The redundant phi + // elimination ensures we do not see a phi taking two 0 constants in a HEqual + // or HNotEqual. + FixNullConstantType(); + + // 6) Compute type of reference type instructions. The pass assumes that + // NullConstant has been fixed up. + ReferenceTypePropagation(GetGraph(), handles_).Run(); + + // 7) Step 1) duplicated ArrayGet instructions with ambiguous type (int/float + // or long/double). Now that RTP computed the type of the array input, the + // ambiguity can be resolved and the correct equivalent kept. + if (!FixAmbiguousArrayGets()) { + return kBuildSsaFailAmbiguousArrayGet; + } + + // 8) Mark dead phis. 
This will mark phis which are not used by instructions
+  // or other live phis. If compiling as debuggable code, phis will also be kept
+  // live if they have an environment use.
+  SsaDeadPhiElimination dead_phi_elimination(GetGraph());
+  dead_phi_elimination.MarkDeadPhis();
+
+  // 9) Make sure environments use the right phi equivalent: a phi marked dead
+  // can have a phi equivalent that is not dead. In that case we have to replace
+  // it with the live equivalent because deoptimization and try/catch rely on
+  // environments containing values of all live vregs at that point. Note that
+  // there can be multiple phis for the same Dex register that are live
+  // (for example when merging constants), in which case it is okay for the
+  // environments to just reference one.
+  FixEnvironmentPhis();
+
+  // 10) Now that the right phis are used for the environments, we can eliminate
+  // phis we do not need. Regardless of the debuggable status, this phase is
+  // necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well
+  // as for the code generation, which does not deal with phis of conflicting
   // input types.
-  dead_phis.EliminateDeadPhis();
+  dead_phi_elimination.EliminateDeadPhis();
 
-  // 12) Clear locals.
+  // 11) Clear locals.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
        it.Advance()) {
@@ -404,6 +466,8 @@ void SsaBuilder::BuildSsa() {
       current->GetBlock()->RemoveInstruction(current);
     }
   }
+
+  return kBuildSsaSuccess;
 }
 
 ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
@@ -591,6 +655,8 @@ HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) {
  * phi with a floating point / reference type.
  */
 HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
+  DCHECK(phi->IsLive()) << "Cannot get equivalent of a dead phi since it would create a live one.";
+
   // We place the floating point / reference phi next to this phi.
   HInstruction* next = phi->GetNext();
   if (next != nullptr
@@ -606,35 +672,50 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive:
     ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
     HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
     for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      // Copy the inputs. Note that the graph may not be correctly typed by doing this copy,
-      // but the type propagation phase will fix it.
+      // Copy the inputs. Note that the graph may not be correctly typed
+      // by doing this copy, but the type propagation phase will fix it.
       new_phi->SetRawInputAt(i, phi->InputAt(i));
     }
     phi->GetBlock()->InsertPhiAfter(new_phi, phi);
+    DCHECK(new_phi->IsLive());
     return new_phi;
   } else {
+    // An existing equivalent was found. If it is dead, conflict was previously
+    // identified and we return nullptr instead.
     HPhi* next_phi = next->AsPhi();
     DCHECK_EQ(next_phi->GetType(), type);
-    if (next_phi->IsDead()) {
-      // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis)
-      // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This
-      // cannot revive undefined loop header phis because they cannot have uses.
-      DCHECK(!IsUndefinedLoopHeaderPhi(next_phi));
-      next_phi->SetLive();
+    return next_phi->IsLive() ? 
next_phi : nullptr; + } +} + +HArrayGet* SsaBuilder::GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) { + DCHECK(Primitive::IsIntegralType(aget->GetType())); + + if (!Primitive::IsIntOrLongType(aget->GetType())) { + // Cannot type boolean, char, byte, short to float/double. + return nullptr; + } + + DCHECK(ContainsElement(ambiguous_agets_, aget)); + if (agets_fixed_) { + // This used to be an ambiguous ArrayGet but its type has been resolved to + // int/long. Requesting a float/double equivalent should lead to a conflict. + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + DCHECK(IsArrayGetOnIntegralArray(aget)); } - return next_phi; + return nullptr; + } else { + // This is an ambiguous ArrayGet which has not been resolved yet. Return an + // equivalent float/double instruction to use until it is resolved. + HArrayGet* equivalent = FindFloatOrDoubleEquivalentOfArrayGet(aget); + return (equivalent == nullptr) ? CreateFloatOrDoubleEquivalentOfArrayGet(aget) : equivalent; } } -HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, - HInstruction* value, - Primitive::Type type) { +HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* value, Primitive::Type type) { if (value->IsArrayGet()) { - // The verifier has checked that values in arrays cannot be used for both - // floating point and non-floating point operations. It is therefore safe to just - // change the type of the operation. - value->AsArrayGet()->SetType(type); - return value; + return GetFloatOrDoubleEquivalentOfArrayGet(value->AsArrayGet()); } else if (value->IsLongConstant()) { return GetDoubleEquivalent(value->AsLongConstant()); } else if (value->IsIntConstant()) { @@ -642,12 +723,7 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, } else if (value->IsPhi()) { return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type); } else { - // For other instructions, we assume the verifier has checked that the dex format is correctly - // typed and the value in a dex register will not be used for both floating point and - // non-floating point operations. So the only reason an instruction would want a floating - // point equivalent is for an unused phi that will be removed by the dead phi elimination phase. - DCHECK(user->IsPhi()) << "is actually " << user->DebugName() << " (" << user->GetId() << ")"; - return value; + return nullptr; } } @@ -662,15 +738,17 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { } void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { + Primitive::Type load_type = load->GetType(); HInstruction* value = (*current_locals_)[load->GetLocal()->GetRegNumber()]; // If the operation requests a specific type, we make sure its input is of that type. 
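   // For example (illustrative): a float-typed load of a vreg that currently
   // holds an HIntConstant is given the HFloatConstant equivalent below.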
- if (load->GetType() != value->GetType()) { - if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) { - value = GetFloatOrDoubleEquivalent(load, value, load->GetType()); - } else if (load->GetType() == Primitive::kPrimNot) { + if (load_type != value->GetType()) { + if (load_type == Primitive::kPrimFloat || load_type == Primitive::kPrimDouble) { + value = GetFloatOrDoubleEquivalent(value, load_type); + } else if (load_type == Primitive::kPrimNot) { value = GetReferenceTypeEquivalent(value); } } + load->ReplaceWith(value); load->GetBlock()->RemoveInstruction(load); } @@ -760,4 +838,13 @@ void SsaBuilder::VisitTemporary(HTemporary* temp) { temp->GetBlock()->RemoveInstruction(temp); } +void SsaBuilder::VisitArrayGet(HArrayGet* aget) { + Primitive::Type type = aget->GetType(); + DCHECK(!Primitive::IsFloatingPointType(type)); + if (Primitive::IsIntOrLongType(type)) { + ambiguous_agets_.push_back(aget); + } + VisitInstruction(aget); +} + } // namespace art diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index dcce5e4c2c..ed6f5cab51 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -49,17 +49,20 @@ static constexpr int kDefaultNumberOfLoops = 2; */ class SsaBuilder : public HGraphVisitor { public: - explicit SsaBuilder(HGraph* graph) + explicit SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles) : HGraphVisitor(graph), + handles_(handles), + agets_fixed_(false), current_locals_(nullptr), loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), + ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), locals_for_(graph->GetBlocks().size(), ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) { loop_headers_.reserve(kDefaultNumberOfLoops); } - void BuildSsa(); + BuildSsaResult BuildSsa(); // Returns locals vector for `block`. If it is a catch block, the vector will be // prepopulated with catch phis for vregs which are defined in `current_locals_`. @@ -71,23 +74,38 @@ class SsaBuilder : public HGraphVisitor { void VisitStoreLocal(HStoreLocal* store); void VisitInstruction(HInstruction* instruction); void VisitTemporary(HTemporary* instruction); - - static HInstruction* GetFloatOrDoubleEquivalent(HInstruction* user, - HInstruction* instruction, - Primitive::Type type); - - static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction); + void VisitArrayGet(HArrayGet* aget); static constexpr const char* kSsaBuilderPassName = "ssa_builder"; private: void SetLoopHeaderPhiInputs(); + void FixEnvironmentPhis(); void FixNullConstantType(); void EquivalentPhisCleanup(); + void RunPrimitiveTypePropagation(); + + // Attempts to resolve types of aget and aget-wide instructions from reference + // type information on the input array. Returns false if the type of the array + // is unknown. 
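+  // A false return value causes BuildSsa to fail with
+  // kBuildSsaFailAmbiguousArrayGet.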
+ bool FixAmbiguousArrayGets(); + + bool TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist); + bool UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist); + void ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist); - static HFloatConstant* GetFloatEquivalent(HIntConstant* constant); - static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant); - static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); + HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type); + HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction); + + HFloatConstant* GetFloatEquivalent(HIntConstant* constant); + HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant); + HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); + HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget); + + StackHandleScopeCollection* const handles_; + + // True if types of ambiguous ArrayGets have been resolved. + bool agets_fixed_; // Locals for the current block being visited. ArenaVector<HInstruction*>* current_locals_; @@ -96,6 +114,8 @@ class SsaBuilder : public HGraphVisitor { // over these blocks to set the inputs of their phis. ArenaVector<HBasicBlock*> loop_headers_; + ArenaVector<HArrayGet*> ambiguous_agets_; + // HEnvironment for each block. ArenaVector<ArenaVector<HInstruction*>> locals_for_; diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index a3219dcc38..63aba88c2b 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -40,15 +40,17 @@ void SsaDeadPhiElimination::MarkDeadPhis() { continue; } - bool has_non_phi_use = false; - for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { - if (!use_it.Current()->GetUser()->IsPhi()) { - has_non_phi_use = true; - break; + bool keep_alive = (graph_->IsDebuggable() && phi->HasEnvironmentUses()); + if (!keep_alive) { + for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { + if (!use_it.Current()->GetUser()->IsPhi()) { + keep_alive = true; + break; + } } } - if (has_non_phi_use) { + if (keep_alive) { worklist_.push_back(phi); } else { phi->SetDead(); @@ -94,8 +96,8 @@ void SsaDeadPhiElimination::EliminateDeadPhis() { for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { HInstruction* user = use_it.Current()->GetUser(); - DCHECK(user->IsLoopHeaderPhi()) << user->GetId(); - DCHECK(user->AsPhi()->IsDead()) << user->GetId(); + DCHECK(user->IsLoopHeaderPhi()); + DCHECK(user->AsPhi()->IsDead()); } } // Remove the phi from use lists of its inputs. diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 024278f4b2..d2885a8fd7 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -28,6 +28,8 @@ namespace art { +class SsaTest : public CommonCompilerTest {}; + class SsaPrettyPrinter : public HPrettyPrinter { public: explicit SsaPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {} @@ -83,11 +85,10 @@ static void TestCode(const uint16_t* data, const char* expected) { bool graph_built = builder.BuildGraph(*item); ASSERT_TRUE(graph_built); - graph->BuildDominatorTree(); + TransformToSsa(graph); // Suspend checks implementation may change in the future, and this test relies // on how instructions are ordered. 
RemoveSuspendChecks(graph); - graph->TransformToSsa(); ReNumberInstructions(graph); // Test that phis had their type set. @@ -103,7 +104,7 @@ static void TestCode(const uint16_t* data, const char* expected) { ASSERT_STREQ(expected, printer.str().c_str()); } -TEST(SsaTest, CFG1) { +TEST_F(SsaTest, CFG1) { // Test that we get rid of loads and stores. const char* expected = "BasicBlock 0, succ: 1\n" @@ -131,7 +132,7 @@ TEST(SsaTest, CFG1) { TestCode(data, expected); } -TEST(SsaTest, CFG2) { +TEST_F(SsaTest, CFG2) { // Test that we create a phi for the join block of an if control flow instruction // when there is only code in the else branch. const char* expected = @@ -162,7 +163,7 @@ TEST(SsaTest, CFG2) { TestCode(data, expected); } -TEST(SsaTest, CFG3) { +TEST_F(SsaTest, CFG3) { // Test that we create a phi for the join block of an if control flow instruction // when both branches update a local. const char* expected = @@ -195,7 +196,7 @@ TEST(SsaTest, CFG3) { TestCode(data, expected); } -TEST(SsaTest, Loop1) { +TEST_F(SsaTest, Loop1) { // Test that we create a phi for an initialized local at entry of a loop. const char* expected = "BasicBlock 0, succ: 1\n" @@ -228,7 +229,7 @@ TEST(SsaTest, Loop1) { TestCode(data, expected); } -TEST(SsaTest, Loop2) { +TEST_F(SsaTest, Loop2) { // Simple loop with one preheader and one back edge. const char* expected = "BasicBlock 0, succ: 1\n" @@ -258,7 +259,7 @@ TEST(SsaTest, Loop2) { TestCode(data, expected); } -TEST(SsaTest, Loop3) { +TEST_F(SsaTest, Loop3) { // Test that a local not yet defined at the entry of a loop is handled properly. const char* expected = "BasicBlock 0, succ: 1\n" @@ -290,7 +291,7 @@ TEST(SsaTest, Loop3) { TestCode(data, expected); } -TEST(SsaTest, Loop4) { +TEST_F(SsaTest, Loop4) { // Make sure we support a preheader of a loop not being the first predecessor // in the predecessor list of the header. const char* expected = @@ -325,7 +326,7 @@ TEST(SsaTest, Loop4) { TestCode(data, expected); } -TEST(SsaTest, Loop5) { +TEST_F(SsaTest, Loop5) { // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. const char* expected = @@ -367,7 +368,7 @@ TEST(SsaTest, Loop5) { TestCode(data, expected); } -TEST(SsaTest, Loop6) { +TEST_F(SsaTest, Loop6) { // Test a loop with one preheader and two back edges (e.g. continue). const char* expected = "BasicBlock 0, succ: 1\n" @@ -406,7 +407,7 @@ TEST(SsaTest, Loop6) { TestCode(data, expected); } -TEST(SsaTest, Loop7) { +TEST_F(SsaTest, Loop7) { // Test a loop with one preheader, one back edge, and two exit edges (e.g. break). const char* expected = "BasicBlock 0, succ: 1\n" @@ -448,7 +449,7 @@ TEST(SsaTest, Loop7) { TestCode(data, expected); } -TEST(SsaTest, DeadLocal) { +TEST_F(SsaTest, DeadLocal) { // Test that we correctly handle a local not being used. const char* expected = "BasicBlock 0, succ: 1\n" @@ -466,7 +467,7 @@ TEST(SsaTest, DeadLocal) { TestCode(data, expected); } -TEST(SsaTest, LocalInIf) { +TEST_F(SsaTest, LocalInIf) { // Test that we do not create a phi in the join block when one predecessor // does not update the local. const char* expected = @@ -496,7 +497,7 @@ TEST(SsaTest, LocalInIf) { TestCode(data, expected); } -TEST(SsaTest, MultiplePredecessors) { +TEST_F(SsaTest, MultiplePredecessors) { // Test that we do not create a phi when one predecessor // does not update the local. const char* expected = |