Diffstat (limited to 'compiler/optimizing')
30 files changed, 1446 insertions, 287 deletions
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 503d08f6f5..21540e8ed7 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -1175,10 +1175,9 @@ void HGraphBuilder::PotentiallySimplifyFakeString(uint16_t original_dex_register verified_method->GetStringInitPcRegMap(); auto map_it = string_init_map.find(dex_pc); if (map_it != string_init_map.end()) { - std::set<uint32_t> reg_set = map_it->second; - for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) { + for (uint32_t reg : map_it->second) { HInstruction* load_local = LoadLocal(original_dex_register, Primitive::kPrimNot, dex_pc); - UpdateLocal(*set_it, load_local, dex_pc); + UpdateLocal(reg, load_local, dex_pc); } } } else { @@ -1302,7 +1301,13 @@ bool HGraphBuilder::IsOutermostCompilingClass(uint16_t type_index) const { soa, dex_cache, class_loader, type_index, dex_compilation_unit_))); Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); - return outer_class.Get() == cls.Get(); + // GetOutermostCompilingClass returns null when the class is unresolved + // (e.g. if it derives from an unresolved class). This is bogus knowing that + // we are compiling it. + // When this happens we cannot establish a direct relation between the current + // class and the outer class, so we return false. + // (Note that this is only used for optimizing invokes and field accesses) + return (cls.Get() != nullptr) && (outer_class.Get() == cls.Get()); } void HGraphBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction, diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 28970062cc..6a743ebbc9 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -41,6 +41,7 @@ #include "driver/dex_compilation_unit.h" #include "gc_map_builder.h" #include "graph_visualizer.h" +#include "intrinsics.h" #include "leb128.h" #include "mapping_table.h" #include "mirror/array-inl.h" @@ -1381,4 +1382,57 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* } } +void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { + // Check to see if we have known failures that will cause us to have to bail out + // to the runtime, and just generate the runtime call directly. + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + + // The positions must be non-negative. + if ((src_pos != nullptr && src_pos->GetValue() < 0) || + (dest_pos != nullptr && dest_pos->GetValue() < 0)) { + // We will have to fail anyways. + return; + } + + // The length must be >= 0. + HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + if (length != nullptr) { + int32_t len = length->GetValue(); + if (len < 0) { + // Just call as normal. + return; + } + } + + SystemArrayCopyOptimizations optimizations(invoke); + + if (optimizations.GetDestinationIsSource()) { + if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) { + // We only support backward copying if source and destination are the same. + return; + } + } + + if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) { + // We currently don't intrinsify primitive copying. 
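The bail-out above for src_pos < dest_pos when source and destination are the same array is about copy direction: the intrinsic's fast path copies forward, and a forward copy over such an overlap reads slots it has already overwritten. A minimal standalone illustration (plain C++, not ART code):

#include <cassert>
#include <cstring>

int main() {
  int a[5] = {1, 2, 3, 4, 5};
  // Equivalent of arraycopy(a, /*src_pos*/ 0, a, /*dest_pos*/ 1, /*length*/ 4)
  // performed with a naive forward loop: each read sees an overwritten slot.
  for (int i = 0; i < 4; ++i) {
    a[1 + i] = a[0 + i];
  }
  int smeared[5] = {1, 1, 1, 1, 1};
  assert(std::memcmp(a, smeared, sizeof a) == 0);
  // The correct result {1, 1, 2, 3, 4} needs a backward (memmove-style) copy,
  // which is why this case is left to the runtime call.
  return 0;
}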
+ return; + } + + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena(); + LocationSummary* locations = new (allocator) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length). + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3))); + locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4))); + + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + } // namespace art diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index acce5b3359..b04dfc00b2 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -421,6 +421,8 @@ class CodeGenerator { Location runtime_type_index_location, Location runtime_return_location); + static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke); + void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index cac74c8da6..1df6818549 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1703,6 +1703,7 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), + codegen_->GetAssembler(), codegen_->GetInstructionSetFeatures()); if (intrinsic.TryDispatch(invoke)) { return; @@ -1742,6 +1743,7 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), + codegen_->GetAssembler(), codegen_->GetInstructionSetFeatures()); if (intrinsic.TryDispatch(invoke)) { return; @@ -3570,6 +3572,47 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI } } +Location LocationsBuilderARM::ArmEncodableConstantOrRegister(HInstruction* constant, + Opcode opcode) { + DCHECK(!Primitive::IsFloatingPointType(constant->GetType())); + if (constant->IsConstant() && + CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { + return Location::ConstantLocation(constant->AsConstant()); + } + return Location::RequiresRegister(); +} + +bool LocationsBuilderARM::CanEncodeConstantAsImmediate(HConstant* input_cst, + Opcode opcode) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst)); + if (Primitive::Is64BitType(input_cst->GetType())) { + return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) && + CanEncodeConstantAsImmediate(High32Bits(value), opcode); + } else { + return CanEncodeConstantAsImmediate(Low32Bits(value), opcode); + } +} + +bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) { + ShifterOperand so; + ArmAssembler* assembler = codegen_->GetAssembler(); + if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) { + return true; + } + Opcode neg_opcode = kNoOperand; + switch (opcode) { + case AND: + 
neg_opcode = BIC; + break; + case ORR: + neg_opcode = ORN; + break; + default: + return false; + } + return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so); +} + void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); @@ -4970,17 +5013,18 @@ void InstructionCodeGeneratorARM::VisitMonitorOperation(HMonitorOperation* instr nullptr); } -void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } -void LocationsBuilderARM::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); } -void LocationsBuilderARM::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); } +void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); } +void LocationsBuilderARM::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction, ORR); } +void LocationsBuilderARM::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction, EOR); } -void LocationsBuilderARM::HandleBitwiseOperation(HBinaryOperation* instruction) { +void LocationsBuilderARM::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); DCHECK(instruction->GetResultType() == Primitive::kPrimInt || instruction->GetResultType() == Primitive::kPrimLong); + // Note: GVN reorders commutative operations to have the constant on the right hand side. locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -4996,48 +5040,131 @@ void InstructionCodeGeneratorARM::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); } +void InstructionCodeGeneratorARM::GenerateAndConst(Register out, Register first, uint32_t value) { + // Optimize special cases for individual halfs of `and-long` (`and` is simplified earlier). + if (value == 0xffffffffu) { + if (out != first) { + __ mov(out, ShifterOperand(first)); + } + return; + } + if (value == 0u) { + __ mov(out, ShifterOperand(0)); + return; + } + ShifterOperand so; + if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, AND, value, &so)) { + __ and_(out, first, so); + } else { + DCHECK(__ ShifterOperandCanHold(kNoRegister, kNoRegister, BIC, ~value, &so)); + __ bic(out, first, ShifterOperand(~value)); + } +} + +void InstructionCodeGeneratorARM::GenerateOrrConst(Register out, Register first, uint32_t value) { + // Optimize special cases for individual halfs of `or-long` (`or` is simplified earlier). + if (value == 0u) { + if (out != first) { + __ mov(out, ShifterOperand(first)); + } + return; + } + if (value == 0xffffffffu) { + __ mvn(out, ShifterOperand(0)); + return; + } + ShifterOperand so; + if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, ORR, value, &so)) { + __ orr(out, first, so); + } else { + DCHECK(__ ShifterOperandCanHold(kNoRegister, kNoRegister, ORN, ~value, &so)); + __ orn(out, first, ShifterOperand(~value)); + } +} + +void InstructionCodeGeneratorARM::GenerateEorConst(Register out, Register first, uint32_t value) { + // Optimize special case for individual halfs of `xor-long` (`xor` is simplified earlier). 
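CanEncodeConstantAsImmediate and the Generate*Const helpers above lean on a complement identity: when `value` has no shifter-operand encoding for AND or ORR, its bitwise complement may encode for BIC or ORN, and the result is unchanged because a & value == a BIC ~value and a | value == a ORN ~value. (EOR has no complemented form in A32/T32, so only directly encodable constants are accepted for it.) A self-contained sketch of the classic A32 modified-immediate rule, an 8-bit value rotated right by an even amount; ART's Thumb-2 ShifterOperandCanHold accepts more patterns than this:

#include <cassert>
#include <cstdint>

static uint32_t RotateLeft(uint32_t v, int n) {
  return n == 0 ? v : (v << n) | (v >> (32 - n));
}

// Encodable iff some even left-rotation of the value fits in 8 bits,
// i.e. the value is an 8-bit immediate rotated right by an even amount.
static bool EncodableA32Immediate(uint32_t value) {
  for (int rot = 0; rot < 32; rot += 2) {
    if (RotateLeft(value, rot) <= 0xffu) {
      return true;
    }
  }
  return false;
}

int main() {
  uint32_t mask = 0xffffff00u;           // too many set bits to encode for AND...
  assert(!EncodableA32Immediate(mask));
  assert(EncodableA32Immediate(~mask));  // ...but ~mask == 0xff encodes for BIC
  // a & 0xffffff00 == a BIC 0x000000ff, so the mask still avoids a register.
  return 0;
}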
+ if (value == 0u) { + if (out != first) { + __ mov(out, ShifterOperand(first)); + } + return; + } + __ eor(out, first, ShifterOperand(value)); +} + void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location out = locations->Out(); + + if (second.IsConstant()) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); + uint32_t value_low = Low32Bits(value); + if (instruction->GetResultType() == Primitive::kPrimInt) { + Register first_reg = first.AsRegister<Register>(); + Register out_reg = out.AsRegister<Register>(); + if (instruction->IsAnd()) { + GenerateAndConst(out_reg, first_reg, value_low); + } else if (instruction->IsOr()) { + GenerateOrrConst(out_reg, first_reg, value_low); + } else { + DCHECK(instruction->IsXor()); + GenerateEorConst(out_reg, first_reg, value_low); + } + } else { + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + uint32_t value_high = High32Bits(value); + Register first_low = first.AsRegisterPairLow<Register>(); + Register first_high = first.AsRegisterPairHigh<Register>(); + Register out_low = out.AsRegisterPairLow<Register>(); + Register out_high = out.AsRegisterPairHigh<Register>(); + if (instruction->IsAnd()) { + GenerateAndConst(out_low, first_low, value_low); + GenerateAndConst(out_high, first_high, value_high); + } else if (instruction->IsOr()) { + GenerateOrrConst(out_low, first_low, value_low); + GenerateOrrConst(out_high, first_high, value_high); + } else { + DCHECK(instruction->IsXor()); + GenerateEorConst(out_low, first_low, value_low); + GenerateEorConst(out_high, first_high, value_high); + } + } + return; + } if (instruction->GetResultType() == Primitive::kPrimInt) { - Register first = locations->InAt(0).AsRegister<Register>(); - Register second = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); + Register first_reg = first.AsRegister<Register>(); + ShifterOperand second_reg(second.AsRegister<Register>()); + Register out_reg = out.AsRegister<Register>(); if (instruction->IsAnd()) { - __ and_(out, first, ShifterOperand(second)); + __ and_(out_reg, first_reg, second_reg); } else if (instruction->IsOr()) { - __ orr(out, first, ShifterOperand(second)); + __ orr(out_reg, first_reg, second_reg); } else { DCHECK(instruction->IsXor()); - __ eor(out, first, ShifterOperand(second)); + __ eor(out_reg, first_reg, second_reg); } } else { DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); - Location first = locations->InAt(0); - Location second = locations->InAt(1); - Location out = locations->Out(); + Register first_low = first.AsRegisterPairLow<Register>(); + Register first_high = first.AsRegisterPairHigh<Register>(); + ShifterOperand second_low(second.AsRegisterPairLow<Register>()); + ShifterOperand second_high(second.AsRegisterPairHigh<Register>()); + Register out_low = out.AsRegisterPairLow<Register>(); + Register out_high = out.AsRegisterPairHigh<Register>(); if (instruction->IsAnd()) { - __ and_(out.AsRegisterPairLow<Register>(), - first.AsRegisterPairLow<Register>(), - ShifterOperand(second.AsRegisterPairLow<Register>())); - __ and_(out.AsRegisterPairHigh<Register>(), - first.AsRegisterPairHigh<Register>(), - ShifterOperand(second.AsRegisterPairHigh<Register>())); + __ and_(out_low, first_low, second_low); + __ and_(out_high, first_high, second_high); } else if 
(instruction->IsOr()) { - __ orr(out.AsRegisterPairLow<Register>(), - first.AsRegisterPairLow<Register>(), - ShifterOperand(second.AsRegisterPairLow<Register>())); - __ orr(out.AsRegisterPairHigh<Register>(), - first.AsRegisterPairHigh<Register>(), - ShifterOperand(second.AsRegisterPairHigh<Register>())); + __ orr(out_low, first_low, second_low); + __ orr(out_high, first_high, second_high); } else { DCHECK(instruction->IsXor()); - __ eor(out.AsRegisterPairLow<Register>(), - first.AsRegisterPairLow<Register>(), - ShifterOperand(second.AsRegisterPairLow<Register>())); - __ eor(out.AsRegisterPairHigh<Register>(), - first.AsRegisterPairHigh<Register>(), - ShifterOperand(second.AsRegisterPairHigh<Register>())); + __ eor(out_low, first_low, second_low); + __ eor(out_high, first_high, second_high); } } } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 16d1d383b4..6900933e87 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -169,11 +169,15 @@ class LocationsBuilderARM : public HGraphVisitor { private: void HandleInvoke(HInvoke* invoke); - void HandleBitwiseOperation(HBinaryOperation* operation); + void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode); void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode); + bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode); + bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode); + CodeGeneratorARM* const codegen_; InvokeDexCallingConventionVisitorARM parameter_visitor_; @@ -205,6 +209,9 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { // the suspend call. 
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg); + void GenerateAndConst(Register out, Register first, uint32_t value); + void GenerateOrrConst(Register out, Register first, uint32_t value); + void GenerateEorConst(Register out, Register first, uint32_t value); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleShift(HBinaryOperation* operation); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 5e8f9e7f30..7799437235 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -117,7 +117,7 @@ class FieldAccessCallingConventionMIPS64 : public FieldAccessCallingConvention { return Location::RegisterLocation(A0); } Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { - return Location::RegisterLocation(A0); + return Location::RegisterLocation(V0); } Location GetSetValueLocation( Primitive::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index f385afaa7f..9de159251f 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -532,7 +532,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, move_resolver_(graph->GetArena(), this), isa_features_(isa_features), method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Use a fake return address register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -5724,6 +5725,51 @@ void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) } } +void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + + // Constant area pointer. + locations->SetInAt(1, Location::RequiresRegister()); + + // And the temporary we need. + locations->AddTemp(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + int32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // Optimizing has a jump area. + Register temp_reg = locations->GetTemp(0).AsRegister<Register>(); + Register constant_area = locations->InAt(1).AsRegister<Register>(); + + // Remove the bias, if needed. + if (lower_bound != 0) { + __ leal(temp_reg, Address(value_reg, -lower_bound)); + value_reg = temp_reg; + } + + // Is the value in range? + DCHECK_GE(num_entries, 1); + __ cmpl(value_reg, Immediate(num_entries - 1)); + __ j(kAbove, codegen_->GetLabelOf(default_block)); + + // We are in the range of the table. + // Load (target-constant_area) from the jump table, indexing by the value. 
+ __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg)); + + // Compute the actual target address by adding in constant_area. + __ addl(temp_reg, constant_area); + + // And jump. + __ jmp(temp_reg); +} + void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress( HX86ComputeBaseMethodAddress* insn) { LocationSummary* locations = @@ -5807,28 +5853,18 @@ void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromCons } } -void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { - // Generate the constant area if needed. - X86Assembler* assembler = GetAssembler(); - if (!assembler->IsConstantAreaEmpty()) { - // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 - // byte values. - assembler->Align(4, 0); - constant_area_start_ = assembler->CodeSize(); - assembler->AddConstantArea(); - } - - // And finish up. - CodeGenerator::Finalize(allocator); -} - /** * Class to handle late fixup of offsets into constant area. */ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { public: - RIPFixup(const CodeGeneratorX86& codegen, int offset) - : codegen_(codegen), offset_into_constant_area_(offset) {} + RIPFixup(CodeGeneratorX86& codegen, size_t offset) + : codegen_(&codegen), offset_into_constant_area_(offset) {} + + protected: + void SetOffset(size_t offset) { offset_into_constant_area_ = offset; } + + CodeGeneratorX86* codegen_; private: void Process(const MemoryRegion& region, int pos) OVERRIDE { @@ -5836,19 +5872,77 @@ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenera // last 4 bytes of the instruction. // The value to patch is the distance from the offset in the constant area // from the address computed by the HX86ComputeBaseMethodAddress instruction. - int32_t constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_; - int32_t relative_position = constant_offset - codegen_.GetMethodAddressOffset();; + int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_; + int32_t relative_position = constant_offset - codegen_->GetMethodAddressOffset();; // Patch in the right value. region.StoreUnaligned<int32_t>(pos - 4, relative_position); } - const CodeGeneratorX86& codegen_; - // Location in constant area that the fixup refers to. - int offset_into_constant_area_; + int32_t offset_into_constant_area_; +}; + +/** + * Class to handle late fixup of offsets to a jump table that will be created in the + * constant area. + */ +class JumpTableRIPFixup : public RIPFixup { + public: + JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr) + : RIPFixup(codegen, static_cast<size_t>(-1)), switch_instr_(switch_instr) {} + + void CreateJumpTable() { + X86Assembler* assembler = codegen_->GetAssembler(); + + // Ensure that the reference to the jump table has the correct offset. + const int32_t offset_in_constant_table = assembler->ConstantAreaSize(); + SetOffset(offset_in_constant_table); + + // The label values in the jump table are computed relative to the + // instruction addressing the constant area. + const int32_t relative_offset = codegen_->GetMethodAddressOffset(); + + // Populate the jump table with the correct values for the jump table. + int32_t num_entries = switch_instr_->GetNumEntries(); + HBasicBlock* block = switch_instr_->GetBlock(); + const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors(); + // The value that we want is the target offset - the position of the table. 
+ for (int32_t i = 0; i < num_entries; i++) { + HBasicBlock* b = successors[i]; + Label* l = codegen_->GetLabelOf(b); + DCHECK(l->IsBound()); + int32_t offset_to_block = l->Position() - relative_offset; + assembler->AppendInt32(offset_to_block); + } + } + + private: + const HX86PackedSwitch* switch_instr_; }; +void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { + // Generate the constant area if needed. + X86Assembler* assembler = GetAssembler(); + if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) { + // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 + // byte values. + assembler->Align(4, 0); + constant_area_start_ = assembler->CodeSize(); + + // Populate any jump tables. + for (auto jump_table : fixups_to_jump_tables_) { + jump_table->CreateJumpTable(); + } + + // And now add the constant area to the generated code. + assembler->AddConstantArea(); + } + + // And finish up. + CodeGenerator::Finalize(allocator); +} + Address CodeGeneratorX86::LiteralDoubleAddress(double v, Register reg) { AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v)); return Address(reg, kDummy32BitOffset, fixup); @@ -5869,98 +5963,18 @@ Address CodeGeneratorX86::LiteralInt64Address(int64_t v, Register reg) { return Address(reg, kDummy32BitOffset, fixup); } -/** - * Finds instructions that need the constant area base as an input. - */ -class ConstantHandlerVisitor : public HGraphVisitor { - public: - explicit ConstantHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {} - - private: - void VisitAdd(HAdd* add) OVERRIDE { - BinaryFP(add); - } - - void VisitSub(HSub* sub) OVERRIDE { - BinaryFP(sub); - } - - void VisitMul(HMul* mul) OVERRIDE { - BinaryFP(mul); - } - - void VisitDiv(HDiv* div) OVERRIDE { - BinaryFP(div); - } - - void VisitReturn(HReturn* ret) OVERRIDE { - HConstant* value = ret->InputAt(0)->AsConstant(); - if ((value != nullptr && Primitive::IsFloatingPointType(value->GetType()))) { - ReplaceInput(ret, value, 0, true); - } - } - - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { - HandleInvoke(invoke); - } - - void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { - HandleInvoke(invoke); - } - - void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE { - HandleInvoke(invoke); - } - - void BinaryFP(HBinaryOperation* bin) { - HConstant* rhs = bin->InputAt(1)->AsConstant(); - if (rhs != nullptr && Primitive::IsFloatingPointType(bin->GetResultType())) { - ReplaceInput(bin, rhs, 1, false); - } - } - - void InitializeConstantAreaPointer(HInstruction* user) { - // Ensure we only initialize the pointer once. - if (base_ != nullptr) { - return; - } +Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, + Register reg, + Register value) { + // Create a fixup to be used to create and address the jump table. + JumpTableRIPFixup* table_fixup = + new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr); - HGraph* graph = GetGraph(); - HBasicBlock* entry = graph->GetEntryBlock(); - base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress(); - HInstruction* insert_pos = (user->GetBlock() == entry) ? 
user : entry->GetLastInstruction(); - entry->InsertInstructionBefore(base_, insert_pos); - DCHECK(base_ != nullptr); - } - - void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) { - InitializeConstantAreaPointer(insn); - HGraph* graph = GetGraph(); - HBasicBlock* block = insn->GetBlock(); - HX86LoadFromConstantTable* load_constant = - new (graph->GetArena()) HX86LoadFromConstantTable(base_, value, materialize); - block->InsertInstructionBefore(load_constant, insn); - insn->ReplaceInput(load_constant, input_index); - } - - void HandleInvoke(HInvoke* invoke) { - // Ensure that we can load FP arguments from the constant area. - for (size_t i = 0, e = invoke->InputCount(); i < e; i++) { - HConstant* input = invoke->InputAt(i)->AsConstant(); - if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) { - ReplaceInput(invoke, input, i, true); - } - } - } - - // The generated HX86ComputeBaseMethodAddress in the entry block needed as an - // input to the HX86LoadFromConstantTable instructions. - HX86ComputeBaseMethodAddress* base_; -}; + // We have to populate the jump tables. + fixups_to_jump_tables_.push_back(table_fixup); -void ConstantAreaFixups::Run() { - ConstantHandlerVisitor visitor(graph_); - visitor.VisitInsertionOrder(); + // We want a scaled address, as we are extracting the correct offset from the table. + return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup); } // TODO: target as memory. diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index ae2d84f945..fdfc5ab69b 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -245,6 +245,8 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86); }; +class JumpTableRIPFixup; + class CodeGeneratorX86 : public CodeGenerator { public: CodeGeneratorX86(HGraph* graph, @@ -385,6 +387,8 @@ class CodeGeneratorX86 : public CodeGenerator { Address LiteralInt32Address(int32_t v, Register reg); Address LiteralInt64Address(int64_t v, Register reg); + Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value); + void Finalize(CodeAllocator* allocator) OVERRIDE; private: @@ -405,6 +409,9 @@ class CodeGeneratorX86 : public CodeGenerator { // Used for fixups to the constant area. int32_t constant_area_start_; + // Fixups for jump tables that need to be patched after the constant table is generated. + ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; + // If there is a HX86ComputeBaseMethodAddress instruction in the graph // (which shall be the sole instruction of this kind), subtracting this offset // from the value contained in the out register of this HX86ComputeBaseMethodAddress diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5757787f98..bedaa5577b 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -36,9 +36,6 @@ namespace art { namespace x86_64 { -// Some x86_64 instructions require a register to be available as temp. 
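The x86 VisitX86PackedSwitch above and the x86-64 version that follows implement the same dispatch scheme: the constant area holds one 32-bit entry per case, each entry being the target's offset relative to a base (the HX86ComputeBaseMethodAddress value on x86, the table's own position on x86-64); the incoming value is de-biased by lower_bound, and a single unsigned compare-and-branch rejects both underflow and overflow. An illustrative model with assumed names, not ART's types:

#include <cstdint>

struct PackedSwitchTable {
  int32_t lower_bound;       // smallest case value (the bias removed by leal)
  int32_t num_entries;
  const int32_t* offsets;    // one entry per case, each == target - base
  uintptr_t base;            // address the offsets are relative to
  uintptr_t default_target;

  uintptr_t Dispatch(int32_t value) const {
    // After de-biasing, values below lower_bound wrap to huge unsigned
    // numbers, so one "above" compare mirrors cmpl/j(kAbove, default).
    uint32_t index = static_cast<uint32_t>(value) - static_cast<uint32_t>(lower_bound);
    if (index > static_cast<uint32_t>(num_entries - 1)) {
      return default_target;
    }
    // movl/movsxd plus addl/addq in the generated code: offset plus base.
    return base + static_cast<uintptr_t>(static_cast<intptr_t>(offsets[index]));
  }
};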
-static constexpr Register TMP = R11; - static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = RDI; @@ -679,7 +676,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, constant_area_start_(0), method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -5363,31 +5361,43 @@ void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); } void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); int32_t num_entries = switch_instr->GetNumEntries(); LocationSummary* locations = switch_instr->GetLocations(); - CpuRegister value_reg = locations->InAt(0).AsRegister<CpuRegister>(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); - // Create a series of compare/jumps. - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - if (case_value == 0) { - __ testl(value_reg, value_reg); - } else { - __ cmpl(value_reg, Immediate(case_value)); - } - __ j(kEqual, codegen_->GetLabelOf(successors[i])); + // Remove the bias, if needed. + Register value_reg_out = value_reg_in.AsRegister(); + if (lower_bound != 0) { + __ leal(temp_reg, Address(value_reg_in, -lower_bound)); + value_reg_out = temp_reg.AsRegister(); } + CpuRegister value_reg(value_reg_out); - // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ jmp(codegen_->GetLabelOf(default_block)); - } + // Is the value in range? + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + __ cmpl(value_reg, Immediate(num_entries - 1)); + __ j(kAbove, codegen_->GetLabelOf(default_block)); + + // We are in the range of the table. + // Load the address of the jump table in the constant area. + __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr)); + + // Load the (signed) offset from the jump table. + __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0)); + + // Add the offset to the address of the table base. + __ addq(temp_reg, base_reg); + + // And jump. + __ jmp(temp_reg); } void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { @@ -5413,15 +5423,85 @@ void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) { } } +/** + * Class to handle late fixup of offsets into constant area. 
+ */ +class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { + public: + RIPFixup(CodeGeneratorX86_64& codegen, size_t offset) + : codegen_(&codegen), offset_into_constant_area_(offset) {} + + protected: + void SetOffset(size_t offset) { offset_into_constant_area_ = offset; } + + CodeGeneratorX86_64* codegen_; + + private: + void Process(const MemoryRegion& region, int pos) OVERRIDE { + // Patch the correct offset for the instruction. We use the address of the + // 'next' instruction, which is 'pos' (patch the 4 bytes before). + int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_; + int32_t relative_position = constant_offset - pos; + + // Patch in the right value. + region.StoreUnaligned<int32_t>(pos - 4, relative_position); + } + + // Location in constant area that the fixup refers to. + size_t offset_into_constant_area_; +}; + +/** + * Class to handle late fixup of offsets to a jump table that will be created in the + * constant area. + */ +class JumpTableRIPFixup : public RIPFixup { + public: + JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr) + : RIPFixup(codegen, -1), switch_instr_(switch_instr) {} + + void CreateJumpTable() { + X86_64Assembler* assembler = codegen_->GetAssembler(); + + // Ensure that the reference to the jump table has the correct offset. + const int32_t offset_in_constant_table = assembler->ConstantAreaSize(); + SetOffset(offset_in_constant_table); + + // Compute the offset from the start of the function to this jump table. + const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table; + + // Populate the jump table with the correct values for the jump table. + int32_t num_entries = switch_instr_->GetNumEntries(); + HBasicBlock* block = switch_instr_->GetBlock(); + const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors(); + // The value that we want is the target offset - the position of the table. + for (int32_t i = 0; i < num_entries; i++) { + HBasicBlock* b = successors[i]; + Label* l = codegen_->GetLabelOf(b); + DCHECK(l->IsBound()); + int32_t offset_to_block = l->Position() - current_table_offset; + assembler->AppendInt32(offset_to_block); + } + } + + private: + const HPackedSwitch* switch_instr_; +}; + void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { // Generate the constant area if needed. X86_64Assembler* assembler = GetAssembler(); - if (!assembler->IsConstantAreaEmpty()) { - // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 - // byte values. If used for vectors at a later time, this will need to be - // updated to 16 bytes with the appropriate offset. + if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) { + // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values. assembler->Align(4, 0); constant_area_start_ = assembler->CodeSize(); + + // Populate any jump tables. + for (auto jump_table : fixups_to_jump_tables_) { + jump_table->CreateJumpTable(); + } + + // And now add the constant area to the generated code. assembler->AddConstantArea(); } @@ -5429,31 +5509,6 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { CodeGenerator::Finalize(allocator); } -/** - * Class to handle late fixup of offsets into constant area. 
- */ -class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { - public: - RIPFixup(const CodeGeneratorX86_64& codegen, int offset) - : codegen_(codegen), offset_into_constant_area_(offset) {} - - private: - void Process(const MemoryRegion& region, int pos) OVERRIDE { - // Patch the correct offset for the instruction. We use the address of the - // 'next' instruction, which is 'pos' (patch the 4 bytes before). - int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_; - int relative_position = constant_offset - pos; - - // Patch in the right value. - region.StoreUnaligned<int32_t>(pos - 4, relative_position); - } - - const CodeGeneratorX86_64& codegen_; - - // Location in constant area that the fixup refers to. - int offset_into_constant_area_; -}; - Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v)); return Address::RIP(fixup); @@ -5494,6 +5549,16 @@ void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type t GetMoveResolver()->EmitNativeCode(¶llel_move); } +Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) { + // Create a fixup to be used to create and address the jump table. + JumpTableRIPFixup* table_fixup = + new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr); + + // We have to populate the jump tables. + fixups_to_jump_tables_.push_back(table_fixup); + return Address::RIP(table_fixup); +} + #undef __ } // namespace x86_64 diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index ecc8630e6b..dc86a48ce7 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -30,6 +30,9 @@ namespace x86_64 { // Use a local definition to prevent copying mistakes. static constexpr size_t kX86_64WordSize = kX86_64PointerSize; +// Some x86_64 instructions require a register to be available as temp. +static constexpr Register TMP = R11; + static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 }; static constexpr FloatRegister kParameterFloatRegisters[] = { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 }; @@ -231,6 +234,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64); }; +// Class for fixups to jump tables. +class JumpTableRIPFixup; + class CodeGeneratorX86_64 : public CodeGenerator { public: CodeGeneratorX86_64(HGraph* graph, @@ -351,6 +357,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Load a 64 bit value into a register in the most efficient manner. void Load64BitValue(CpuRegister dest, int64_t value); + Address LiteralCaseTable(HPackedSwitch* switch_instr); // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. void Store64BitValueToStack(Location dest, int64_t value); @@ -388,6 +395,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { // We will fix this up in the linker later to have the right value. static constexpr int32_t kDummy32BitOffset = 256; + // Fixups for jump tables need to be handled specially. 
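Both RIPFixup::Process implementations (the x86 one earlier and the x86-64 one just above) patch a 4-byte displacement sitting immediately before `pos`, because `pos` is the offset of the next instruction and x86 displacements are defined relative to the next instruction's address. A minimal sketch of that patch step, with a raw byte buffer standing in for MemoryRegion:

#include <cstdint>
#include <cstring>

// target_offset: where the referenced data will live, relative to code start.
// pos: offset of the instruction that follows the 4-byte displacement.
static void PatchRel32(uint8_t* code, int pos, int32_t target_offset) {
  int32_t relative_position = target_offset - pos;  // disp32 is next-PC relative
  // Equivalent of region.StoreUnaligned<int32_t>(pos - 4, relative_position).
  std::memcpy(code + pos - 4, &relative_position, sizeof(relative_position));
}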
+ ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/constant_area_fixups_x86.cc b/compiler/optimizing/constant_area_fixups_x86.cc new file mode 100644 index 0000000000..c3470002c5 --- /dev/null +++ b/compiler/optimizing/constant_area_fixups_x86.cc @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "constant_area_fixups_x86.h" + +namespace art { +namespace x86 { + +/** + * Finds instructions that need the constant area base as an input. + */ +class ConstantHandlerVisitor : public HGraphVisitor { + public: + explicit ConstantHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {} + + private: + void VisitAdd(HAdd* add) OVERRIDE { + BinaryFP(add); + } + + void VisitSub(HSub* sub) OVERRIDE { + BinaryFP(sub); + } + + void VisitMul(HMul* mul) OVERRIDE { + BinaryFP(mul); + } + + void VisitDiv(HDiv* div) OVERRIDE { + BinaryFP(div); + } + + void VisitReturn(HReturn* ret) OVERRIDE { + HConstant* value = ret->InputAt(0)->AsConstant(); + if ((value != nullptr && Primitive::IsFloatingPointType(value->GetType()))) { + ReplaceInput(ret, value, 0, true); + } + } + + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void BinaryFP(HBinaryOperation* bin) { + HConstant* rhs = bin->InputAt(1)->AsConstant(); + if (rhs != nullptr && Primitive::IsFloatingPointType(bin->GetResultType())) { + ReplaceInput(bin, rhs, 1, false); + } + } + + void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { + // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to + // address the constant area. + InitializeConstantAreaPointer(switch_insn); + HGraph* graph = GetGraph(); + HBasicBlock* block = switch_insn->GetBlock(); + HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch( + switch_insn->GetStartValue(), + switch_insn->GetNumEntries(), + switch_insn->InputAt(0), + base_, + switch_insn->GetDexPc()); + block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch); + } + + void InitializeConstantAreaPointer(HInstruction* user) { + // Ensure we only initialize the pointer once. + if (base_ != nullptr) { + return; + } + + HGraph* graph = GetGraph(); + HBasicBlock* entry = graph->GetEntryBlock(); + base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress(); + HInstruction* insert_pos = (user->GetBlock() == entry) ? 
user : entry->GetLastInstruction(); + entry->InsertInstructionBefore(base_, insert_pos); + DCHECK(base_ != nullptr); + } + + void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) { + InitializeConstantAreaPointer(insn); + HGraph* graph = GetGraph(); + HBasicBlock* block = insn->GetBlock(); + HX86LoadFromConstantTable* load_constant = + new (graph->GetArena()) HX86LoadFromConstantTable(base_, value, materialize); + block->InsertInstructionBefore(load_constant, insn); + insn->ReplaceInput(load_constant, input_index); + } + + void HandleInvoke(HInvoke* invoke) { + // Ensure that we can load FP arguments from the constant area. + for (size_t i = 0, e = invoke->InputCount(); i < e; i++) { + HConstant* input = invoke->InputAt(i)->AsConstant(); + if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) { + ReplaceInput(invoke, input, i, true); + } + } + } + + // The generated HX86ComputeBaseMethodAddress in the entry block needed as an + // input to the HX86LoadFromConstantTable instructions. + HX86ComputeBaseMethodAddress* base_; +}; + +void ConstantAreaFixups::Run() { + ConstantHandlerVisitor visitor(graph_); + visitor.VisitInsertionOrder(); +} + +} // namespace x86 +} // namespace art diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 694f7687ba..b2e222f1a9 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -51,7 +51,7 @@ static void TestCode(const uint16_t* data, X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); HConstantFolding(graph).Run(); - SSAChecker ssa_checker_cf(&allocator, graph); + SSAChecker ssa_checker_cf(graph); ssa_checker_cf.Run(); ASSERT_TRUE(ssa_checker_cf.IsValid()); @@ -63,7 +63,7 @@ static void TestCode(const uint16_t* data, check_after_cf(graph); HDeadCodeElimination(graph).Run(); - SSAChecker ssa_checker_dce(&allocator, graph); + SSAChecker ssa_checker_dce(graph); ssa_checker_dce.Run(); ASSERT_TRUE(ssa_checker_dce.IsValid()); diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index ee3a61aa0c..cf0a4acd4a 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -45,7 +45,7 @@ static void TestCode(const uint16_t* data, X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); HDeadCodeElimination(graph).Run(); - SSAChecker ssa_checker(&allocator, graph); + SSAChecker ssa_checker(graph); ssa_checker.Run(); ASSERT_TRUE(ssa_checker.IsValid()); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 89da1b1fbe..3de96b5d84 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -16,10 +16,12 @@ #include "graph_checker.h" +#include <algorithm> #include <map> #include <string> #include <sstream> +#include "base/arena_containers.h" #include "base/bit_vector-inl.h" #include "base/stringprintf.h" @@ -29,19 +31,21 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { current_block_ = block; // Check consistency with respect to predecessors of `block`. 
- std::map<HBasicBlock*, size_t> predecessors_count; + ArenaSafeMap<HBasicBlock*, size_t> predecessors_count( + std::less<HBasicBlock*>(), GetGraph()->GetArena()->Adapter(kArenaAllocGraphChecker)); for (HBasicBlock* p : block->GetPredecessors()) { - ++predecessors_count[p]; + auto it = predecessors_count.find(p); + if (it != predecessors_count.end()) { + ++it->second; + } else { + predecessors_count.Put(p, 1u); + } } for (auto& pc : predecessors_count) { HBasicBlock* p = pc.first; size_t p_count_in_block_predecessors = pc.second; - size_t block_count_in_p_successors = 0; - for (HBasicBlock* p_successor : p->GetSuccessors()) { - if (p_successor == block) { - ++block_count_in_p_successors; - } - } + size_t block_count_in_p_successors = + std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block); if (p_count_in_block_predecessors != block_count_in_p_successors) { AddError(StringPrintf( "Block %d lists %zu occurrences of block %d in its predecessors, whereas " @@ -52,19 +56,21 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } // Check consistency with respect to successors of `block`. - std::map<HBasicBlock*, size_t> successors_count; + ArenaSafeMap<HBasicBlock*, size_t> successors_count( + std::less<HBasicBlock*>(), GetGraph()->GetArena()->Adapter(kArenaAllocGraphChecker)); for (HBasicBlock* s : block->GetSuccessors()) { - ++successors_count[s]; + auto it = successors_count.find(s); + if (it != successors_count.end()) { + ++it->second; + } else { + successors_count.Put(s, 1u); + } } for (auto& sc : successors_count) { HBasicBlock* s = sc.first; size_t s_count_in_block_successors = sc.second; - size_t block_count_in_s_predecessors = 0; - for (HBasicBlock* s_predecessor : s->GetPredecessors()) { - if (s_predecessor == block) { - ++block_count_in_s_predecessors; - } - } + size_t block_count_in_s_predecessors = + std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block); if (s_count_in_block_successors != block_count_in_s_predecessors) { AddError(StringPrintf( "Block %d lists %zu occurrences of block %d in its successors, whereas " diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 7ddffc136a..abf3659d91 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -26,12 +26,11 @@ namespace art { // A control-flow graph visitor performing various checks. class GraphChecker : public HGraphDelegateVisitor { public: - GraphChecker(ArenaAllocator* allocator, HGraph* graph, - const char* dump_prefix = "art::GraphChecker: ") + explicit GraphChecker(HGraph* graph, const char* dump_prefix = "art::GraphChecker: ") : HGraphDelegateVisitor(graph), - allocator_(allocator), + errors_(graph->GetArena()->Adapter(kArenaAllocGraphChecker)), dump_prefix_(dump_prefix), - seen_ids_(allocator, graph->GetCurrentInstructionId(), false) {} + seen_ids_(graph->GetArena(), graph->GetCurrentInstructionId(), false) {} // Check the whole graph (in insertion order). virtual void Run() { VisitInsertionOrder(); } @@ -65,7 +64,7 @@ class GraphChecker : public HGraphDelegateVisitor { } // Get the list of detected errors. - const std::vector<std::string>& GetErrors() const { + const ArenaVector<std::string>& GetErrors() const { return errors_; } @@ -82,11 +81,10 @@ class GraphChecker : public HGraphDelegateVisitor { errors_.push_back(error); } - ArenaAllocator* const allocator_; // The block currently visited. HBasicBlock* current_block_ = nullptr; // Errors encountered while checking the graph. 
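The rewritten check keeps the same invariant as before: every predecessor p of a block must list that block among its successors exactly as many times as p occurs in the block's predecessor list, since multi-edges are legal and counts, not mere membership, must match. A standalone restatement with toy types:

#include <algorithm>
#include <map>
#include <vector>

struct Block {
  std::vector<Block*> predecessors;
  std::vector<Block*> successors;
};

// Count-based symmetry check, as in GraphChecker::VisitBasicBlock.
static bool PredecessorsConsistent(const Block* block) {
  std::map<const Block*, size_t> count_in_preds;
  for (const Block* p : block->predecessors) {
    ++count_in_preds[p];
  }
  for (const auto& entry : count_in_preds) {
    size_t count_in_succs = std::count(entry.first->successors.begin(),
                                       entry.first->successors.end(),
                                       block);
    if (entry.second != count_in_succs) {
      return false;  // asymmetric edge: the CFG is malformed
    }
  }
  return true;
}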
- std::vector<std::string> errors_; + ArenaVector<std::string> errors_; private: // String displayed before dumped errors. @@ -102,9 +100,8 @@ class SSAChecker : public GraphChecker { public: typedef GraphChecker super_type; - // TODO: There's no need to pass a separate allocator as we could get it from the graph. - SSAChecker(ArenaAllocator* allocator, HGraph* graph) - : GraphChecker(allocator, graph, "art::SSAChecker: ") {} + explicit SSAChecker(HGraph* graph) + : GraphChecker(graph, "art::SSAChecker: ") {} // Check the whole graph (in reverse post-order). void Run() OVERRIDE { diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index 0f6677519e..fee56c7f9e 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -50,7 +50,7 @@ static void TestCode(const uint16_t* data) { HGraph* graph = CreateCFG(&allocator, data); ASSERT_NE(graph, nullptr); - GraphChecker graph_checker(&allocator, graph); + GraphChecker graph_checker(graph); graph_checker.Run(); ASSERT_TRUE(graph_checker.IsValid()); } @@ -64,7 +64,7 @@ static void TestCodeSSA(const uint16_t* data) { graph->BuildDominatorTree(); graph->TransformToSsa(); - SSAChecker ssa_checker(&allocator, graph); + SSAChecker ssa_checker(graph); ssa_checker.Run(); ASSERT_TRUE(ssa_checker.IsValid()); } @@ -112,7 +112,7 @@ TEST(GraphChecker, InconsistentPredecessorsAndSuccessors) { ArenaAllocator allocator(&pool); HGraph* graph = CreateSimpleCFG(&allocator); - GraphChecker graph_checker(&allocator, graph); + GraphChecker graph_checker(graph); graph_checker.Run(); ASSERT_TRUE(graph_checker.IsValid()); @@ -130,7 +130,7 @@ TEST(GraphChecker, BlockEndingWithNonBranchInstruction) { ArenaAllocator allocator(&pool); HGraph* graph = CreateSimpleCFG(&allocator); - GraphChecker graph_checker(&allocator, graph); + GraphChecker graph_checker(graph); graph_checker.Run(); ASSERT_TRUE(graph_checker.IsValid()); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 17c851cac9..d468540091 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -76,6 +76,9 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; + void SimplifySystemArrayCopy(HInvoke* invoke); + void SimplifyStringEquals(HInvoke* invoke); + OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; int simplifications_at_current_position_ = 0; @@ -1051,28 +1054,100 @@ void InstructionSimplifierVisitor::VisitFakeString(HFakeString* instruction) { instruction->GetBlock()->RemoveInstruction(instruction); } -void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { - if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) { - HInstruction* argument = instruction->InputAt(1); - HInstruction* receiver = instruction->InputAt(0); - if (receiver == argument) { - // Because String.equals is an instance call, the receiver is - // a null check if we don't know it's null. The argument however, will - // be the actual object. So we cannot end up in a situation where both - // are equal but could be null. 
- DCHECK(CanEnsureNotNullAt(argument, instruction)); - instruction->ReplaceWith(GetGraph()->GetIntConstant(1)); - instruction->GetBlock()->RemoveInstruction(instruction); - } else { - StringEqualsOptimizations optimizations(instruction); - if (CanEnsureNotNullAt(argument, instruction)) { - optimizations.SetArgumentNotNull(); +void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { + HInstruction* argument = instruction->InputAt(1); + HInstruction* receiver = instruction->InputAt(0); + if (receiver == argument) { + // Because String.equals is an instance call, the receiver is + // a null check if we don't know it's null. The argument however, will + // be the actual object. So we cannot end up in a situation where both + // are equal but could be null. + DCHECK(CanEnsureNotNullAt(argument, instruction)); + instruction->ReplaceWith(GetGraph()->GetIntConstant(1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else { + StringEqualsOptimizations optimizations(instruction); + if (CanEnsureNotNullAt(argument, instruction)) { + optimizations.SetArgumentNotNull(); + } + ScopedObjectAccess soa(Thread::Current()); + ReferenceTypeInfo argument_rti = argument->GetReferenceTypeInfo(); + if (argument_rti.IsValid() && argument_rti.IsStringClass()) { + optimizations.SetArgumentIsString(); + } + } +} + +static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potential_array) { + if (potential_length->IsArrayLength()) { + return potential_length->InputAt(0) == potential_array; + } + + if (potential_array->IsNewArray()) { + return potential_array->InputAt(0) == potential_length; + } + + return false; +} + +void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) { + HInstruction* source = instruction->InputAt(0); + HInstruction* destination = instruction->InputAt(2); + HInstruction* count = instruction->InputAt(4); + SystemArrayCopyOptimizations optimizations(instruction); + if (CanEnsureNotNullAt(source, instruction)) { + optimizations.SetSourceIsNotNull(); + } + if (CanEnsureNotNullAt(destination, instruction)) { + optimizations.SetDestinationIsNotNull(); + } + if (destination == source) { + optimizations.SetDestinationIsSource(); + } + + if (IsArrayLengthOf(count, source)) { + optimizations.SetCountIsSourceLength(); + } + + if (IsArrayLengthOf(count, destination)) { + optimizations.SetCountIsDestinationLength(); + } + + { + ScopedObjectAccess soa(Thread::Current()); + ReferenceTypeInfo destination_rti = destination->GetReferenceTypeInfo(); + if (destination_rti.IsValid()) { + if (destination_rti.IsObjectArray()) { + if (destination_rti.IsExact()) { + optimizations.SetDoesNotNeedTypeCheck(); + } + optimizations.SetDestinationIsTypedObjectArray(); } - ScopedObjectAccess soa(Thread::Current()); - if (argument->GetReferenceTypeInfo().IsStringClass()) { - optimizations.SetArgumentIsString(); + if (destination_rti.IsPrimitiveArrayClass()) { + optimizations.SetDestinationIsPrimitiveArray(); + } else if (destination_rti.IsNonPrimitiveArrayClass()) { + optimizations.SetDestinationIsNonPrimitiveArray(); } } + ReferenceTypeInfo source_rti = source->GetReferenceTypeInfo(); + if (source_rti.IsValid()) { + if (destination_rti.IsValid() && destination_rti.CanArrayHoldValuesOf(source_rti)) { + optimizations.SetDoesNotNeedTypeCheck(); + } + if (source_rti.IsPrimitiveArrayClass()) { + optimizations.SetSourceIsPrimitiveArray(); + } else if (source_rti.IsNonPrimitiveArrayClass()) { + optimizations.SetSourceIsNonPrimitiveArray(); + } + } + } +} 
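SimplifySystemArrayCopy records what it can prove through the SystemArrayCopyOptimizations accessors declared further down in intrinsics.h: each statically provable fact (non-null source, no type check needed, and so on) becomes one bit on the invoke, and every set bit lets the intrinsic code generator drop one runtime check. A sketch of that pattern with an assumed bit layout; the real accessors come from the INTRINSIC_OPTIMIZATION macro:

#include <cstdint>

class ArrayCopyFacts {
 public:
  enum Flag : uint32_t {
    kSourceIsNotNull      = 1u << 0,
    kDestinationIsNotNull = 1u << 1,
    kDestinationIsSource  = 1u << 2,
    kDoesNotNeedTypeCheck = 1u << 5,
  };

  // The simplifier proves and records facts; the code generator queries
  // them later when deciding which guards the fast path still needs.
  void Set(Flag f) { bits_ |= f; }
  bool Get(Flag f) const { return (bits_ & f) != 0u; }

 private:
  uint32_t bits_ = 0u;
};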
+ +void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { + if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) { + SimplifyStringEquals(instruction); + } else if (instruction->GetIntrinsic() == Intrinsics::kSystemArrayCopy) { + SimplifySystemArrayCopy(instruction); } } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 95646222ef..dbe75249be 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -210,6 +210,9 @@ static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_s case kIntrinsicSystemArrayCopyCharArray: return Intrinsics::kSystemArrayCopyChar; + case kIntrinsicSystemArrayCopy: + return Intrinsics::kSystemArrayCopy; + // Thread.currentThread. case kIntrinsicCurrentThread: return Intrinsics::kThreadCurrentThread; diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index d50fe79f93..e459516e59 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -168,6 +168,26 @@ class StringEqualsOptimizations : public IntrinsicOptimizations { DISALLOW_COPY_AND_ASSIGN(StringEqualsOptimizations); }; +class SystemArrayCopyOptimizations : public IntrinsicOptimizations { + public: + explicit SystemArrayCopyOptimizations(HInvoke* invoke) : IntrinsicOptimizations(invoke) {} + + INTRINSIC_OPTIMIZATION(SourceIsNotNull, 0); + INTRINSIC_OPTIMIZATION(DestinationIsNotNull, 1); + INTRINSIC_OPTIMIZATION(DestinationIsSource, 2); + INTRINSIC_OPTIMIZATION(CountIsSourceLength, 3); + INTRINSIC_OPTIMIZATION(CountIsDestinationLength, 4); + INTRINSIC_OPTIMIZATION(DoesNotNeedTypeCheck, 5); + INTRINSIC_OPTIMIZATION(DestinationIsTypedObjectArray, 6); + INTRINSIC_OPTIMIZATION(DestinationIsNonPrimitiveArray, 7); + INTRINSIC_OPTIMIZATION(DestinationIsPrimitiveArray, 8); + INTRINSIC_OPTIMIZATION(SourceIsNonPrimitiveArray, 9); + INTRINSIC_OPTIMIZATION(SourceIsPrimitiveArray, 10); + + private: + DISALLOW_COPY_AND_ASSIGN(SystemArrayCopyOptimizations); +}; + #undef INTRISIC_OPTIMIZATION } // namespace art diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 938c78e9c1..58e479afc7 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1307,6 +1307,308 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { + CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); + LocationSummary* locations = invoke->GetLocations(); + if (locations == nullptr) { + return; + } + + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + + if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) { + locations->SetInAt(1, Location::RequiresRegister()); + } + if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) { + locations->SetInAt(3, Location::RequiresRegister()); + } + if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) { + locations->SetInAt(4, Location::RequiresRegister()); + } +} + +static void CheckPosition(ArmAssembler* assembler, + Location pos, + Register input, + Location length, + SlowPathCode* slow_path, + Register input_len, + Register temp, + bool length_is_input_length = false) { + // Where is 
the length in the Array? + const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); + + if (pos.IsConstant()) { + int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); + if (pos_const == 0) { + if (!length_is_input_length) { + // Check that length(input) >= length. + __ LoadFromOffset(kLoadWord, temp, input, length_offset); + if (length.IsConstant()) { + __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ cmp(temp, ShifterOperand(length.AsRegister<Register>())); + } + __ b(slow_path->GetEntryLabel(), LT); + } + } else { + // Check that length(input) >= pos. + __ LoadFromOffset(kLoadWord, input_len, input, length_offset); + __ subs(temp, input_len, ShifterOperand(pos_const)); + __ b(slow_path->GetEntryLabel(), LT); + + // Check that (length(input) - pos) >= length. + if (length.IsConstant()) { + __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ cmp(temp, ShifterOperand(length.AsRegister<Register>())); + } + __ b(slow_path->GetEntryLabel(), LT); + } + } else if (length_is_input_length) { + // The only way the copy can succeed is if pos is zero. + Register pos_reg = pos.AsRegister<Register>(); + __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel()); + } else { + // Check that pos >= 0. + Register pos_reg = pos.AsRegister<Register>(); + __ cmp(pos_reg, ShifterOperand(0)); + __ b(slow_path->GetEntryLabel(), LT); + + // Check that pos <= length(input). + __ LoadFromOffset(kLoadWord, temp, input, length_offset); + __ subs(temp, temp, ShifterOperand(pos_reg)); + __ b(slow_path->GetEntryLabel(), LT); + + // Check that (length(input) - pos) >= length. + if (length.IsConstant()) { + __ cmp(temp, ShifterOperand(length.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ cmp(temp, ShifterOperand(length.AsRegister<Register>())); + } + __ b(slow_path->GetEntryLabel(), LT); + } +} + +void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + + Register src = locations->InAt(0).AsRegister<Register>(); + Location src_pos = locations->InAt(1); + Register dest = locations->InAt(2).AsRegister<Register>(); + Location dest_pos = locations->InAt(3); + Location length = locations->InAt(4); + Register temp1 = locations->GetTemp(0).AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + + SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(slow_path); + + Label ok; + SystemArrayCopyOptimizations optimizations(invoke); + + if (!optimizations.GetDestinationIsSource()) { + if (!src_pos.IsConstant() || !dest_pos.IsConstant()) { + __ cmp(src, ShifterOperand(dest)); + } + } + + // If source and destination are the same, we go to slow path if we need to do + // forward copying. + if (src_pos.IsConstant()) { + int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); + if (dest_pos.IsConstant()) { + // Checked when building locations. 
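+      // (Editorial comment, not in the patch: CreateSystemArrayCopyLocationSummary
+      // already sent the forward-overlapping constant case, e.g. a hypothetical
+      // arraycopy(a, 0, a, 1, n), to the runtime call, so when both positions are
+      // constants and the destination is the source, only src_pos >= dest_pos can
+      // reach this point -- hence the DCHECK below rather than a runtime test.)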
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ b(&ok, NE);
+      }
+      __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
+      __ b(slow_path->GetEntryLabel(), GT);
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ b(&ok, NE);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos_constant));
+    } else {
+      __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
+    }
+    __ b(slow_path->GetEntryLabel(), LT);
+  }
+
+  __ Bind(&ok);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel());
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ cmp(length.AsRegister<Register>(), ShifterOperand(0));
+    __ b(slow_path->GetEntryLabel(), LT);
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+    __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
+    __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
+    bool did_unpoison = false;
+    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+        !optimizations.GetSourceIsNonPrimitiveArray()) {
+      // One or two of the references need to be unpoisoned. Unpoison them
+      // both to make the identity check valid.
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ MaybeUnpoisonHeapReference(temp2);
+      did_unpoison = true;
+    }
+
+    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+      // Bail out if the destination is not a non primitive array.
+      __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+      __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(temp3);
+      __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+    }
+
+    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      // Bail out if the source is not a non primitive array.
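+      // (Editorial comment, not in the patch: a null component type below means
+      // the class is not an array at all, and a non-zero half-word at
+      // primitive_offset means a primitive component type; since
+      // Primitive::kPrimNot == 0, either case branches to the slow path.)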
+ __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset); + __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp3); + __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel()); + } + + __ cmp(temp1, ShifterOperand(temp2)); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + Label do_copy; + __ b(&do_copy, EQ); + if (!did_unpoison) { + __ MaybeUnpoisonHeapReference(temp1); + } + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + __ MaybeUnpoisonHeapReference(temp1); + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // No need to unpoison the result, we're comparing against null. + __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ b(slow_path->GetEntryLabel(), NE); + } + } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { + DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); + // Bail out if the source is not a non primitive array. + __ LoadFromOffset(kLoadWord, temp1, src, class_offset); + __ MaybeUnpoisonHeapReference(temp1); + __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); + __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp3); + __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel()); + } + + // Compute base source address, base destination address, and end source address. + + uint32_t element_size = sizeof(int32_t); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + if (src_pos.IsConstant()) { + int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(temp1, src, element_size * constant + offset); + } else { + __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2)); + __ AddConstant(temp1, offset); + } + + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(temp2, dest, element_size * constant + offset); + } else { + __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2)); + __ AddConstant(temp2, offset); + } + + if (length.IsConstant()) { + int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(temp3, temp1, element_size * constant); + } else { + __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2)); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison, nor do any read barrier as the next uses of the destination + // array will do it. + Label loop, done; + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&done, EQ); + __ Bind(&loop); + __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); + __ str(IP, Address(temp2, element_size, Address::PostIndex)); + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&loop, NE); + __ Bind(&done); + + // We only need one card marking on the destination array. + codegen_->MarkGCCard(temp1, + temp2, + dest, + Register(kNoRegister), + false); + + __ Bind(slow_path->GetExitLabel()); +} + // Unimplemented intrinsics. 
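
[Editorial note, not part of the patch] The address computation and copy loop
emitted by the two VisitSystemArrayCopy implementations (ARM above, x86-64
below) reduce to the following C++ sketch. The function name, the pointer
types, and the use of memcpy are illustrative assumptions; only the offsets,
the element size, and the loop structure come from the generated code above.

    #include <cstdint>
    #include <cstring>

    // Fast path once all null/range/type checks have passed. Assumes 32-bit
    // (compressed) heap references, matching element_size = sizeof(int32_t).
    void ArrayCopyFastPath(uint8_t* src, int32_t src_pos,
                           uint8_t* dest, int32_t dest_pos,
                           int32_t length, uint32_t data_offset) {
      const uint32_t element_size = sizeof(int32_t);
      // temp1/temp2: address of the first source/destination element.
      uint8_t* cur_src = src + data_offset + element_size * src_pos;
      uint8_t* cur_dest = dest + data_offset + element_size * dest_pos;
      // temp3: one past the last source element.
      uint8_t* src_end = cur_src + element_size * length;
      // Raw reference-sized copy; no read barrier or (un)poisoning is needed
      // because the references are only moved, never dereferenced here.
      while (cur_src != src_end) {
        std::memcpy(cur_dest, cur_src, element_size);  // ldr/str resp. movl pair
        cur_src += element_size;
        cur_dest += element_size;
      }
      // A single card mark on 'dest' follows.
    }
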
#define UNIMPLEMENTED_INTRINSIC(Name) \ diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h index 2abb605e6e..127e9a4aa0 100644 --- a/compiler/optimizing/intrinsics_arm.h +++ b/compiler/optimizing/intrinsics_arm.h @@ -33,8 +33,10 @@ class CodeGeneratorARM; class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor { public: - IntrinsicLocationsBuilderARM(ArenaAllocator* arena, const ArmInstructionSetFeatures& features) - : arena_(arena), features_(features) {} + IntrinsicLocationsBuilderARM(ArenaAllocator* arena, + ArmAssembler* assembler, + const ArmInstructionSetFeatures& features) + : arena_(arena), assembler_(assembler), features_(features) {} // Define visitor methods. @@ -52,6 +54,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + ArmAssembler* assembler_; const ArmInstructionSetFeatures& features_; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index b0cfd0d1bc..4da94ee9b3 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1447,6 +1447,7 @@ void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED } UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h index bfe5e55c56..8f1d5e1c4d 100644 --- a/compiler/optimizing/intrinsics_list.h +++ b/compiler/optimizing/intrinsics_list.h @@ -58,6 +58,7 @@ V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache) \ V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache) \ V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache) \ + V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache) \ V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache) \ V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache) \ V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache) \ diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index b60905d682..764a11475f 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -812,6 +812,7 @@ UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 263c37596f..e83aebb5be 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2255,6 +2255,7 @@ UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(LongRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 098db4ca28..e0d88a91d3 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -752,7 +752,7 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); - // arraycopy(Object src, int srcPos, Object dest, int destPos, int length). 
+ // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length). locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); locations->SetInAt(2, Location::RequiresRegister()); @@ -768,19 +768,27 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) static void CheckPosition(X86_64Assembler* assembler, Location pos, CpuRegister input, - CpuRegister length, + Location length, SlowPathCode* slow_path, CpuRegister input_len, - CpuRegister temp) { - // Where is the length in the String? + CpuRegister temp, + bool length_is_input_length = false) { + // Where is the length in the Array? const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); if (pos.IsConstant()) { int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); if (pos_const == 0) { - // Check that length(input) >= length. - __ cmpl(Address(input, length_offset), length); - __ j(kLess, slow_path->GetEntryLabel()); + if (!length_is_input_length) { + // Check that length(input) >= length. + if (length.IsConstant()) { + __ cmpl(Address(input, length_offset), + Immediate(length.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>()); + } + __ j(kLess, slow_path->GetEntryLabel()); + } } else { // Check that length(input) >= pos. __ movl(input_len, Address(input, length_offset)); @@ -789,9 +797,18 @@ static void CheckPosition(X86_64Assembler* assembler, // Check that (length(input) - pos) >= length. __ leal(temp, Address(input_len, -pos_const)); - __ cmpl(temp, length); + if (length.IsConstant()) { + __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ cmpl(temp, length.AsRegister<CpuRegister>()); + } __ j(kLess, slow_path->GetEntryLabel()); } + } else if (length_is_input_length) { + // The only way the copy can succeed is if pos is zero. + CpuRegister pos_reg = pos.AsRegister<CpuRegister>(); + __ testl(pos_reg, pos_reg); + __ j(kNotEqual, slow_path->GetEntryLabel()); } else { // Check that pos >= 0. CpuRegister pos_reg = pos.AsRegister<CpuRegister>(); @@ -805,7 +822,11 @@ static void CheckPosition(X86_64Assembler* assembler, // Check that (length(input) - pos) >= length. __ movl(temp, Address(input, length_offset)); __ subl(temp, pos_reg); - __ cmpl(temp, length); + if (length.IsConstant()) { + __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ cmpl(temp, length.AsRegister<CpuRegister>()); + } __ j(kLess, slow_path->GetEntryLabel()); } } @@ -815,9 +836,9 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>(); - Location srcPos = locations->InAt(1); + Location src_pos = locations->InAt(1); CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>(); - Location destPos = locations->InAt(3); + Location dest_pos = locations->InAt(3); Location length = locations->InAt(4); // Temporaries that we need for MOVSW. @@ -850,6 +871,12 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { __ j(kLess, slow_path->GetEntryLabel()); } + // Validity checks: source. + CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base); + + // Validity checks: dest. + CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base); + // We need the count in RCX. 
if (length.IsConstant()) { __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); @@ -857,12 +884,6 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { __ movl(count, length.AsRegister<CpuRegister>()); } - // Validity checks: source. - CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base); - - // Validity checks: dest. - CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base); - // Okay, everything checks out. Finally time to do the copy. // Check assumption that sizeof(Char) is 2 (used in scaling below). const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); @@ -870,18 +891,18 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); - if (srcPos.IsConstant()) { - int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue(); - __ leal(src_base, Address(src, char_size * srcPos_const + data_offset)); + if (src_pos.IsConstant()) { + int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(src_base, Address(src, char_size * src_pos_const + data_offset)); } else { - __ leal(src_base, Address(src, srcPos.AsRegister<CpuRegister>(), + __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_2, data_offset)); } - if (destPos.IsConstant()) { - int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue(); - __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset)); + if (dest_pos.IsConstant()) { + int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset)); } else { - __ leal(dest_base, Address(dest, destPos.AsRegister<CpuRegister>(), + __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_2, data_offset)); } @@ -891,6 +912,231 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } + +void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { + CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { + X86_64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + + CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>(); + Location src_pos = locations->InAt(1); + CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>(); + Location dest_pos = locations->InAt(3); + Location length = locations->InAt(4); + CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); + CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>(); + + SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(slow_path); + + NearLabel ok; + SystemArrayCopyOptimizations optimizations(invoke); + + if (!optimizations.GetDestinationIsSource()) { + if (!src_pos.IsConstant() || !dest_pos.IsConstant()) { + __ cmpl(src, dest); + } + } + 
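+  // (Editorial comment, not in the patch: the flags of the cmpl(src, dest)
+  // above are consumed by the j(kNotEqual, &ok) branches below -- distinct
+  // objects cannot overlap, so the position comparison is skipped. The ARM
+  // version above uses the same pattern with cmp/b(&ok, NE).)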
+
+  // If source and destination are the same, we go to slow path if we need to do
+  // forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ j(kNotEqual, &ok);
+      }
+      __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
+      __ j(kGreater, slow_path->GetEntryLabel());
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ j(kNotEqual, &ok);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
+      __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
+      __ j(kLess, slow_path->GetEntryLabel());
+    }
+  }
+
+  __ Bind(&ok);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ testl(src, src);
+    __ j(kEqual, slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ testl(dest, dest);
+    __ j(kEqual, slow_path->GetEntryLabel());
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
+    __ j(kLess, slow_path->GetEntryLabel());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+    __ movl(temp1, Address(dest, class_offset));
+    __ movl(temp2, Address(src, class_offset));
+    bool did_unpoison = false;
+    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+        !optimizations.GetSourceIsNonPrimitiveArray()) {
+      // One or two of the references need to be unpoisoned. Unpoison them
+      // both to make the identity check valid.
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ MaybeUnpoisonHeapReference(temp2);
+      did_unpoison = true;
+    }
+
+    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+      // Bail out if the destination is not a non primitive array.
+      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+      __ testl(CpuRegister(TMP), CpuRegister(TMP));
+      __ j(kEqual, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+
+    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      // Bail out if the source is not a non primitive array.
+ __ movl(CpuRegister(TMP), Address(temp2, component_offset)); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + + __ cmpl(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + NearLabel do_copy; + __ j(kEqual, &do_copy); + if (!did_unpoison) { + __ MaybeUnpoisonHeapReference(temp1); + } + __ movl(temp1, Address(temp1, component_offset)); + __ MaybeUnpoisonHeapReference(temp1); + __ movl(temp1, Address(temp1, super_offset)); + // No need to unpoison the result, we're comparing against null. + __ testl(temp1, temp1); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { + DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); + // Bail out if the source is not a non primitive array. + __ movl(temp1, Address(src, class_offset)); + __ MaybeUnpoisonHeapReference(temp1); + __ movl(CpuRegister(TMP), Address(temp1, component_offset)); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + + // Compute base source address, base destination address, and end source address. + + uint32_t element_size = sizeof(int32_t); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + if (src_pos.IsConstant()) { + int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp1, Address(src, element_size * constant + offset)); + } else { + __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset)); + } + + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp2, Address(dest, element_size * constant + offset)); + } else { + __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset)); + } + + if (length.IsConstant()) { + int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp3, Address(temp1, element_size * constant)); + } else { + __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0)); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison, nor do any read barrier as the next uses of the destination + // array will do it. + NearLabel loop, done; + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + __ Bind(&loop); + __ movl(CpuRegister(TMP), Address(temp1, 0)); + __ movl(Address(temp2, 0), CpuRegister(TMP)); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + __ Bind(&done); + + // We only need one card marking on the destination array. 
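+  // (Editorial comment, not in the patch: passing no value register and
+  // 'false' below plausibly requests an unconditional card mark -- after a
+  // successful copy the destination range is dirty whatever values were
+  // written, so a per-store null check would buy nothing.)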
+ codegen_->MarkGCCard(temp1, + temp2, + dest, + CpuRegister(kNoRegister), + false); + + __ Bind(slow_path->GetExitLabel()); +} + void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCall, diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 24a89bca4e..ed401b67c5 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -606,8 +606,23 @@ static void UpdateInputsUsers(HInstruction* instruction) { void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement) { DCHECK(initial->GetBlock() == this); - InsertInstructionBefore(replacement, initial); - initial->ReplaceWith(replacement); + if (initial->IsControlFlow()) { + // We can only replace a control flow instruction with another control flow instruction. + DCHECK(replacement->IsControlFlow()); + DCHECK_EQ(replacement->GetId(), -1); + DCHECK_EQ(replacement->GetType(), Primitive::kPrimVoid); + DCHECK_EQ(initial->GetBlock(), this); + DCHECK_EQ(initial->GetType(), Primitive::kPrimVoid); + DCHECK(initial->GetUses().IsEmpty()); + DCHECK(initial->GetEnvUses().IsEmpty()); + replacement->SetBlock(this); + replacement->SetId(GetGraph()->GetNextInstructionId()); + instructions_.InsertInstructionBefore(replacement, initial); + UpdateInputsUsers(replacement); + } else { + InsertInstructionBefore(replacement, initial); + initial->ReplaceWith(replacement); + } RemoveInstruction(initial); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 4cd5133e95..2426f8b08d 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1085,7 +1085,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ M(X86ComputeBaseMethodAddress, Instruction) \ - M(X86LoadFromConstantTable, Instruction) + M(X86LoadFromConstantTable, Instruction) \ + M(X86PackedSwitch, Instruction) #define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) @@ -1648,7 +1649,8 @@ class ReferenceTypeInfo : ValueObject { } bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) { - return IsValid() && GetTypeHandle()->IsStringClass(); + DCHECK(IsValid()); + return GetTypeHandle()->IsStringClass(); } bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) { @@ -1662,15 +1664,36 @@ class ReferenceTypeInfo : ValueObject { } bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); return GetTypeHandle()->IsArrayClass(); } + bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsPrimitiveArray(); + } + + bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray(); + } + bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); if (!IsExact()) return false; if (!IsArrayClass()) return false; return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get()); } + bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + if (!IsExact()) return false; + if (!IsArrayClass()) return false; + if (!rti.IsArrayClass()) return false; + return GetTypeHandle()->GetComponentType()->IsAssignableFrom( + rti.GetTypeHandle()->GetComponentType()); + } + Handle<mirror::Class> 
GetTypeHandle() const { return type_handle_; }

  bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -4527,7 +4550,7 @@ class HArraySet : public HTemplateInstruction<3> {

 class HArrayLength : public HExpression<1> {
  public:
-  explicit HArrayLength(HInstruction* array, uint32_t dex_pc)
+  HArrayLength(HInstruction* array, uint32_t dex_pc)
     : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
     // Note that arrays do not change length, so the instruction does not
     // depend on any write.
@@ -4642,6 +4665,9 @@ class HLoadClass : public HExpression<1> {
         generate_clinit_check_(false),
         needs_access_check_(needs_access_check),
         loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
+    // The referrer's class should not need an access check. We never inline
+    // unverified methods, so we can't possibly end up in this situation.
+    DCHECK(!is_referrers_class_ || !needs_access_check_);
     SetRawInputAt(0, current_method);
   }

@@ -4664,7 +4690,7 @@ class HLoadClass : public HExpression<1> {
   bool NeedsEnvironment() const OVERRIDE {
     // Will call runtime and load the class if the class is not loaded yet.
     // TODO: finer grain decision.
-    return !is_referrers_class_ || needs_access_check_;
+    return !is_referrers_class_;
   }

   bool MustGenerateClinitCheck() const {
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index f7cc872419..556217bf74 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -62,6 +62,45 @@ class HX86LoadFromConstantTable : public HExpression<2> {
   DISALLOW_COPY_AND_ASSIGN(HX86LoadFromConstantTable);
 };

+// X86 version of HPackedSwitch that holds a pointer to the base method address.
+class HX86PackedSwitch : public HTemplateInstruction<2> {
+ public:
+  HX86PackedSwitch(int32_t start_value,
+                   int32_t num_entries,
+                   HInstruction* input,
+                   HX86ComputeBaseMethodAddress* method_base,
+                   uint32_t dex_pc)
+    : HTemplateInstruction(SideEffects::None(), dex_pc),
+      start_value_(start_value),
+      num_entries_(num_entries) {
+    SetRawInputAt(0, input);
+    SetRawInputAt(1, method_base);
+  }
+
+  bool IsControlFlow() const OVERRIDE { return true; }
+
+  int32_t GetStartValue() const { return start_value_; }
+
+  int32_t GetNumEntries() const { return num_entries_; }
+
+  HX86ComputeBaseMethodAddress* GetBaseMethodAddress() const {
+    return InputAt(1)->AsX86ComputeBaseMethodAddress();
+  }
+
+  HBasicBlock* GetDefaultBlock() const {
+    // Last entry is the default block.
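+    // (Editorial example, not in the patch: with num_entries_ == 3 the
+    // successor list is [case 0, case 1, case 2, default], so index
+    // num_entries_ selects the default target.)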
+ return GetBlock()->GetSuccessors()[num_entries_]; + } + + DECLARE_INSTRUCTION(X86PackedSwitch); + + private: + const int32_t start_value_; + const int32_t num_entries_; + + DISALLOW_COPY_AND_ASSIGN(HX86PackedSwitch); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_X86_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index b501980dbe..c7f08066d4 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -169,13 +169,13 @@ class PassObserver : public ValueObject { if (kIsDebugBuild) { if (!graph_in_bad_state_) { if (graph_->IsInSsaForm()) { - SSAChecker checker(graph_->GetArena(), graph_); + SSAChecker checker(graph_); checker.Run(); if (!checker.IsValid()) { LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<SSAChecker>(checker); } } else { - GraphChecker checker(graph_->GetArena(), graph_); + GraphChecker checker(graph_); checker.Run(); if (!checker.IsValid()) { LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<GraphChecker>(checker); |
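
[Editorial note, not part of the patch] Tying the pieces together: the
instruction simplifier records facts it can prove in the
SystemArrayCopyOptimizations bit set, and CreateSystemArrayCopyLocationSummary
plus the per-architecture VisitSystemArrayCopy implementations query those
bits to elide checks. SimplifySystemArrayCopy itself is not shown in this
diff, so the sketch below is an assumption: the Set##Name setters are inferred
from the INTRINSIC_OPTIMIZATION macro, the flag-setting policy is a guess at
what the simplifier might prove from reference type info, and the surrounding
ART headers are presumed available.

    // Hypothetical sketch; only SystemArrayCopyOptimizations and the
    // ReferenceTypeInfo helpers (IsPrimitiveArrayClass, etc.) appear in
    // this diff.
    void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) {
      HInstruction* source = instruction->InputAt(0);
      HInstruction* destination = instruction->InputAt(2);
      SystemArrayCopyOptimizations optimizations(instruction);

      if (source == destination) {
        // Drives the overlap handling in the code generators.
        optimizations.SetDestinationIsSource();
      }

      ReferenceTypeInfo source_rti = source->GetReferenceTypeInfo();
      ReferenceTypeInfo destination_rti = destination->GetReferenceTypeInfo();
      if (destination_rti.IsValid()) {
        if (destination_rti.IsNonPrimitiveArrayClass()) {
          optimizations.SetDestinationIsNonPrimitiveArray();
        } else if (destination_rti.IsPrimitiveArrayClass()) {
          // CreateSystemArrayCopyLocationSummary leaves primitive copies
          // to the runtime call.
          optimizations.SetDestinationIsPrimitiveArray();
        }
        if (source_rti.IsValid() &&
            destination_rti.CanArrayHoldValuesOf(source_rti)) {
          optimizations.SetDoesNotNeedTypeCheck();
        }
      }
      if (source_rti.IsValid()) {
        if (source_rti.IsNonPrimitiveArrayClass()) {
          optimizations.SetSourceIsNonPrimitiveArray();
        } else if (source_rti.IsPrimitiveArrayClass()) {
          optimizations.SetSourceIsPrimitiveArray();
        }
      }
    }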