Diffstat (limited to 'compiler/optimizing')
30 files changed, 692 insertions, 1307 deletions
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 26bf1cbc75..1d604e7135 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -56,7 +56,6 @@ class HGraphBuilder : public ValueObject {
         return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])),
         code_start_(nullptr),
         latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
         compilation_stats_(compiler_stats),
         interpreter_metadata_(interpreter_metadata),
         dex_cache_(dex_cache) {}
@@ -77,7 +76,6 @@ class HGraphBuilder : public ValueObject {
         return_type_(return_type),
         code_start_(nullptr),
         latest_result_(nullptr),
-        can_use_baseline_for_string_init_(true),
         compilation_stats_(nullptr),
         interpreter_metadata_(nullptr),
         null_dex_cache_(),
@@ -85,10 +83,6 @@ bool BuildGraph(const DexFile::CodeItem& code);
 
-  bool CanUseBaselineForStringInit() const {
-    return can_use_baseline_for_string_init_;
-  }
-
   static constexpr const char* kBuilderPassName = "builder";
 
   // The number of entries in a packed switch before we use a jump table or specified
@@ -363,11 +357,6 @@ class HGraphBuilder : public ValueObject {
   // used by move-result instructions.
   HInstruction* latest_result_;
 
-  // We need to know whether we have built a graph that has calls to StringFactory
-  // and hasn't gone through the verifier. If the following flag is `false`, then
-  // we cannot compile with baseline.
-  bool can_use_baseline_for_string_init_;
-
   OptimizingCompilerStats* compilation_stats_;
 
   const uint8_t* interpreter_metadata_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ea0b9eca9a..a3bbfdbd27 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -142,23 +142,6 @@ size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
   return pointer_size * index;
 }
 
-void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
-  Initialize();
-  if (!is_leaf) {
-    MarkNotLeaf();
-  }
-  const bool is_64_bit = Is64BitInstructionSet(GetInstructionSet());
-  InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs()
-                               + GetGraph()->GetTemporariesVRegSlots()
-                               + 1 /* filler */,
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           0, /* the baseline compiler does not have live registers at slow path */
-                           GetGraph()->GetMaximumNumberOfOutVRegs()
-                               + (is_64_bit ? 2 : 1) /* current method */,
-                           GetGraph()->GetBlocks());
-  CompileInternal(allocator, /* is_baseline */ true);
-}
-
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
   DCHECK_EQ((*block_order_)[current_block_index_], current);
   return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
@@ -220,8 +203,12 @@ void CodeGenerator::GenerateSlowPaths() {
   current_slow_path_ = nullptr;
 }
 
-void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
-  is_baseline_ = is_baseline;
+void CodeGenerator::Compile(CodeAllocator* allocator) {
+  // The register allocator already called `InitializeCodeGeneration`,
+  // where the frame size has been computed.
+  DCHECK(block_order_ != nullptr);
+  Initialize();
+
   HGraphVisitor* instruction_visitor = GetInstructionVisitor();
   DCHECK_EQ(current_block_index_, 0u);
@@ -242,9 +229,6 @@
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
       DisassemblyScope disassembly_scope(current, *this);
-      if (is_baseline) {
-        InitLocationsBaseline(current);
-      }
       DCHECK(CheckTypeConsistency(current));
       current->Accept(instruction_visitor);
     }
@@ -254,7 +238,7 @@
 
   // Emit catch stack maps at the end of the stack map stream as expected by the
   // runtime exception handler.
-  if (!is_baseline && graph_->HasTryCatch()) {
+  if (graph_->HasTryCatch()) {
     RecordCatchBlockInfo();
   }
@@ -262,14 +246,6 @@
   Finalize(allocator);
 }
 
-void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
-  // The register allocator already called `InitializeCodeGeneration`,
-  // where the frame size has been computed.
-  DCHECK(block_order_ != nullptr);
-  Initialize();
-  CompileInternal(allocator, /* is_baseline */ false);
-}
-
 void CodeGenerator::Finalize(CodeAllocator* allocator) {
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
@@ -282,29 +258,6 @@ void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches A
   // No linker patches by default.
 }
 
-size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) {
-  for (size_t i = 0; i < length; ++i) {
-    if (!array[i]) {
-      array[i] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
-size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) {
-  for (size_t i = 0; i < length - 1; i += 2) {
-    if (!array[i] && !array[i + 1]) {
-      array[i] = true;
-      array[i + 1] = true;
-      return i;
-    }
-  }
-  LOG(FATAL) << "Could not find a register in baseline register allocator";
-  UNREACHABLE();
-}
-
 void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
                                              size_t maximum_number_of_live_core_registers,
                                              size_t maximum_number_of_live_fpu_registers,
@@ -592,123 +545,6 @@ void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
   }
 }
 
-void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
-  LocationSummary* locations = instruction->GetLocations();
-  if (locations == nullptr) return;
-
-  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
-    blocked_core_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
-    blocked_fpu_registers_[i] = false;
-  }
-
-  for (size_t i = 0, e = number_of_register_pairs_; i < e; ++i) {
-    blocked_register_pairs_[i] = false;
-  }
-
-  // Mark all fixed input, temp and output registers as used.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    BlockIfInRegister(locations->InAt(i));
-  }
-
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    BlockIfInRegister(loc);
-  }
-  Location result_location = locations->Out();
-  if (locations->OutputCanOverlapWithInputs()) {
-    BlockIfInRegister(result_location, /* is_out */ true);
-  }
-
-  SetupBlockedRegisters(/* is_baseline */ true);
-
-  // Allocate all unallocated input locations.
-  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
-    Location loc = locations->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (loc.IsUnallocated()) {
-      if ((loc.GetPolicy() == Location::kRequiresRegister)
-          || (loc.GetPolicy() == Location::kRequiresFpuRegister)) {
-        loc = AllocateFreeRegister(input->GetType());
-      } else {
-        DCHECK_EQ(loc.GetPolicy(), Location::kAny);
-        HLoadLocal* load = input->AsLoadLocal();
-        if (load != nullptr) {
-          loc = GetStackLocation(load);
-        } else {
-          loc = AllocateFreeRegister(input->GetType());
-        }
-      }
-      locations->SetInAt(i, loc);
-    }
-  }
-
-  // Allocate all unallocated temp locations.
-  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
-    Location loc = locations->GetTemp(i);
-    if (loc.IsUnallocated()) {
-      switch (loc.GetPolicy()) {
-        case Location::kRequiresRegister:
-          // Allocate a core register (large enough to fit a 32-bit integer).
-          loc = AllocateFreeRegister(Primitive::kPrimInt);
-          break;
-
-        case Location::kRequiresFpuRegister:
-          // Allocate a core register (large enough to fit a 64-bit double).
-          loc = AllocateFreeRegister(Primitive::kPrimDouble);
-          break;
-
-        default:
-          LOG(FATAL) << "Unexpected policy for temporary location "
-                     << loc.GetPolicy();
-      }
-      locations->SetTempAt(i, loc);
-    }
-  }
-  if (result_location.IsUnallocated()) {
-    switch (result_location.GetPolicy()) {
-      case Location::kAny:
-      case Location::kRequiresRegister:
-      case Location::kRequiresFpuRegister:
-        result_location = AllocateFreeRegister(instruction->GetType());
-        break;
-      case Location::kSameAsFirstInput:
-        result_location = locations->InAt(0);
-        break;
-    }
-    locations->UpdateOut(result_location);
-  }
-}
-
-void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) {
-  AllocateLocations(instruction);
-  if (instruction->GetLocations() == nullptr) {
-    if (instruction->IsTemporary()) {
-      HInstruction* previous = instruction->GetPrevious();
-      Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-      Move(previous, temp_location, instruction);
-    }
-    return;
-  }
-  AllocateRegistersLocally(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    Location location = instruction->GetLocations()->InAt(i);
-    HInstruction* input = instruction->InputAt(i);
-    if (location.IsValid()) {
-      // Move the input to the desired location.
-      if (input->GetNext()->IsTemporary()) {
-        // If the input was stored in a temporary, use that temporary to
-        // perform the move.
-        Move(input->GetNext(), location, instruction);
-      } else {
-        Move(input, location, instruction);
-      }
-    }
-  }
-}
-
 void CodeGenerator::AllocateLocations(HInstruction* instruction) {
   instruction->Accept(GetLocationBuilder());
   DCHECK(CheckTypeConsistency(instruction));
@@ -789,132 +625,6 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
   }
 }
 
-void CodeGenerator::BuildNativeGCMap(
-    ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const {
-  const std::vector<uint8_t>& gc_map_raw =
-      compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx())
-          ->GetDexGcMap();
-  verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
-
-  uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
-
-  size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
-  GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth());
-  for (size_t i = 0; i != num_stack_maps; ++i) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    uint32_t native_offset = stack_map_entry.native_pc_offset;
-    uint32_t dex_pc = stack_map_entry.dex_pc;
-    const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
-    CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
-    builder.AddEntry(native_offset, references);
-  }
-}
-
-void CodeGenerator::BuildMappingTable(ArenaVector<uint8_t>* data) const {
-  uint32_t pc2dex_data_size = 0u;
-  uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
-  uint32_t pc2dex_offset = 0u;
-  int32_t pc2dex_dalvik_offset = 0;
-  uint32_t dex2pc_data_size = 0u;
-  uint32_t dex2pc_entries = 0u;
-  uint32_t dex2pc_offset = 0u;
-  int32_t dex2pc_dalvik_offset = 0;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset);
-    pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-  }
-
-  // Walk over the blocks and find which ones correspond to catch block entries.
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      ++dex2pc_entries;
-      dex2pc_data_size += UnsignedLeb128Size(native_pc - dex2pc_offset);
-      dex2pc_data_size += SignedLeb128Size(block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-  uint32_t total_entries = pc2dex_entries + dex2pc_entries;
-  uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries);
-  uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size;
-  data->resize(data_size);
-
-  uint8_t* data_ptr = &(*data)[0];
-  uint8_t* write_pos = data_ptr;
-
-  write_pos = EncodeUnsignedLeb128(write_pos, total_entries);
-  write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries);
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size);
-  uint8_t* write_pos2 = write_pos + pc2dex_data_size;
-
-  pc2dex_offset = 0u;
-  pc2dex_dalvik_offset = 0u;
-  dex2pc_offset = 0u;
-  dex2pc_dalvik_offset = 0u;
-
-  for (size_t i = 0; i < pc2dex_entries; i++) {
-    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-    DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset);
-    write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset);
-    write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = stack_map_entry.native_pc_offset;
-    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
-  }
-
-  for (HBasicBlock* block : graph_->GetBlocks()) {
-    if (block->IsCatchBlock()) {
-      intptr_t native_pc = GetAddressOf(block);
-      write_pos2 = EncodeUnsignedLeb128(write_pos2, native_pc - dex2pc_offset);
-      write_pos2 = EncodeSignedLeb128(write_pos2, block->GetDexPc() - dex2pc_dalvik_offset);
-      dex2pc_offset = native_pc;
-      dex2pc_dalvik_offset = block->GetDexPc();
-    }
-  }
-
-
-  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size + pc2dex_data_size);
-  DCHECK_EQ(static_cast<size_t>(write_pos2 - data_ptr), data_size);
-
-  if (kIsDebugBuild) {
-    // Verify the encoded table holds the expected data.
-    MappingTable table(data_ptr);
-    CHECK_EQ(table.TotalSize(), total_entries);
-    CHECK_EQ(table.PcToDexSize(), pc2dex_entries);
-    auto it = table.PcToDexBegin();
-    auto it2 = table.DexToPcBegin();
-    for (size_t i = 0; i < pc2dex_entries; i++) {
-      const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
-      CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset());
-      CHECK_EQ(stack_map_entry.dex_pc, it.DexPc());
-      ++it;
-    }
-    for (HBasicBlock* block : graph_->GetBlocks()) {
-      if (block->IsCatchBlock()) {
-        CHECK_EQ(GetAddressOf(block), it2.NativePcOffset());
-        CHECK_EQ(block->GetDexPc(), it2.DexPc());
-        ++it2;
-      }
-    }
-    CHECK(it == table.PcToDexEnd());
-    CHECK(it2 == table.DexToPcEnd());
-  }
-}
-
-void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
-  Leb128Encoder<ArenaVector<uint8_t>> vmap_encoder(data);
-  // We currently don't use callee-saved registers.
-  size_t size = 0 + 1 /* marker */ + 0;
-  vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
-  vmap_encoder.PushBackUnsigned(size);
-  vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-}
-
 size_t CodeGenerator::ComputeStackMapsSize() {
   return stack_map_stream_.PrepareForFillIn();
 }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 5958cd89bc..4f8f146753 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -158,10 +158,8 @@ class FieldAccessCallingConvention {
 
 class CodeGenerator {
  public:
-  // Compiles the graph to executable instructions. Returns whether the compilation
-  // succeeded.
-  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
-  void CompileOptimized(CodeAllocator* allocator);
+  // Compiles the graph to executable instructions.
+  void Compile(CodeAllocator* allocator);
   static CodeGenerator* Create(HGraph* graph,
                                InstructionSet instruction_set,
                                const InstructionSetFeatures& isa_features,
@@ -214,7 +212,7 @@ class CodeGenerator {
   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
 
-  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;
+  virtual void SetupBlockedRegisters() const = 0;
 
   virtual void ComputeSpillMask() {
     core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
@@ -290,17 +288,9 @@ class CodeGenerator {
     slow_paths_.push_back(slow_path);
   }
 
-  void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
-  void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
-  void BuildNativeGCMap(
-      ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const;
   void BuildStackMaps(MemoryRegion region);
 
   size_t ComputeStackMapsSize();
 
-  bool IsBaseline() const {
-    return is_baseline_;
-  }
-
   bool IsLeafMethod() const {
     return is_leaf_;
   }
@@ -489,7 +479,6 @@ class CodeGenerator {
         fpu_callee_save_mask_(fpu_callee_save_mask),
         stack_map_stream_(graph->GetArena()),
         block_order_(nullptr),
-        is_baseline_(false),
         disasm_info_(nullptr),
         stats_(stats),
         graph_(graph),
@@ -502,15 +491,6 @@ class CodeGenerator {
     slow_paths_.reserve(8);
   }
 
-  // Register allocation logic.
-  void AllocateRegistersLocally(HInstruction* instruction) const;
-
-  // Backend specific implementation for allocating a register.
-  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;
-
-  static size_t FindFreeEntry(bool* array, size_t length);
-  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);
-
   virtual Location GetStackLocation(HLoadLocal* load) const = 0;
 
   virtual HGraphVisitor* GetLocationBuilder() = 0;
@@ -593,16 +573,11 @@ class CodeGenerator {
   // The order to use for code generation.
   const ArenaVector<HBasicBlock*>* block_order_;
 
-  // Whether we are using baseline.
-  bool is_baseline_;
-
   DisassemblyInformation* disasm_info_;
 
  private:
-  void InitLocationsBaseline(HInstruction* instruction);
   size_t GetStackOffsetOfSavedRegister(size_t index);
   void GenerateSlowPaths();
-  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
   void BlockIfInRegister(Location location, bool is_out = false) const;
   void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a11ceb9bd9..272579219f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -47,9 +47,7 @@ static bool ExpectedPairLayout(Location location) {
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = R0;
 
-// We unconditionally allocate R5 to ensure we can do long operations
-// with baseline.
-static constexpr Register kCoreSavedRegisterForBaseline = R5;
+static constexpr Register kCoreAlwaysSpillRegister = R5;
 static constexpr Register kCoreCalleeSaves[] =
     { R5, R6, R7, R8, R10, R11, LR };
 static constexpr SRegister kFpuCalleeSaves[] =
@@ -728,6 +726,24 @@ inline Condition ARMUnsignedCondition(IfCondition cond) {
   UNREACHABLE();
 }
 
+inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM condition codes can express all the necessary branches, see the
+  // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual.
+  // There is no dex instruction or HIR that would need the missing conditions
+  // "equal or unordered" or "not equal".
+  switch (cond) {
+    case kCondEQ: return EQ;
+    case kCondNE: return NE /* unordered */;
+    case kCondLT: return gt_bias ? CC : LT /* unordered */;
+    case kCondLE: return gt_bias ? LS : LE /* unordered */;
+    case kCondGT: return gt_bias ? HI /* unordered */ : GT;
+    case kCondGE: return gt_bias ? CS /* unordered */ : GE;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << Register(reg);
 }
@@ -815,58 +831,7 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) {
   CodeGenerator::Finalize(allocator);
 }
 
-Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      ArmManagedRegister pair =
-          ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        ArmManagedRegister current =
-            ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfSRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimDouble: {
-      int reg = FindTwoFreeConsecutiveAlignedEntries(blocked_fpu_registers_, kNumberOfSRegisters);
-      DCHECK_EQ(reg % 2, 0);
-      return Location::FpuRegisterPairLocation(reg, reg + 1);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[R1_R2] = true;
 
@@ -881,15 +846,7 @@ void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
-    blocked_core_registers_[kCoreSavedRegisterForBaseline] = false;
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
@@ -919,11 +876,10 @@ InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGene
 
 void CodeGeneratorARM::ComputeSpillMask() {
   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
-  // Save one extra register for baseline. Note that on thumb2, there is no easy
-  // instruction to restore just the PC, so this actually helps both baseline
-  // and non-baseline to save and restore at least two registers at entry and exit.
-  core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline);
   DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+  // There is no easy instruction to restore just the PC on thumb2. We spill and
+  // restore another arbitrary register.
+  core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister);
   fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
   // We use vpush and vpop for saving and restoring floating point registers, which take
   // a SRegister and the number of registers to save/restore after that SRegister. We
@@ -1416,15 +1372,9 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 
 void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
                                                   Label* true_label,
-                                                  Label* false_label) {
+                                                  Label* false_label ATTRIBUTE_UNUSED) {
   __ vmstat();  // transfer FP status register to ARM APSR.
-  // TODO: merge into a single branch (except "equal or unordered" and "not equal")
-  if (cond->IsFPConditionTrueIfNaN()) {
-    __ b(true_label, VS);  // VS for unordered.
-  } else if (cond->IsFPConditionFalseIfNaN()) {
-    __ b(false_label, VS);  // VS for unordered.
-  }
-  __ b(true_label, ARMCondition(cond->GetCondition()));
+  __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
 }
 
 void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
@@ -1972,9 +1922,9 @@ void InstructionCodeGeneratorARM::VisitInvokeUnresolved(HInvokeUnresolved* invok
 }
 
 void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
                                          codegen_->GetAssembler(),
@@ -2004,9 +1954,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen)
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
@@ -3803,6 +3753,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
 
   Label less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
+  Condition less_cond;
   switch (type) {
     case Primitive::kPrimLong: {
       __ cmp(left.AsRegisterPairHigh<Register>(),
@@ -3813,6 +3764,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
       __ LoadImmediate(out, 0);
       __ cmp(left.AsRegisterPairLow<Register>(),
              ShifterOperand(right.AsRegisterPairLow<Register>()));  // Unsigned compare.
+      less_cond = LO;
       break;
     }
     case Primitive::kPrimFloat:
@@ -3825,14 +3777,15 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
                  FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
       }
       __ vmstat();  // transfer FP status register to ARM APSR.
-      __ b(compare->IsGtBias() ? &greater : &less, VS);  // VS for unordered.
+      less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
       break;
     }
     default:
       LOG(FATAL) << "Unexpected compare type " << type;
+      UNREACHABLE();
   }
   __ b(&done, EQ);
-  __ b(&less, LO);  // LO is for both: unsigned compare for longs and 'less than' for floats.
+  __ b(&less, less_cond);
 
   __ Bind(&greater);
   __ LoadImmediate(out, 1);
@@ -5530,7 +5483,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
      // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -5740,8 +5693,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6027,6 +5980,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
       codegen_->AddSlowPath(slow_path);
 
+      // IP = Thread::Current()->GetIsGcMarking()
       __ LoadFromOffset(
           kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
       __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
@@ -6105,11 +6059,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
   //   }
   //
   // Note: the original implementation in ReadBarrier::Barrier is
-  // slightly more complex as:
-  // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
-  // - it performs additional checks that we do not do here for
-  //   performance reasons.
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
 
   Register ref_reg = ref.AsRegister<Register>();
   Register temp_reg = temp.AsRegister<Register>();
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 26d6d63b31..d45ea973f9 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -340,9 +340,7 @@ class CodeGeneratorARM : public CodeGenerator {
     return GetLabelOf(block)->Position();
   }
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
@@ -444,7 +442,7 @@ class CodeGeneratorARM : public CodeGenerator {
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t offset,
                                              Location temp,
@@ -452,7 +450,7 @@ class CodeGeneratorARM : public CodeGenerator {
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference array load when Baker's read barriers are used.
   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                             Location out,
+                                             Location ref,
                                              Register obj,
                                              uint32_t data_offset,
                                              Location index,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5e905fc9aa..2cb2741b17 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -93,6 +93,24 @@ inline Condition ARM64Condition(IfCondition cond) {
   UNREACHABLE();
 }
 
+inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM64 condition codes can express all the necessary branches, see the
+  // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
+  // There is no dex instruction or HIR that would need the missing conditions
+  // "equal or unordered" or "not equal".
+  switch (cond) {
+    case kCondEQ: return eq;
+    case kCondNE: return ne /* unordered */;
+    case kCondLT: return gt_bias ? cc : lt /* unordered */;
+    case kCondLE: return gt_bias ? ls : le /* unordered */;
+    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
+    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
 Location ARM64ReturnLocation(Primitive::Type return_type) {
   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
@@ -604,30 +622,13 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
             instruction_->GetLocations()->Intrinsified()));
+    // The read barrier instrumentation does not support the
+    // HArm64IntermediateAddress instruction yet.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()));
 
     __ Bind(GetEntryLabel());
 
-    // Note: In the case of a HArrayGet instruction, when the base
-    // address is a HArm64IntermediateAddress instruction, it does not
-    // point to the array object itself, but to an offset within this
-    // object. However, the read barrier entry point needs the array
-    // object address to be passed as first argument. So we
-    // temporarily set back `obj_` to that address, and restore its
-    // initial value later.
-    if (instruction_->IsArrayGet() &&
-        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* intermediate_address =
-            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
-        uint32_t intermediate_address_offset =
-            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
-        DCHECK_EQ(intermediate_address_offset, offset_);
-        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
-      }
-      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
-      __ Sub(obj_reg, obj_reg, offset_);
-    }
-
     SaveLiveRegisters(codegen, locations);
 
     // We may have to change the index's value, but as `index_` is a
@@ -728,22 +729,6 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
 
     RestoreLiveRegisters(codegen, locations);
 
-    // Restore the value of `obj_` when it corresponds to a
-    // HArm64IntermediateAddress instruction.
-    if (instruction_->IsArrayGet() &&
-        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
-      if (kIsDebugBuild) {
-        HArm64IntermediateAddress* intermediate_address =
-            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
-        uint32_t intermediate_address_offset =
-            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
-        DCHECK_EQ(intermediate_address_offset, offset_);
-        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
-      }
-      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
-      __ Add(obj_reg, obj_reg, offset_);
-    }
-
     __ B(GetExitLabel());
   }
 
@@ -1127,7 +1112,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_
   }
 }
 
-void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM64::SetupBlockedRegisters() const {
   // Blocked core registers:
   //   lr : Runtime reserved.
   //   tr : Runtime reserved.
@@ -1148,40 +1133,17 @@ void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
   }
 
-  if (is_baseline) {
-    CPURegList reserved_core_baseline_registers = callee_saved_core_registers;
-    while (!reserved_core_baseline_registers.IsEmpty()) {
-      blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true;
-    }
-  }
-
-  if (is_baseline || GetGraph()->IsDebuggable()) {
+  if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
     // now, just block them.
-    CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers;
-    while (!reserved_fp_baseline_registers.IsEmpty()) {
-      blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true;
+    CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
+    while (!reserved_fp_registers_debuggable.IsEmpty()) {
+      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true;
     }
   }
 }
 
-Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    ssize_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfAllocatableFPRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    ssize_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfAllocatableRegisters);
-    DCHECK_NE(reg, -1);
-    return Location::RegisterLocation(reg);
-  }
-}
-
 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
   __ Str(reg, MemOperand(sp, stack_index));
@@ -1970,6 +1932,9 @@ void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
 }
 
 void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the
+  // HArm64IntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -1979,6 +1944,9 @@ void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddr
 
 void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
     HArm64IntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the
+  // HArm64IntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
         Operand(InputOperandAt(instruction, 1)));
@@ -2067,6 +2035,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
   } else {
     Register temp = temps.AcquireSameSizeAs(obj);
     if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+      // The read barrier instrumentation does not support the
+      // HArm64IntermediateAddress instruction yet.
+      DCHECK(!kEmitCompilerReadBarrier);
       // We do not need to compute the intermediate address from the array: the
       // input instruction has done it already. See the comment in
       // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
@@ -2093,11 +2064,6 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
     if (index.IsConstant()) {
       codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
     } else {
-      // Note: when `obj_loc` is a HArm64IntermediateAddress, it does
-      // not contain the base address of the array object, which is
-      // needed by the read barrier entry point. So the read barrier
-      // slow path will temporarily set back `obj_loc` to the right
-      // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode).
       codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
     }
   }
@@ -2161,6 +2127,9 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
       if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+        // The read barrier instrumentation does not support the
+        // HArm64IntermediateAddress instruction yet.
+        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
@@ -2407,12 +2376,8 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
       } else {
         __ Fcmp(left, InputFPRegisterAt(compare, 1));
       }
-      if (compare->IsGtBias()) {
-        __ Cset(result, ne);
-      } else {
-        __ Csetm(result, ne);
-      }
-      __ Cneg(result, result, compare->IsGtBias() ? mi : gt);
+      __ Cset(result, ne);
+      __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
       break;
     }
     default:
@@ -2448,7 +2413,6 @@ void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Register res = RegisterFrom(locations->Out(), instruction->GetType());
   IfCondition if_cond = instruction->GetCondition();
-  Condition arm64_cond = ARM64Condition(if_cond);
 
   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
     FPRegister lhs = InputFPRegisterAt(instruction, 0);
@@ -2459,20 +2423,13 @@ void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
     } else {
       __ Fcmp(lhs, InputFPRegisterAt(instruction, 1));
     }
-    __ Cset(res, arm64_cond);
-    if (instruction->IsFPConditionTrueIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 1 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 1
-      __ Csel(res, res, Operand(1), vc);  // VC for "not unordered".
-    } else if (instruction->IsFPConditionFalseIfNaN()) {
-      // res = IsUnordered(arm64_cond) ? 0 : res  <=>  res = IsNotUnordered(arm64_cond) ? res : 0
-      __ Csel(res, res, Operand(0), vc);  // VC for "not unordered".
-    }
+    __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
   } else {
     // Integer cases.
     Register lhs = InputRegisterAt(instruction, 0);
     Operand rhs = InputOperandAt(instruction, 1);
     __ Cmp(lhs, rhs);
-    __ Cset(res, arm64_cond);
+    __ Cset(res, ARM64Condition(if_cond));
   }
 }
 
@@ -2842,15 +2799,11 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
     } else {
       __ Fcmp(lhs, InputFPRegisterAt(condition, 1));
     }
-    if (condition->IsFPConditionTrueIfNaN()) {
-      __ B(vs, true_target == nullptr ? &fallthrough_target : true_target);
-    } else if (condition->IsFPConditionFalseIfNaN()) {
-      __ B(vs, false_target == nullptr ? &fallthrough_target : false_target);
-    }
     if (true_target == nullptr) {
-      __ B(ARM64Condition(condition->GetOppositeCondition()), false_target);
+      IfCondition opposite_condition = condition->GetOppositeCondition();
+      __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
     } else {
-      __ B(ARM64Condition(condition->GetCondition()), true_target);
+      __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
     }
   } else {
     // Integer cases.
@@ -3488,9 +3441,9 @@ void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
 }
 
 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
   if (intrinsic.TryDispatch(invoke)) {
@@ -3738,9 +3691,9 @@ vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
 
 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f2ff89488e..8eb9fcc558 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -339,10 +339,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index e34767cecd..5bd136a3f0 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1042,7 +1042,7 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
   __ Bind(&done);
 }
 
-void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorMIPS::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[A1_A2] = true;
 
@@ -1072,16 +1072,6 @@ void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
     blocked_fpu_registers_[i] = true;
   }
 
-  if (is_baseline) {
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-    }
-
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
-    }
-  }
-
   UpdateBlockedPairRegisters();
 }
 
@@ -1096,52 +1086,6 @@ void CodeGeneratorMIPS::UpdateBlockedPairRegisters() const {
   }
 }
 
-Location CodeGeneratorMIPS::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      MipsManagedRegister pair =
-          MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        MipsManagedRegister current =
-            MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFRegisters);
-      return Location::FpuRegisterLocation(reg);
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  UNREACHABLE();
-}
-
 size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreWord, Register(reg_id), SP, stack_index);
   return kMipsWordSize;
@@ -3835,9 +3779,9 @@ void LocationsBuilderMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
 }
 
 void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -3973,9 +3917,9 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
 }
 
 void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c3d4851ee9..2cde0ed90b 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -290,10 +290,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 79cd56d698..05054867fe 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -979,7 +979,7 @@ void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) {
   __ Bind(&done);
 }
 
-void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+void CodeGeneratorMIPS64::SetupBlockedRegisters() const {
   // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
   blocked_core_registers_[ZERO] = true;
   blocked_core_registers_[K0] = true;
@@ -1003,8 +1003,7 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSE
 
   // TODO: review; anything else?
 
-  // TODO: make these two for's conditional on is_baseline once
-  // all the issues with register saving/restoring are sorted out.
+  // TODO: remove once all the issues with register saving/restoring are sorted out.
   for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
     blocked_core_registers_[kCoreCalleeSaves[i]] = true;
   }
@@ -1014,20 +1013,6 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSE
   }
 }
 
-Location CodeGeneratorMIPS64::AllocateFreeRegister(Primitive::Type type) const {
-  if (type == Primitive::kPrimVoid) {
-    LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  if (Primitive::IsFloatingPointType(type)) {
-    size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFpuRegisters);
-    return Location::FpuRegisterLocation(reg);
-  } else {
-    size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfGpuRegisters);
-    return Location::RegisterLocation(reg);
-  }
-}
-
 size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
   return kMips64WordSize;
@@ -3031,9 +3016,9 @@ void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
 }
 
 void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -3182,9 +3167,9 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
     return;
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 7182e8e987..140ff95f14 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -289,10 +289,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
 
   // Register allocation.
 
-  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-  // AllocateFreeRegister() is only used when allocating registers locally
-  // during CompileBaseline().
-  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+  void SetupBlockedRegisters() const OVERRIDE;
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6259acded3..f7ccdd8b8f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -817,65 +817,13 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
-Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
-  switch (type) {
-    case Primitive::kPrimLong: {
-      size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
-      X86ManagedRegister pair =
-          X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
-      DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-      blocked_core_registers_[pair.AsRegisterPairLow()] = true;
-      blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
-      UpdateBlockedPairRegisters();
-      return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-    }
-
-    case Primitive::kPrimByte:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      Register reg = static_cast<Register>(
-          FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters));
-      // Block all register pairs that contain `reg`.
-      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-        X86ManagedRegister current =
-            X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-        if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
-          blocked_register_pairs_[i] = true;
-        }
-      }
-      return Location::RegisterLocation(reg);
-    }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      return Location::FpuRegisterLocation(
-          FindFreeEntry(blocked_fpu_registers_, kNumberOfXmmRegisters));
-    }
-
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << type;
-  }
-
-  return Location::NoLocation();
-}
-
-void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86::SetupBlockedRegisters() const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[ECX_EDX] = true;
 
   // Stack register is always reserved.
   blocked_core_registers_[ESP] = true;
 
-  if (is_baseline) {
-    blocked_core_registers_[EBP] = true;
-    blocked_core_registers_[ESI] = true;
-    blocked_core_registers_[EDI] = true;
-  }
-
   UpdateBlockedPairRegisters();
 }
 
@@ -1981,9 +1929,9 @@ void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invok
 }
 
 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -1999,17 +1947,6 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
   if (invoke->HasPcRelativeDexCache()) {
     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
-
-  if (codegen_->IsBaseline()) {
-    // Baseline does not have enough registers if the current method also
-    // needs a register. We therefore do not require a register for it, and let
-    // the code generation of the invoke handle it.
-    LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetSpecialInputIndex());
-    if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
-    }
-  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
@@ -2022,9 +1959,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen)
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  // When we do not run baseline, explicit clinit checks triggered by static
-  // invokes must have been pruned by art::PrepareForRegisterAllocation.
-  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; @@ -4286,7 +4223,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, if (current_method.IsRegister()) { method_reg = current_method.AsRegister<Register>(); } else { - DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified()); + DCHECK(invoke->GetLocations()->Intrinsified()); DCHECK(!current_method.IsValid()); method_reg = reg; __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); @@ -5076,11 +5013,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { - // This location builder might end up asking to up to four registers, which is - // not currently possible for baseline. The situation in which we need four - // registers cannot be met by baseline though, because it has not run any - // optimization. - Primitive::Type value_type = instruction->GetComponentType(); bool needs_write_barrier = @@ -6077,7 +6009,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: { // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved & interface check + // into the slow path for the unresolved and interface check // cases. // // We cannot directly call the InstanceofNonTrivial runtime @@ -6308,8 +6240,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - // We always go into the type check slow path for the unresolved & - // interface check cases. + // We always go into the type check slow path for the unresolved + // and interface check cases. // // We cannot directly call the CheckCast runtime entry point // without resorting to a type checking slow path here (i.e. by @@ -6588,6 +6520,8 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) __ movl(root_reg, Address(obj, offset)); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. } } @@ -6650,7 +6584,9 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Note: the original implementation in ReadBarrier::Barrier is // slightly more complex as: // - it implements the load-load fence using a data dependency on - // the high-bits of rb_state, which are expected to be all zeroes; + // the high-bits of rb_state, which are expected to be all zeroes + // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here, + // which is a no-op thanks to the x86 memory model); // - it performs additional checks that we do not do here for // performance reasons. 
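
Aside on the x86 hunks above: with the baseline allocator gone, SetupBlockedRegisters() no longer takes an is_baseline flag, and it reserves only registers that must never be handed out (the stack pointer, plus the Dalvik-style ECX_EDX pair). The sketch below shows the shape of that contract; it is an illustration only, not the ART sources, and the register count and reserved index are hypothetical.

    // Illustrative sketch, not ART code: how SetupBlockedRegisters()
    // communicates reserved registers to the register allocator.
    #include <cstddef>

    constexpr std::size_t kNumberOfCpuRegisters = 8;  // hypothetical count

    struct CodeGeneratorSketch {
      bool blocked_core_registers_[kNumberOfCpuRegisters] = {};

      // Post-patch contract: block only what can never be allocated
      // (e.g. the stack pointer); there is no baseline special case.
      void SetupBlockedRegisters() {
        blocked_core_registers_[4] = true;  // stand-in for ESP
      }

      // The allocator skips every slot marked true.
      int FirstFreeRegister() const {
        for (std::size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
          if (!blocked_core_registers_[i]) {
            return static_cast<int>(i);
          }
        }
        return -1;  // no free register left
      }
    };
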
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index c65c423eae..43e9543e41 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -359,9 +359,7 @@ class CodeGeneratorX86 : public CodeGenerator { return GetLabelOf(block)->Position(); } - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; - - Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; + void SetupBlockedRegisters() const OVERRIDE; Location GetStackLocation(HLoadLocal* load) const OVERRIDE; @@ -453,7 +451,7 @@ class CodeGeneratorX86 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, Register obj, uint32_t offset, Location temp, @@ -461,7 +459,7 @@ class CodeGeneratorX86 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, Register obj, uint32_t data_offset, Location index, diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index e024ce2b6c..2ce2d91502 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1002,47 +1002,12 @@ InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, assembler_(codegen->GetAssembler()), codegen_(codegen) {} -Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const { - switch (type) { - case Primitive::kPrimLong: - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters); - return Location::RegisterLocation(reg); - } - - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFloatRegisters); - return Location::FpuRegisterLocation(reg); - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << type; - } - - return Location::NoLocation(); -} - -void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const { +void CodeGeneratorX86_64::SetupBlockedRegisters() const { // Stack register is always reserved. blocked_core_registers_[RSP] = true; // Block the register used as TMP. blocked_core_registers_[TMP] = true; - - if (is_baseline) { - for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { - blocked_core_registers_[kCoreCalleeSaves[i]] = true; - } - for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { - blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; - } - } } static dwarf::Reg DWARFReg(Register reg) { @@ -2161,9 +2126,9 @@ void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* in } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. 
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -2183,9 +2148,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codeg } void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; @@ -4698,13 +4663,13 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool object_array_set_with_read_barrier = kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - (may_need_runtime_call || object_array_set_with_read_barrier) ? + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); @@ -4733,7 +4698,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -4785,7 +4750,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movl(address, Immediate(0)); codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); - DCHECK(!may_need_runtime_call); + DCHECK(!may_need_runtime_call_for_type_check); break; } @@ -4794,7 +4759,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { NearLabel done, not_null, do_put; SlowPathCode* slow_path = nullptr; CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -4872,7 +4837,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { } else { __ movl(address, register_value); } - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -5661,7 +5626,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: { // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved & interface check + // into the slow path for the unresolved and interface check // cases. 
// // We cannot directly call the InstanceofNonTrivial runtime @@ -5892,8 +5857,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - // We always go into the type check slow path for the unresolved & - // interface check cases. + // We always go into the type check slow path for the unresolved + // and interface check cases. // // We cannot directly call the CheckCast runtime entry point // without resorting to a type checking slow path here (i.e. by @@ -6155,6 +6120,8 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) __ movl(root_reg, Address(obj, offset)); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. } } @@ -6217,7 +6184,9 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // Note: the original implementation in ReadBarrier::Barrier is // slightly more complex as: // - it implements the load-load fence using a data dependency on - // the high-bits of rb_state, which are expected to be all zeroes; + // the high-bits of rb_state, which are expected to be all zeroes + // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead + // here, which is a no-op thanks to the x86-64 memory model); // - it performs additional checks that we do not do here for // performance reasons. diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 505c9dcdad..82aabb04d3 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -347,8 +347,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location GetStackLocation(HLoadLocal* load) const OVERRIDE; - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; - Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; + void SetupBlockedRegisters() const OVERRIDE; void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -401,7 +400,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, CpuRegister obj, uint32_t offset, Location temp, @@ -409,7 +408,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. 
void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, CpuRegister obj, uint32_t data_offset, Location index, diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index d970704368..19d63de499 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -40,6 +40,7 @@ #include "dex_file.h" #include "dex_instruction.h" #include "driver/compiler_options.h" +#include "graph_checker.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" @@ -70,8 +71,8 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM { AddAllocatedRegister(Location::RegisterLocation(arm::R7)); } - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE { - arm::CodeGeneratorARM::SetupBlockedRegisters(is_baseline); + void SetupBlockedRegisters() const OVERRIDE { + arm::CodeGeneratorARM::SetupBlockedRegisters(); blocked_core_registers_[arm::R4] = true; blocked_core_registers_[arm::R6] = false; blocked_core_registers_[arm::R7] = false; @@ -90,8 +91,8 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { AddAllocatedRegister(Location::RegisterLocation(x86::EDI)); } - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE { - x86::CodeGeneratorX86::SetupBlockedRegisters(is_baseline); + void SetupBlockedRegisters() const OVERRIDE { + x86::CodeGeneratorX86::SetupBlockedRegisters(); // ebx is a callee-save register in C, but caller-save for ART. blocked_core_registers_[x86::EBX] = true; blocked_register_pairs_[x86::EAX_EBX] = true; @@ -200,259 +201,228 @@ static void Run(const InternalCodeAllocator& allocator, } template <typename Expected> -static void RunCodeBaseline(InstructionSet target_isa, - HGraph* graph, - bool has_result, - Expected expected) { - InternalCodeAllocator allocator; - - CompilerOptions compiler_options; - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); - // We avoid doing a stack overflow check that requires the runtime being setup, - // by making sure the compiler knows the methods we are running are leaf methods. 
- codegenX86.CompileBaseline(&allocator, true); - if (target_isa == kX86) { - Run(allocator, codegenX86, has_result, expected); - } +static void RunCode(CodeGenerator* codegen, + HGraph* graph, + std::function<void(HGraph*)> hook_before_codegen, + bool has_result, + Expected expected) { + ASSERT_TRUE(graph->IsInSsaForm()); - std::unique_ptr<const ArmInstructionSetFeatures> features_arm( - ArmInstructionSetFeatures::FromCppDefines()); - TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options); - codegenARM.CompileBaseline(&allocator, true); - if (target_isa == kArm || target_isa == kThumb2) { - Run(allocator, codegenARM, has_result, expected); - } - - std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( - X86_64InstructionSetFeatures::FromCppDefines()); - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); - codegenX86_64.CompileBaseline(&allocator, true); - if (target_isa == kX86_64) { - Run(allocator, codegenX86_64, has_result, expected); - } - - std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( - Arm64InstructionSetFeatures::FromCppDefines()); - arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options); - codegenARM64.CompileBaseline(&allocator, true); - if (target_isa == kArm64) { - Run(allocator, codegenARM64, has_result, expected); - } - - std::unique_ptr<const MipsInstructionSetFeatures> features_mips( - MipsInstructionSetFeatures::FromCppDefines()); - mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options); - codegenMIPS.CompileBaseline(&allocator, true); - if (kRuntimeISA == kMips) { - Run(allocator, codegenMIPS, has_result, expected); - } - - std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( - Mips64InstructionSetFeatures::FromCppDefines()); - mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); - codegenMIPS64.CompileBaseline(&allocator, true); - if (target_isa == kMips64) { - Run(allocator, codegenMIPS64, has_result, expected); - } -} + SSAChecker graph_checker(graph); + graph_checker.Run(); + ASSERT_TRUE(graph_checker.IsValid()); -template <typename Expected> -static void RunCodeOptimized(CodeGenerator* codegen, - HGraph* graph, - std::function<void(HGraph*)> hook_before_codegen, - bool has_result, - Expected expected) { - // Tests may have already computed it. 
- if (graph->GetReversePostOrder().empty()) { - graph->BuildDominatorTree(); - } SsaLivenessAnalysis liveness(graph, codegen); - liveness.Analyze(); - RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness); - register_allocator.AllocateRegisters(); + PrepareForRegisterAllocation(graph).Run(); + liveness.Analyze(); + RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters(); hook_before_codegen(graph); InternalCodeAllocator allocator; - codegen->CompileOptimized(&allocator); + codegen->Compile(&allocator); Run(allocator, *codegen, has_result, expected); } template <typename Expected> -static void RunCodeOptimized(InstructionSet target_isa, - HGraph* graph, - std::function<void(HGraph*)> hook_before_codegen, - bool has_result, - Expected expected) { +static void RunCode(InstructionSet target_isa, + HGraph* graph, + std::function<void(HGraph*)> hook_before_codegen, + bool has_result, + Expected expected) { CompilerOptions compiler_options; if (target_isa == kArm || target_isa == kThumb2) { std::unique_ptr<const ArmInstructionSetFeatures> features_arm( ArmInstructionSetFeatures::FromCppDefines()); TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options); - RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kArm64) { std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( Arm64InstructionSetFeatures::FromCppDefines()); arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options); - RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kX86) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); - RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kX86_64) { std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( X86_64InstructionSetFeatures::FromCppDefines()); x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); - RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kMips) { std::unique_ptr<const MipsInstructionSetFeatures> features_mips( MipsInstructionSetFeatures::FromCppDefines()); mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options); - RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kMips64) { std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( Mips64InstructionSetFeatures::FromCppDefines()); mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); - RunCodeOptimized(&codegenMIPS64, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected); } } -static void TestCode(InstructionSet target_isa, - const uint16_t* data, +static ::std::vector<InstructionSet> GetTargetISAs() { + ::std::vector<InstructionSet> v; + // Add all 
ISAs that are executable on hardware or on simulator. + const ::std::vector<InstructionSet> executable_isa_candidates = { + kArm, + kArm64, + kThumb2, + kX86, + kX86_64, + kMips, + kMips64 + }; + + for (auto target_isa : executable_isa_candidates) { + if (CanExecute(target_isa)) { + v.push_back(target_isa); + } + } + + return v; +} + +static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { - ArenaPool pool; - ArenaAllocator arena(&pool); - HGraph* graph = CreateGraph(&arena); - HGraphBuilder builder(graph); - const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - bool graph_built = builder.BuildGraph(*item); - ASSERT_TRUE(graph_built); - // Remove suspend checks, they cannot be executed in this context. - RemoveSuspendChecks(graph); - RunCodeBaseline(target_isa, graph, has_result, expected); -} - -static void TestCodeLong(InstructionSet target_isa, - const uint16_t* data, + for (InstructionSet target_isa : GetTargetISAs()) { + ArenaPool pool; + ArenaAllocator arena(&pool); + HGraph* graph = CreateGraph(&arena); + HGraphBuilder builder(graph); + const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); + // Remove suspend checks, they cannot be executed in this context. + RemoveSuspendChecks(graph); + TransformToSsa(graph); + RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected); + } +} + +static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { - ArenaPool pool; - ArenaAllocator arena(&pool); - HGraph* graph = CreateGraph(&arena); - HGraphBuilder builder(graph, Primitive::kPrimLong); - const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - bool graph_built = builder.BuildGraph(*item); - ASSERT_TRUE(graph_built); - // Remove suspend checks, they cannot be executed in this context. - RemoveSuspendChecks(graph); - RunCodeBaseline(target_isa, graph, has_result, expected); + for (InstructionSet target_isa : GetTargetISAs()) { + ArenaPool pool; + ArenaAllocator arena(&pool); + HGraph* graph = CreateGraph(&arena); + HGraphBuilder builder(graph, Primitive::kPrimLong); + const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); + // Remove suspend checks, they cannot be executed in this context. 
+ RemoveSuspendChecks(graph); + TransformToSsa(graph); + RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected); + } } -class CodegenTest: public ::testing::TestWithParam<InstructionSet> {}; +class CodegenTest : public CommonCompilerTest {}; -TEST_P(CodegenTest, ReturnVoid) { +TEST_F(CodegenTest, ReturnVoid) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG1) { +TEST_F(CodegenTest, CFG1) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG2) { +TEST_F(CodegenTest, CFG2) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG3) { +TEST_F(CodegenTest, CFG3) { const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, Instruction::RETURN_VOID, Instruction::GOTO | 0xFF00); - TestCode(GetParam(), data1); + TestCode(data1); const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); - TestCode(GetParam(), data2); + TestCode(data2); const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); - TestCode(GetParam(), data3); + TestCode(data3); } -TEST_P(CodegenTest, CFG4) { +TEST_F(CodegenTest, CFG4) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, Instruction::GOTO | 0x100, Instruction::GOTO | 0xFE00); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG5) { +TEST_F(CodegenTest, CFG5) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, IntConstant) { +TEST_F(CodegenTest, IntConstant) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, Return1) { +TEST_F(CodegenTest, Return1) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN | 0); - TestCode(GetParam(), data, true, 0); + TestCode(data, true, 0); } -TEST_P(CodegenTest, Return2) { +TEST_F(CodegenTest, Return2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 0 | 1 << 8, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 0); + TestCode(data, true, 0); } -TEST_P(CodegenTest, Return3) { +TEST_F(CodegenTest, Return3) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 1); + TestCode(data, true, 1); } -TEST_P(CodegenTest, ReturnIf1) { +TEST_F(CodegenTest, ReturnIf1) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, @@ -460,10 +430,10 @@ TEST_P(CodegenTest, ReturnIf1) { Instruction::RETURN | 0 << 8, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 1); + TestCode(data, true, 1); } -TEST_P(CodegenTest, ReturnIf2) { +TEST_F(CodegenTest, ReturnIf2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 
0, Instruction::CONST_4 | 1 << 8 | 1 << 12, @@ -471,12 +441,12 @@ TEST_P(CodegenTest, ReturnIf2) { Instruction::RETURN | 0 << 8, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 0); + TestCode(data, true, 0); } // Exercise bit-wise (one's complement) not-int instruction. #define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ -TEST_P(CodegenTest, TEST_NAME) { \ +TEST_F(CodegenTest, TEST_NAME) { \ const int32_t input = INPUT; \ const uint16_t input_lo = Low16Bits(input); \ const uint16_t input_hi = High16Bits(input); \ @@ -485,7 +455,7 @@ TEST_P(CodegenTest, TEST_NAME) { \ Instruction::NOT_INT | 1 << 8 | 0 << 12 , \ Instruction::RETURN | 1 << 8); \ \ - TestCode(GetParam(), data, true, EXPECTED_OUTPUT); \ + TestCode(data, true, EXPECTED_OUTPUT); \ } NOT_INT_TEST(ReturnNotIntMinus2, -2, 1) @@ -501,7 +471,7 @@ NOT_INT_TEST(ReturnNotIntINT32_MAX, 2147483647, -2147483648) // -(2^31) // Exercise bit-wise (one's complement) not-long instruction. #define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ -TEST_P(CodegenTest, TEST_NAME) { \ +TEST_F(CodegenTest, TEST_NAME) { \ const int64_t input = INPUT; \ const uint16_t word0 = Low16Bits(Low32Bits(input)); /* LSW. */ \ const uint16_t word1 = High16Bits(Low32Bits(input)); \ @@ -512,7 +482,7 @@ TEST_P(CodegenTest, TEST_NAME) { \ Instruction::NOT_LONG | 2 << 8 | 0 << 12, \ Instruction::RETURN_WIDE | 2 << 8); \ \ - TestCodeLong(GetParam(), data, true, EXPECTED_OUTPUT); \ + TestCodeLong(data, true, EXPECTED_OUTPUT); \ } NOT_LONG_TEST(ReturnNotLongMinus2, INT64_C(-2), INT64_C(1)) @@ -551,7 +521,7 @@ NOT_LONG_TEST(ReturnNotLongINT64_MAX, #undef NOT_LONG_TEST -TEST_P(CodegenTest, IntToLongOfLongToInt) { +TEST_F(CodegenTest, IntToLongOfLongToInt) { const int64_t input = INT64_C(4294967296); // 2^32 const uint16_t word0 = Low16Bits(Low32Bits(input)); // LSW. 
const uint16_t word1 = High16Bits(Low32Bits(input)); @@ -565,192 +535,146 @@ TEST_P(CodegenTest, IntToLongOfLongToInt) { Instruction::INT_TO_LONG | 2 << 8 | 4 << 12, Instruction::RETURN_WIDE | 2 << 8); - TestCodeLong(GetParam(), data, true, 1); + TestCodeLong(data, true, 1); } -TEST_P(CodegenTest, ReturnAdd1) { +TEST_F(CodegenTest, ReturnAdd1) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT, 1 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnAdd2) { +TEST_F(CodegenTest, ReturnAdd2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnAdd3) { +TEST_F(CodegenTest, ReturnAdd3) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnAdd4) { +TEST_F(CodegenTest, ReturnAdd4) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT16, 3, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); -} - -TEST_P(CodegenTest, NonMaterializedCondition) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); - graph->AddBlock(entry); - graph->SetEntryBlock(entry); - entry->AddInstruction(new (&allocator) HGoto()); - - HBasicBlock* first_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(first_block); - entry->AddSuccessor(first_block); - HIntConstant* constant0 = graph->GetIntConstant(0); - HIntConstant* constant1 = graph->GetIntConstant(1); - HEqual* equal = new (&allocator) HEqual(constant0, constant0); - first_block->AddInstruction(equal); - first_block->AddInstruction(new (&allocator) HIf(equal)); - - HBasicBlock* then = new (&allocator) HBasicBlock(graph); - HBasicBlock* else_ = new (&allocator) HBasicBlock(graph); - HBasicBlock* exit = new (&allocator) HBasicBlock(graph); - - graph->AddBlock(then); - graph->AddBlock(else_); - graph->AddBlock(exit); - first_block->AddSuccessor(then); - first_block->AddSuccessor(else_); - then->AddSuccessor(exit); - else_->AddSuccessor(exit); - - exit->AddInstruction(new (&allocator) HExit()); - then->AddInstruction(new (&allocator) HReturn(constant0)); - else_->AddInstruction(new (&allocator) HReturn(constant1)); - - ASSERT_TRUE(equal->NeedsMaterialization()); - graph->BuildDominatorTree(); - PrepareForRegisterAllocation(graph).Run(); - ASSERT_FALSE(equal->NeedsMaterialization()); - - auto hook_before_codegen = [](HGraph* graph_in) { - HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; - HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); - block->InsertInstructionBefore(move, block->GetLastInstruction()); - }; - - RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, 0); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnMulInt) { +TEST_F(CodegenTest, ReturnMulInt) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT, 1 << 8 | 0, 
Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, ReturnMulInt2addr) { +TEST_F(CodegenTest, ReturnMulInt2addr) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, ReturnMulLong) { +TEST_F(CodegenTest, ReturnMulLong) { const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( - Instruction::CONST_4 | 3 << 12 | 0, - Instruction::CONST_4 | 0 << 12 | 1 << 8, - Instruction::CONST_4 | 4 << 12 | 2 << 8, - Instruction::CONST_4 | 0 << 12 | 3 << 8, + Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, + Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG, 2 << 8 | 0, Instruction::RETURN_WIDE); - TestCodeLong(GetParam(), data, true, 12); + TestCodeLong(data, true, 12); } -TEST_P(CodegenTest, ReturnMulLong2addr) { +TEST_F(CodegenTest, ReturnMulLong2addr) { const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( - Instruction::CONST_4 | 3 << 12 | 0 << 8, - Instruction::CONST_4 | 0 << 12 | 1 << 8, - Instruction::CONST_4 | 4 << 12 | 2 << 8, - Instruction::CONST_4 | 0 << 12 | 3 << 8, + Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, + Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG_2ADDR | 2 << 12, Instruction::RETURN_WIDE); - TestCodeLong(GetParam(), data, true, 12); + TestCodeLong(data, true, 12); } -TEST_P(CodegenTest, ReturnMulIntLit8) { +TEST_F(CodegenTest, ReturnMulIntLit8) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, ReturnMulIntLit16) { +TEST_F(CodegenTest, ReturnMulIntLit16) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT16, 3, Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, MaterializedCondition1) { - // Check that condition are materialized correctly. A materialized condition - // should yield `1` if it evaluated to true, and `0` otherwise. - // We force the materialization of comparisons for different combinations of - // inputs and check the results. 
- - int lhs[] = {1, 2, -1, 2, 0xabc}; - int rhs[] = {2, 1, 2, -1, 0xabc}; - - for (size_t i = 0; i < arraysize(lhs); i++) { +TEST_F(CodegenTest, NonMaterializedCondition) { + for (InstructionSet target_isa : GetTargetISAs()) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(entry_block); - graph->SetEntryBlock(entry_block); - entry_block->AddInstruction(new (&allocator) HGoto()); - HBasicBlock* code_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(code_block); + HGraph* graph = CreateGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + entry->AddInstruction(new (&allocator) HGoto()); + + HBasicBlock* first_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(first_block); + entry->AddSuccessor(first_block); + HIntConstant* constant0 = graph->GetIntConstant(0); + HIntConstant* constant1 = graph->GetIntConstant(1); + HEqual* equal = new (&allocator) HEqual(constant0, constant0); + first_block->AddInstruction(equal); + first_block->AddInstruction(new (&allocator) HIf(equal)); + + HBasicBlock* then_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* else_block = new (&allocator) HBasicBlock(graph); HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + graph->SetExitBlock(exit_block); + + graph->AddBlock(then_block); + graph->AddBlock(else_block); graph->AddBlock(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); + first_block->AddSuccessor(then_block); + first_block->AddSuccessor(else_block); + then_block->AddSuccessor(exit_block); + else_block->AddSuccessor(exit_block); - entry_block->AddSuccessor(code_block); - code_block->AddSuccessor(exit_block); - graph->SetExitBlock(exit_block); + exit_block->AddInstruction(new (&allocator) HExit()); + then_block->AddInstruction(new (&allocator) HReturn(constant0)); + else_block->AddInstruction(new (&allocator) HReturn(constant1)); - HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); - HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); - HLessThan cmp_lt(cst_lhs, cst_rhs); - code_block->AddInstruction(&cmp_lt); - HReturn ret(&cmp_lt); - code_block->AddInstruction(&ret); + ASSERT_TRUE(equal->NeedsMaterialization()); + TransformToSsa(graph); + PrepareForRegisterAllocation(graph).Run(); + ASSERT_FALSE(equal->NeedsMaterialization()); auto hook_before_codegen = [](HGraph* graph_in) { HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; @@ -758,93 +682,143 @@ TEST_P(CodegenTest, MaterializedCondition1) { block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]); + RunCode(target_isa, graph, hook_before_codegen, true, 0); } } -TEST_P(CodegenTest, MaterializedCondition2) { - // Check that HIf correctly interprets a materialized condition. - // We force the materialization of comparisons for different combinations of - // inputs. An HIf takes the materialized combination as input and returns a - // value that we verify. 
- - int lhs[] = {1, 2, -1, 2, 0xabc}; - int rhs[] = {2, 1, 2, -1, 0xabc}; - - - for (size_t i = 0; i < arraysize(lhs); i++) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); - - HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(entry_block); - graph->SetEntryBlock(entry_block); - entry_block->AddInstruction(new (&allocator) HGoto()); - - HBasicBlock* if_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(if_block); - HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(if_true_block); - HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(if_false_block); - HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); - - graph->SetEntryBlock(entry_block); - entry_block->AddSuccessor(if_block); - if_block->AddSuccessor(if_true_block); - if_block->AddSuccessor(if_false_block); - if_true_block->AddSuccessor(exit_block); - if_false_block->AddSuccessor(exit_block); - graph->SetExitBlock(exit_block); - - HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); - HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); - HLessThan cmp_lt(cst_lhs, cst_rhs); - if_block->AddInstruction(&cmp_lt); - // We insert a temporary to separate the HIf from the HLessThan and force - the materialization of the condition. - HTemporary force_materialization(0); - if_block->AddInstruction(&force_materialization); - HIf if_lt(&cmp_lt); - if_block->AddInstruction(&if_lt); - - HIntConstant* cst_lt = graph->GetIntConstant(1); - HReturn ret_lt(cst_lt); - if_true_block->AddInstruction(&ret_lt); - HIntConstant* cst_ge = graph->GetIntConstant(0); - HReturn ret_ge(cst_ge); - if_false_block->AddInstruction(&ret_ge); - - auto hook_before_codegen = [](HGraph* graph_in) { - HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; - HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); - block->InsertInstructionBefore(move, block->GetLastInstruction()); - }; +TEST_F(CodegenTest, MaterializedCondition1) { + for (InstructionSet target_isa : GetTargetISAs()) { + // Check that conditions are materialized correctly. A materialized condition + should yield `1` if it evaluated to true, and `0` otherwise. + // We force the materialization of comparisons for different combinations of + + inputs and check the results. 
+ + int lhs[] = {1, 2, -1, 2, 0xabc}; + int rhs[] = {2, 1, 2, -1, 0xabc}; + + for (size_t i = 0; i < arraysize(lhs); i++) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateGraph(&allocator); + + HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry_block); + graph->SetEntryBlock(entry_block); + entry_block->AddInstruction(new (&allocator) HGoto()); + HBasicBlock* code_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(code_block); + HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit_block); + exit_block->AddInstruction(new (&allocator) HExit()); + + entry_block->AddSuccessor(code_block); + code_block->AddSuccessor(exit_block); + graph->SetExitBlock(exit_block); + + HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); + HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); + HLessThan cmp_lt(cst_lhs, cst_rhs); + code_block->AddInstruction(&cmp_lt); + HReturn ret(&cmp_lt); + code_block->AddInstruction(&ret); + + TransformToSsa(graph); + auto hook_before_codegen = [](HGraph* graph_in) { + HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; + HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); + block->InsertInstructionBefore(move, block->GetLastInstruction()); + }; + RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + } + } +} - RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]); +TEST_F(CodegenTest, MaterializedCondition2) { + for (InstructionSet target_isa : GetTargetISAs()) { + // Check that HIf correctly interprets a materialized condition. + // We force the materialization of comparisons for different combinations of + // inputs. An HIf takes the materialized combination as input and returns a + // value that we verify. + + int lhs[] = {1, 2, -1, 2, 0xabc}; + int rhs[] = {2, 1, 2, -1, 0xabc}; + + + for (size_t i = 0; i < arraysize(lhs); i++) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateGraph(&allocator); + + HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry_block); + graph->SetEntryBlock(entry_block); + entry_block->AddInstruction(new (&allocator) HGoto()); + + HBasicBlock* if_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(if_block); + HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(if_true_block); + HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(if_false_block); + HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit_block); + exit_block->AddInstruction(new (&allocator) HExit()); + + graph->SetEntryBlock(entry_block); + entry_block->AddSuccessor(if_block); + if_block->AddSuccessor(if_true_block); + if_block->AddSuccessor(if_false_block); + if_true_block->AddSuccessor(exit_block); + if_false_block->AddSuccessor(exit_block); + graph->SetExitBlock(exit_block); + + HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); + HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); + HLessThan cmp_lt(cst_lhs, cst_rhs); + if_block->AddInstruction(&cmp_lt); + // We insert a temporary to separate the HIf from the HLessThan and force + // the materialization of the condition. 
+ HTemporary force_materialization(0); + if_block->AddInstruction(&force_materialization); + HIf if_lt(&cmp_lt); + if_block->AddInstruction(&if_lt); + + HIntConstant* cst_lt = graph->GetIntConstant(1); + HReturn ret_lt(cst_lt); + if_true_block->AddInstruction(&ret_lt); + HIntConstant* cst_ge = graph->GetIntConstant(0); + HReturn ret_ge(cst_ge); + if_false_block->AddInstruction(&ret_ge); + + TransformToSsa(graph); + auto hook_before_codegen = [](HGraph* graph_in) { + HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; + HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); + block->InsertInstructionBefore(move, block->GetLastInstruction()); + }; + RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + } } } -TEST_P(CodegenTest, ReturnDivIntLit8) { +TEST_F(CodegenTest, ReturnDivIntLit8) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::DIV_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 1); + TestCode(data, true, 1); } -TEST_P(CodegenTest, ReturnDivInt2Addr) { +TEST_F(CodegenTest, ReturnDivInt2Addr) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0, Instruction::CONST_4 | 2 << 12 | 1 << 8, Instruction::DIV_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(GetParam(), data, true, 2); + TestCode(data, true, 2); } // Helper method. @@ -933,80 +907,55 @@ static void TestComparison(IfCondition condition, block->AddInstruction(comparison); block->AddInstruction(new (&allocator) HReturn(comparison)); - auto hook_before_codegen = [](HGraph*) { - }; - RunCodeOptimized(target_isa, graph, hook_before_codegen, true, expected_result); -} - -TEST_P(CodegenTest, ComparisonsInt) { - const InstructionSet target_isa = GetParam(); - for (int64_t i = -1; i <= 1; i++) { - for (int64_t j = -1; j <= 1; j++) { - TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa); + TransformToSsa(graph); + RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result); +} + +TEST_F(CodegenTest, ComparisonsInt) { + for (InstructionSet target_isa : GetTargetISAs()) { + for (int64_t i = -1; i <= 1; i++) { + for (int64_t j = -1; j <= 1; j++) { + TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa); + } } } } -TEST_P(CodegenTest, ComparisonsLong) { 
+TEST_F(CodegenTest, ComparisonsLong) { // TODO: make MIPS work for long if (kRuntimeISA == kMips || kRuntimeISA == kMips64) { return; } - const InstructionSet target_isa = GetParam(); - if (target_isa == kMips || target_isa == kMips64) { - return; - } - - for (int64_t i = -1; i <= 1; i++) { - for (int64_t j = -1; j <= 1; j++) { - TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa); + for (InstructionSet target_isa : GetTargetISAs()) { + if (target_isa == kMips || target_isa == kMips64) { + continue; } - } -} -static ::std::vector<InstructionSet> GetTargetISAs() { - ::std::vector<InstructionSet> v; - // Add all ISAs that are executable on hardware or on simulator. - const ::std::vector<InstructionSet> executable_isa_candidates = { - kArm, - kArm64, - kThumb2, - kX86, - kX86_64, - kMips, - kMips64 - }; - - for (auto target_isa : executable_isa_candidates) { - if (CanExecute(target_isa)) { - v.push_back(target_isa); + for (int64_t i = -1; i <= 1; i++) { + for (int64_t j = -1; j <= 1; j++) { + TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa); + } } } - - return v; } -INSTANTIATE_TEST_CASE_P(MultipleTargets, - CodegenTest, - ::testing::ValuesIn(GetTargetISAs())); - } // namespace art diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 86a695b152..e170e37bdd 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -89,15 +89,18 @@ void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) { } void HDeadCodeElimination::RemoveDeadBlocks() { + if (graph_->HasIrreducibleLoops()) { + // Do not eliminate dead blocks if the graph has irreducible loops. We could + // support it, but that would require changes in our loop representation to handle + // multiple entry points. We decided it was not worth the complexity. + return; + } // Classify blocks as reachable/unreachable. ArenaAllocator* allocator = graph_->GetArena(); ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false); MarkReachableBlocks(graph_, &live_blocks); bool removed_one_or_more_blocks = false; - // If the graph has irreducible loops we need to reset all graph analysis we have done - // before: the irreducible loop can be turned into a reducible one. 
- // For simplicity, we do the full computation regardless of the type of the loops. bool rerun_dominance_and_loop_analysis = false; // Remove all dead blocks. Iterate in post order because removal needs the @@ -105,9 +108,6 @@ void HDeadCodeElimination::RemoveDeadBlocks() { // inside out. for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) { - rerun_dominance_and_loop_analysis = true; - } int id = block->GetBlockId(); if (!live_blocks.IsBitSet(id)) { MaybeRecordDeadBlock(block); diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 91e4a997fd..feb8b2092a 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -133,8 +133,9 @@ TEST(OptimizerTest, CFG4) { const uint32_t dominators[] = { kInvalidBlockId, - 0, - kInvalidBlockId + 3, + kInvalidBlockId, + 0 }; TestCode(data1, dominators, sizeof(dominators) / sizeof(int)); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 9439ba0c8d..31136772c7 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -484,6 +484,18 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { loop_information->GetPreHeader()->GetSuccessors().size())); } + if (loop_information->GetSuspendCheck() == nullptr) { + AddError(StringPrintf( + "Loop with header %d does not have a suspend check.", + loop_header->GetBlockId())); + } + + if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) { + AddError(StringPrintf( + "Loop header %d does not have the loop suspend check as the first instruction.", + loop_header->GetBlockId())); + } + // Ensure the loop header has only one incoming branch and the remaining // predecessors are back edges. size_t num_preds = loop_header->GetPredecessors().size(); @@ -589,6 +601,14 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { } } + if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) { + AddError(StringPrintf("Instruction %s:%d in block %d requires an environment " + "but does not have one.", + instruction->DebugName(), + instruction->GetId(), + current_block_->GetBlockId())); + } + // Ensure an instruction having an environment is dominated by the // instructions contained in the environment. for (HEnvironment* environment = instruction->GetEnvironment(); diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index d4b9b71952..d5305646a8 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -164,7 +164,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) { // Ensure there is only one back edge. ASSERT_EQ(if_block->GetPredecessors().size(), 2u); - ASSERT_EQ(if_block->GetPredecessors()[0], entry_block); + ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor()); ASSERT_NE(if_block->GetPredecessors()[1], if_block); // Ensure the new block is the back edge. @@ -199,7 +199,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) { // Ensure there is only one back edge. ASSERT_EQ(if_block->GetPredecessors().size(), 2u); - ASSERT_EQ(if_block->GetPredecessors()[0], entry_block); + ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor()); ASSERT_NE(if_block->GetPredecessors()[1], if_block); // Ensure the new block is the back edge. 
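
Aside on the graph_checker.cc hunks above: the SSA checker now requires every loop header to carry a suspend check and to have it as the first instruction once leading parallel moves are skipped, which is exactly what GetFirstInstructionDisregardMoves() (added in the nodes.cc hunk further down) computes. A toy model of the skipping rule, illustrative only, with a deliberately simplified instruction type:

    // Toy model, not the ART classes: skip ParallelMove instructions at
    // the head of a block, mirroring GetFirstInstructionDisregardMoves().
    #include <cassert>

    struct Instr {
      bool is_parallel_move = false;
      Instr* next = nullptr;
    };

    Instr* FirstDisregardMoves(Instr* first) {
      while (first != nullptr && first->is_parallel_move) {
        first = first->next;
      }
      return first;
    }

    int main() {
      Instr suspend_check;               // stands in for HSuspendCheck
      Instr move{true, &suspend_check};  // a leading parallel move
      // A parallel move before the suspend check is tolerated...
      assert(FirstDisregardMoves(&move) == &suspend_check);
      // ...and without moves the suspend check must already be first.
      assert(FirstDisregardMoves(&suspend_check) == &suspend_check);
      return 0;
    }
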
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 293282edbb..2e79df1b84 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -356,12 +356,12 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, compare, invoke_instruction->GetDexPc()); // TODO: Extend reference type propagation to understand the guard. if (cursor != nullptr) { - bb_cursor->InsertInstructionAfter(load_class, cursor); + bb_cursor->InsertInstructionAfter(field_get, cursor); } else { - bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction()); + bb_cursor->InsertInstructionBefore(field_get, bb_cursor->GetFirstInstruction()); } - bb_cursor->InsertInstructionAfter(field_get, load_class); - bb_cursor->InsertInstructionAfter(compare, field_get); + bb_cursor->InsertInstructionAfter(load_class, field_get); + bb_cursor->InsertInstructionAfter(compare, load_class); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); @@ -419,7 +419,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (code_item->insns_size_in_code_units_ > inline_max_code_units) { VLOG(compiler) << "Method " << PrettyMethod(method) - << " is too big to inline"; + << " is too big to inline: " + << code_item->insns_size_in_code_units_ + << " > " + << inline_max_code_units; return false; } @@ -639,9 +642,12 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - if (block->IsLoopHeader()) { + + if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) { + // Don't inline methods with irreducible loops, they could prevent some + // optimizations to run. VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it contains a loop"; + << " could not be inlined because it contains an irreducible loop"; return false; } diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 6bbc751bee..4bcfc54791 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -30,6 +30,15 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio HInstruction* array, HInstruction* index, int access_size) { + if (kEmitCompilerReadBarrier) { + // The read barrier instrumentation does not support the + // HArm64IntermediateAddress instruction yet. + // + // TODO: Handle this case properly in the ARM64 code generator and + // re-enable this optimization; otherwise, remove this TODO. 
+ // b/26601270 + return; + } if (index->IsConstant() || (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { // When the index is a constant all the addressing can be fitted in the diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 9f50d1814e..3bf3f7ffae 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -85,9 +85,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) InvokeDexCallingConventionVisitor* calling_convention_visitor) { if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) { HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been + // pruned by art::PrepareForRegisterAllocation. + DCHECK(!invoke_static_or_direct->IsStaticWithExplicitClinitCheck()); } if (invoke->GetNumberOfArguments() == 0) { diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 854d92a409..adf8734214 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -167,11 +167,7 @@ void HGraph::ClearDominanceInformation() { void HGraph::ClearLoopInformation() { SetHasIrreducibleLoops(false); for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); - if (current->IsLoopHeader()) { - current->RemoveInstruction(current->GetLoopInformation()->GetSuspendCheck()); - } - current->SetLoopInformation(nullptr); + it.Current()->SetLoopInformation(nullptr); } } @@ -180,6 +176,14 @@ void HBasicBlock::ClearDominanceInformation() { dominator_ = nullptr; } +HInstruction* HBasicBlock::GetFirstInstructionDisregardMoves() const { + HInstruction* instruction = GetFirstInstruction(); + while (instruction->IsParallelMove()) { + instruction = instruction->GetNext(); + } + return instruction; +} + void HGraph::ComputeDominanceInformation() { DCHECK(reverse_post_order_.empty()); reverse_post_order_.reserve(blocks_.size()); @@ -284,9 +288,10 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { // Make sure the loop has only one pre header. This simplifies SSA building by having // to just look at the pre header to know which locals are initialized at entry of the - // loop. + // loop. Also, don't allow the entry block to be a pre header: this simplifies inlining + // of this graph. size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges(); - if (number_of_incomings != 1) { + if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) { HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc())); @@ -457,6 +462,10 @@ void HGraph::SimplifyCFG() { } if (block->IsLoopHeader()) { SimplifyLoop(block); + } else if (!block->IsEntryBlock() && block->GetFirstInstruction()->IsSuspendCheck()) { + // We are being called by the dead code elimination pass, and what used to be + // a loop got dismantled. Just remove the suspend check.
+ block->RemoveInstruction(block->GetFirstInstruction()); } } } @@ -1829,6 +1838,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { DCHECK(GetBlocks()[0]->IsEntryBlock()); DCHECK(GetBlocks()[2]->IsExitBlock()); DCHECK(!body->IsExitBlock()); + DCHECK(!body->IsInLoop()); HInstruction* last = body->GetLastInstruction(); invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions()); @@ -1887,7 +1897,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Update the meta information surrounding blocks: // (1) the graph they are now in, // (2) the reverse post order of that graph, - // (3) the potential loop information they are now in, + // (3) their potential loop information, inner and outer, // (4) try block membership. // Note that we do not need to update catch phi inputs because they // correspond to the register file of the outer method which the inlinee @@ -1916,15 +1926,24 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* current = it.Current(); if (current != exit_block_ && current != entry_block_ && current != first) { - DCHECK(!current->IsInLoop()); DCHECK(current->GetTryCatchInformation() == nullptr); DCHECK(current->GetGraph() == this); current->SetGraph(outer_graph); outer_graph->AddBlock(current); outer_graph->reverse_post_order_[++index_of_at] = current; - if (loop_info != nullptr) { + if (!current->IsInLoop()) { current->SetLoopInformation(loop_info); - for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { + } else if (current->IsLoopHeader()) { + // Clear the information about which blocks are contained in that loop. Since the + // information is stored as a bit vector based on block ids, we have to update + // it, as those block ids were specific to the callee graph and we are now adding + // these blocks to the caller graph. + current->GetLoopInformation()->ClearAllBlocks(); + } + if (current->IsInLoop()) { + for (HLoopInformationOutwardIterator loop_it(*current); + !loop_it.Done(); + loop_it.Advance()) { loop_it.Current()->Add(current); } } @@ -1937,7 +1956,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->AddBlock(to); outer_graph->reverse_post_order_[++index_of_at] = to; if (loop_info != nullptr) { - to->SetLoopInformation(loop_info); + if (!to->IsInLoop()) { + to->SetLoopInformation(loop_info); + } for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { loop_it.Current()->Add(to); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 859d570b29..5246fd1f05 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -689,6 +689,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { void Add(HBasicBlock* block); void Remove(HBasicBlock* block); + void ClearAllBlocks() { + blocks_.ClearAllBits(); + } + private: // Internal recursive implementation of `Populate`.
void PopulateRecursive(HBasicBlock* block); @@ -860,6 +864,8 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { HInstruction* GetLastPhi() const { return phis_.last_instruction_; } const HInstructionList& GetPhis() const { return phis_; } + HInstruction* GetFirstInstructionDisregardMoves() const; + void AddSuccessor(HBasicBlock* block) { successors_.push_back(block); block->predecessors_.push_back(this); @@ -3687,19 +3693,13 @@ class HInvokeStaticOrDirect : public HInvoke { DCHECK(!IsStaticWithExplicitClinitCheck()); } - HNewInstance* GetThisArgumentOfStringInit() const { - DCHECK(IsStringInit()); - size_t index = InputCount() - 1; - DCHECK(InputAt(index)->IsNewInstance()); - return InputAt(index)->AsNewInstance(); - } - - void RemoveThisArgumentOfStringInit() { + HInstruction* GetAndRemoveThisArgumentOfStringInit() { DCHECK(IsStringInit()); size_t index = InputCount() - 1; - DCHECK(InputAt(index)->IsNewInstance()); + HInstruction* input = InputAt(index); RemoveAsUserOfInput(index); inputs_.pop_back(); + return input; } // Is this a call to a static method whose declaring class has an diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index bb840eabdd..fffd00535c 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -127,7 +127,7 @@ class PassObserver : public ValueObject { timing_logger_enabled_(compiler_driver->GetDumpPasses()), timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), disasm_info_(graph->GetArena()), - visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), + visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()), visualizer_(visualizer_output, graph, *codegen), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { @@ -305,30 +305,19 @@ class OptimizingCompiler FINAL : public Compiler { SHARED_REQUIRES(Locks::mutator_lock_); private: - // Whether we should run any optimization or register allocation. If false, will - // just run the code generation after the graph was built. - const bool run_optimizations_; - // Create a 'CompiledMethod' for an optimized graph. - CompiledMethod* EmitOptimized(ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* driver) const; - - // Create a 'CompiledMethod' for a non-optimized graph. - CompiledMethod* EmitBaseline(ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* driver) const; + CompiledMethod* Emit(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* driver) const; // Try compiling a method and return the code generator used for // compiling it. // This method: // 1) Builds the graph. Returns null if it failed to build it. - // 2) If `run_optimizations_` is set: - // 2.1) Transform the graph to SSA. Returns null if it failed. - // 2.2) Run optimizations on the graph, including register allocator. - // 3) Generate code with the `code_allocator` provided. + // 2) Transforms the graph to SSA. Returns null if it failed. + // 3) Runs optimizations on the graph, including the register allocator. + // 4) Generates code with the `code_allocator` provided.
CodeGenerator* TryCompile(ArenaAllocator* arena, CodeVectorAllocator* code_allocator, const DexFile::CodeItem* code_item, @@ -350,21 +339,19 @@ class OptimizingCompiler FINAL : public Compiler { static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) - : Compiler(driver, kMaximumCompilationTimeBeforeWarning), - run_optimizations_( - driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) {} + : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {} void OptimizingCompiler::Init() { // Enable C1visualizer output. Must be done in Init() because the compiler // driver is not fully initialized when passed to the compiler's constructor. CompilerDriver* driver = GetCompilerDriver(); - const std::string cfg_file_name = driver->GetDumpCfgFileName(); + const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName(); if (!cfg_file_name.empty()) { CHECK_EQ(driver->GetThreadCount(), 1U) << "Graph visualizer requires the compiler to run single-threaded. " << "Invoke the compiler with '-j1'."; std::ios_base::openmode cfg_file_mode = - driver->GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out; + driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out; visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode)); } if (driver->GetDumpStats()) { @@ -577,17 +564,6 @@ static void RunOptimizations(HGraph* graph, AllocateRegisters(graph, codegen, pass_observer); } -// The stack map we generate must be 4-byte aligned on ARM. Since existing -// maps are generated alongside these stack maps, we must also align them. -static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) { - size_t size = vector.size(); - size_t aligned_size = RoundUp(size, 4); - for (; size < aligned_size; ++size) { - vector.push_back(0); - } - return ArrayRef<const uint8_t>(vector); -} - static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) { ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter()); codegen->EmitLinkerPatches(&linker_patches); @@ -601,10 +577,10 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) return linker_patches; } -CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* compiler_driver) const { +CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* compiler_driver) const { ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps)); stack_map.resize(codegen->ComputeStackMapsSize()); @@ -630,39 +606,6 @@ CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, return compiled_method; } -CompiledMethod* OptimizingCompiler::EmitBaseline( - ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* compiler_driver) const { - ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); - - ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps)); - codegen->BuildMappingTable(&mapping_table); - ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps)); - codegen->BuildVMapTable(&vmap_table); - ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps)); 
- codegen->BuildNativeGCMap(&gc_map, *compiler_driver); - - CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( - compiler_driver, - codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(code_allocator->GetMemory()), - // Follow Quick's behavior and set the frame size to zero if it is - // considered "empty" (see the definition of - // art::CodeGenerator::HasEmptyFrame). - codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(), - AlignVectorSize(mapping_table), - AlignVectorSize(vmap_table), - AlignVectorSize(gc_map), - ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), - ArrayRef<const LinkerPatch>(linker_patches)); - return compiled_method; -} - CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, CodeVectorAllocator* code_allocator, const DexFile::CodeItem* code_item, @@ -775,41 +718,37 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName(); - if (run_optimizations_) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); - ScopedThreadSuspension sts(soa.Self(), kNative); - - { - PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); - GraphAnalysisResult result = graph->TryBuildingSsa(&handles); - if (result != kAnalysisSuccess) { - switch (result) { - case kAnalysisFailThrowCatchLoop: - MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop); - break; - case kAnalysisFailAmbiguousArrayOp: - MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp); - break; - case kAnalysisSuccess: - UNREACHABLE(); - } - pass_observer.SetGraphInBadState(); - return nullptr; + ScopedObjectAccess soa(Thread::Current()); + StackHandleScopeCollection handles(soa.Self()); + ScopedThreadSuspension sts(soa.Self(), kNative); + + { + PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); + GraphAnalysisResult result = graph->TryBuildingSsa(&handles); + if (result != kAnalysisSuccess) { + switch (result) { + case kAnalysisFailThrowCatchLoop: + MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop); + break; + case kAnalysisFailAmbiguousArrayOp: + MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp); + break; + case kAnalysisSuccess: + UNREACHABLE(); } + pass_observer.SetGraphInBadState(); + return nullptr; } - - RunOptimizations(graph, - codegen.get(), - compiler_driver, - compilation_stats_.get(), - dex_compilation_unit, - &pass_observer, - &handles); - codegen->CompileOptimized(code_allocator); - } else { - codegen->CompileBaseline(code_allocator); } + + RunOptimizations(graph, + codegen.get(), + compiler_driver, + compilation_stats_.get(), + dex_compilation_unit, + &pass_observer, + &handles); + codegen->Compile(code_allocator); pass_observer.DumpDisassembly(); if (kArenaAllocatorCountAllocations) { @@ -861,11 +800,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, dex_cache)); if (codegen.get() != nullptr) { MaybeRecordStat(MethodCompilationStat::kCompiled); - if (run_optimizations_) { - method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver); - } else { - method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver); - } + method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver); } } else { if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { @@ -928,8 +863,6 @@ bool 
OptimizingCompiler::JitCompile(Thread* self, { // Go to native so that we don't block GC during compilation. ScopedThreadSuspension sts(self, kNative); - - DCHECK(run_optimizations_); codegen.reset( TryCompile(&arena, &code_allocator, diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 9d136f3ae6..be470ccb7d 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -504,7 +504,7 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { // This function is used to reduce the dependencies in the graph after // (from -> to) has been performed. Since we ensure there is no move with the same - // destination, (to -> X) can not be blocked while (from -> X) might still be + // destination, (to -> X) cannot be blocked while (from -> X) might still be // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is // a dependency between the two. If we update the source location from 1 to 2, we diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 2bae4bc5c8..a966b62b4f 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -72,8 +72,7 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, float_spill_slots_.reserve(kDefaultNumberOfSpillSlots); double_spill_slots_.reserve(kDefaultNumberOfSpillSlots); - static constexpr bool kIsBaseline = false; - codegen->SetupBlockedRegisters(kIsBaseline); + codegen->SetupBlockedRegisters(); physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr); physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr); // Always reserve for the current method and the graph's max out registers. diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 7494e336b1..165d09d1a5 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -422,6 +422,34 @@ bool SsaBuilder::FixAmbiguousArrayOps() { return true; } +void SsaBuilder::RemoveRedundantUninitializedStrings() { + if (GetGraph()->IsDebuggable()) { + // Do not perform the optimization, for consistency with the interpreter, + // which always allocates an object for new-instance of String. + return; + } + + for (HNewInstance* new_instance : uninitialized_strings_) { + DCHECK(new_instance->IsStringAlloc()); + + // Replace NewInstance of String with NullConstant if not used prior to + // calling StringFactory. In case of deoptimization, the interpreter is + // expected to skip the null check on the `this` argument of the StringFactory call. + if (!new_instance->HasNonEnvironmentUses()) { + new_instance->ReplaceWith(GetGraph()->GetNullConstant()); + new_instance->GetBlock()->RemoveInstruction(new_instance); + + // Remove LoadClass if not needed any more. + HLoadClass* load_class = new_instance->InputAt(0)->AsLoadClass(); + DCHECK(load_class != nullptr); + DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible"; + if (!load_class->HasUses()) { + load_class->GetBlock()->RemoveInstruction(load_class); + } + } + } +} + GraphAnalysisResult SsaBuilder::BuildSsa() { // 1) Visit in reverse post order.
We need to have all predecessors of a block // visited (with the exception of loops) in order to create the right environment @@ -487,7 +515,15 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // input types. dead_phi_elimimation.EliminateDeadPhis(); - // 11) Clear locals. + // 11) Step 1) replaced uses of NewInstances of String with the results of + // their corresponding StringFactory calls. Unless the String objects are used + // before they are initialized, they can be replaced with NullConstant. + // Note that this optimization is valid only if unsimplified code does not use + // the uninitialized value, because we assume execution can be deoptimized at + // any safepoint. We must therefore perform it before any other optimizations. + RemoveRedundantUninitializedStrings(); + + // 12) Clear locals. for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions()); !it.Done(); it.Advance()) { @@ -891,12 +927,21 @@ void SsaBuilder::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { if (invoke->IsStringInit()) { // This is a StringFactory call which acts as a String constructor. Its // result replaces the empty String pre-allocated by NewInstance. - HNewInstance* new_instance = invoke->GetThisArgumentOfStringInit(); - invoke->RemoveThisArgumentOfStringInit(); + HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit(); + + // Replacing the NewInstance might render it redundant. Keep a list of these + // to be visited once it is clear whether it has remaining uses. + if (arg_this->IsNewInstance()) { + uninitialized_strings_.push_back(arg_this->AsNewInstance()); + } else { + DCHECK(arg_this->IsPhi()); + // NewInstance is not the direct input of the StringFactory call. It might + // be redundant, but optimizing this case is not worth the effort. + } - // Walk over all vregs and replace any occurrence of `new_instance` with `invoke`. + // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`. for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) { - if ((*current_locals_)[vreg] == new_instance) { + if ((*current_locals_)[vreg] == arg_this) { (*current_locals_)[vreg] = invoke; } } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 28eef6a40c..ccef8ea380 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -57,6 +57,7 @@ class SsaBuilder : public HGraphVisitor { loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), + uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), locals_for_(graph->GetBlocks().size(), ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) { @@ -105,6 +106,8 @@ class SsaBuilder : public HGraphVisitor { HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget); + void RemoveRedundantUninitializedStrings(); + StackHandleScopeCollection* const handles_; // True if types of ambiguous ArrayGets have been resolved. @@ -119,6 +122,7 @@ class SsaBuilder : public HGraphVisitor { ArenaVector<HArrayGet*> ambiguous_agets_; ArenaVector<HArraySet*> ambiguous_asets_; + ArenaVector<HNewInstance*> uninitialized_strings_; // HEnvironment for each block. ArenaVector<ArenaVector<HInstruction*>> locals_for_;
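To summarize the ssa_builder.cc and ssa_builder.h changes above: an illustrative C++ sketch only, with hypothetical stand-in types rather than the real HInstruction API. The builder first reroutes every local slot that holds the uninitialized String `this` to the StringFactory result; a later pass then drops any NewInstance whose only remaining uses are environment (deoptimization) uses.

#include <vector>

struct Value {
  int non_environment_uses = 0;
};

// Mirrors the vreg walk in SsaBuilder::VisitInvokeStaticOrDirect: any local
// still holding `arg_this` now holds the factory call's result instead.
void RedirectLocals(std::vector<Value*>& locals, Value* arg_this, Value* invoke) {
  for (Value*& slot : locals) {
    if (slot == arg_this) {
      slot = invoke;
    }
  }
}

// Mirrors the test in RemoveRedundantUninitializedStrings: when only
// environment uses remain, a null constant can stand in for the
// pre-allocated String, since the interpreter skips the null check on the
// StringFactory `this` argument after a deoptimization.
bool IsRedundantUninitializedString(const Value& new_instance) {
  return new_instance.non_environment_uses == 0;
}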