Diffstat (limited to 'compiler/optimizing')
64 files changed, 2468 insertions, 848 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 9c2068ec5e..d893cc88c4 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -302,7 +302,7 @@ class ValueRange : public ArenaObject<kArenaAllocBoundsCheckElimination> { ValueBound GetLower() const { return lower_; } ValueBound GetUpper() const { return upper_; } - bool IsConstantValueRange() { return lower_.IsConstant() && upper_.IsConstant(); } + bool IsConstantValueRange() const { return lower_.IsConstant() && upper_.IsConstant(); } // If it's certain that this value range fits in other_range. virtual bool FitsIn(ValueRange* other_range) const { @@ -789,24 +789,33 @@ class BCEVisitor : public HGraphVisitor { ApplyRangeFromComparison(left, block, false_successor, new_range); } } else if (cond == kCondNE || cond == kCondEQ) { - if (left->IsArrayLength() && lower.IsConstant() && upper.IsConstant()) { - // Special case: - // length == [c,d] yields [c, d] along true - // length != [c,d] yields [c, d] along false - if (!lower.Equals(ValueBound::Min()) || !upper.Equals(ValueBound::Max())) { - ValueRange* new_range = new (&allocator_) ValueRange(&allocator_, lower, upper); - ApplyRangeFromComparison( - left, block, cond == kCondEQ ? true_successor : false_successor, new_range); - } - // In addition: - // length == 0 yields [1, max] along false - // length != 0 yields [1, max] along true - if (lower.GetConstant() == 0 && upper.GetConstant() == 0) { - ValueRange* new_range = new (&allocator_) ValueRange( - &allocator_, ValueBound(nullptr, 1), ValueBound::Max()); - ApplyRangeFromComparison( - left, block, cond == kCondEQ ? false_successor : true_successor, new_range); + if (left->IsArrayLength()) { + if (lower.IsConstant() && upper.IsConstant()) { + // Special case: + // length == [c,d] yields [c, d] along true + // length != [c,d] yields [c, d] along false + if (!lower.Equals(ValueBound::Min()) || !upper.Equals(ValueBound::Max())) { + ValueRange* new_range = new (&allocator_) ValueRange(&allocator_, lower, upper); + ApplyRangeFromComparison( + left, block, cond == kCondEQ ? true_successor : false_successor, new_range); + } + // In addition: + // length == 0 yields [1, max] along false + // length != 0 yields [1, max] along true + if (lower.GetConstant() == 0 && upper.GetConstant() == 0) { + ValueRange* new_range = new (&allocator_) ValueRange( + &allocator_, ValueBound(nullptr, 1), ValueBound::Max()); + ApplyRangeFromComparison( + left, block, cond == kCondEQ ? false_successor : true_successor, new_range); + } } + } else if (lower.IsRelatedToArrayLength() && lower.Equals(upper)) { + // Special aliasing case, with x not array length itself: + // x == [length,length] yields x == length along true + // x != [length,length] yields x == length along false + ValueRange* new_range = new (&allocator_) ValueRange(&allocator_, lower, upper); + ApplyRangeFromComparison( + left, block, cond == kCondEQ ? true_successor : false_successor, new_range); } } } @@ -827,9 +836,23 @@ class BCEVisitor : public HGraphVisitor { ValueRange array_range(&allocator_, lower, upper); // Try index range obtained by dominator-based analysis. 
ValueRange* index_range = LookupValueRange(index, block); - if (index_range != nullptr && index_range->FitsIn(&array_range)) { - ReplaceInstruction(bounds_check, index); - return; + if (index_range != nullptr) { + if (index_range->FitsIn(&array_range)) { + ReplaceInstruction(bounds_check, index); + return; + } else if (index_range->IsConstantValueRange()) { + // If the non-constant index turns out to have a constant range, + // make one more attempt to get a constant in the array range. + ValueRange* existing_range = LookupValueRange(array_length, block); + if (existing_range != nullptr && + existing_range->IsConstantValueRange()) { + ValueRange constant_array_range(&allocator_, lower, existing_range->GetLower()); + if (index_range->FitsIn(&constant_array_range)) { + ReplaceInstruction(bounds_check, index); + return; + } + } + } } // Try index range obtained by induction variable analysis. // Disables dynamic bce if OOB is certain. diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index af537dd653..a1a5692ef6 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -43,7 +43,7 @@ HGraphBuilder::HGraphBuilder(HGraph* graph, CompilerDriver* driver, CodeGenerator* code_generator, OptimizingCompilerStats* compiler_stats, - const uint8_t* interpreter_metadata, + ArrayRef<const uint8_t> interpreter_metadata, VariableSizedHandleScope* handles) : graph_(graph), dex_file_(&graph->GetDexFile()), @@ -70,7 +70,6 @@ HGraphBuilder::HGraphBuilder(HGraph* graph, compiler_driver_(nullptr), code_generator_(nullptr), compilation_stats_(nullptr), - interpreter_metadata_(nullptr), handles_(handles), return_type_(return_type) {} diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index c16a3a928d..5a1914ce08 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_BUILDER_H_ #include "base/arena_object.h" +#include "base/array_ref.h" #include "dex/code_item_accessors.h" #include "dex/dex_file-inl.h" #include "dex/dex_file.h" @@ -40,7 +41,7 @@ class HGraphBuilder : public ValueObject { CompilerDriver* driver, CodeGenerator* code_generator, OptimizingCompilerStats* compiler_stats, - const uint8_t* interpreter_metadata, + ArrayRef<const uint8_t> interpreter_metadata, VariableSizedHandleScope* handles); // Only for unit testing. 
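The new fallback in VisitBoundsCheck above handles an index that is not symbolically provable against the array length but whose dominator-based range turns out to be constant: if the array length also has a known constant range, the bounds check reduces to plain integer comparisons. A minimal sketch of the semantic condition, outside the ValueRange/ValueBound machinery (names are illustrative, not the ART API):

    #include <cstdint>

    // An index range [lo, hi] is provably in-bounds for an array whose length
    // is known to lie in [len_lo, len_hi] when every index is valid even for
    // the smallest possible length.
    struct ConstRange { int32_t lo; int32_t hi; };  // inclusive bounds

    bool BoundsCheckProvablyRedundant(ConstRange index, ConstRange length) {
      return index.lo >= 0 && index.hi < length.lo;
    }

This is the condition the FitsIn() call against constant_array_range in the hunk above is after; when it holds, the HBoundsCheck is replaced by the index itself.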
@@ -73,7 +74,7 @@ class HGraphBuilder : public ValueObject { CodeGenerator* const code_generator_; OptimizingCompilerStats* const compilation_stats_; - const uint8_t* const interpreter_metadata_; + const ArrayRef<const uint8_t> interpreter_metadata_; VariableSizedHandleScope* const handles_; const DataType::Type return_type_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 13886b32b3..3fd88e3e18 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1488,6 +1488,14 @@ void CodeGeneratorARM64::GenerateFrameEntry() { MacroAssembler* masm = GetVIXLAssembler(); __ Bind(&frame_entry_label_); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireX(); + __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp, temp, 1); + __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + } + bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod(); if (do_overflow_check) { @@ -1881,6 +1889,8 @@ void CodeGeneratorARM64::Load(DataType::Type type, DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); __ Ldr(dst, src); break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } @@ -1959,6 +1969,8 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, __ Fmov(FPRegister(dst), temp); break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } @@ -1986,6 +1998,8 @@ void CodeGeneratorARM64::Store(DataType::Type type, DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); __ Str(src, dst); break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } @@ -2063,6 +2077,8 @@ void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, } break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } @@ -2097,13 +2113,17 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireW(); - size_t status_offset = mirror::Class::StatusOffset().SizeValue(); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); // Even if the initialized flag is set, we need to ensure consistent memory ordering. // TODO(vixl): Let the MacroAssembler handle MemOperand. 
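The builder.h/builder.cc change above swaps a raw const uint8_t* (with an implicit nullptr-means-absent convention) for ArrayRef<const uint8_t>, a non-owning pointer-plus-size view whose default state is empty. A simplified stand-in showing the shape of the type (the real class lives in base/array_ref.h and has a much richer interface):

    #include <cstddef>

    // Simplified stand-in for art::ArrayRef: a non-owning view over T[].
    template <typename T>
    class ArrayRefSketch {
     public:
      constexpr ArrayRefSketch() : data_(nullptr), size_(0u) {}  // empty; replaces nullptr
      constexpr ArrayRefSketch(T* data, size_t size) : data_(data), size_(size) {}
      constexpr bool empty() const { return size_ == 0u; }
      constexpr T* data() const { return data_; }
      constexpr size_t size() const { return size_; }
     private:
      T* data_;
      size_t size_;
    };

With this, the constructor that previously initialized interpreter_metadata_(nullptr) can simply default-construct the member, which is why that initializer is deleted above.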
- __ Add(temp, class_reg, status_offset); + __ Add(temp, class_reg, status_byte_offset); __ Ldarb(temp, HeapOperand(temp)); - __ Cmp(temp, enum_cast<>(ClassStatus::kInitialized)); + __ Cmp(temp, shifted_initialized_value); __ B(lo, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -3487,12 +3507,25 @@ void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant } void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp1 = temps.AcquireX(); + Register temp2 = temps.AcquireX(); + __ Ldr(temp1, MemOperand(sp, 0)); + __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp2, temp2, 1); + __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 7c6a5fde40..6d49b32dbc 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -2485,13 +2485,36 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); __ Bind(&frame_entry_label_); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp, temp, 1); + __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + } + if (HasEmptyFrame()) { return; } if (!skip_overflow_check) { + // Using r4 instead of IP saves 2 bytes. UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); + vixl32::Register temp; + // TODO: Remove this check when R4 is made a callee-save register + // in ART compiled code (b/72801708). Currently we need to make + // sure r4 is not blocked, e.g. in special purpose + // TestCodeGeneratorARMVIXL; also asserting that r4 is available + // here. + if (!blocked_core_registers_[R4]) { + for (vixl32::Register reg : kParameterCoreRegistersVIXL) { + DCHECK(!reg.Is(r4)); + } + DCHECK(!kCoreCalleeSaves.Includes(r4)); + temp = r4; + } else { + temp = temps.Acquire(); + } __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm))); // The load must immediately precede RecordPcInfo. 
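The hotness snippets added to GenerateFrameEntry and HandleGoto in each backend implement one idea: when CountHotnessInCompiledCode() is enabled, compiled code increments the 16-bit hotness counter in the current ArtMethod itself, both on method entry and on loop back edges (where the method pointer is reloaded from the first stack slot, since the method register is not preserved across the loop body). The ldrh/add/strh and addw sequences are all equivalent to roughly this (field name illustrative):

    #include <cstdint>

    struct ArtMethodSketch {
      uint16_t hotness_count_;  // the halfword at ArtMethod::HotnessCountOffset()
    };

    // Equivalent of the emitted load-halfword / add 1 / store-halfword.
    // Deliberately a plain, non-atomic read-modify-write: a racy lost update
    // only drops one tick of a sampling counter, which is acceptable here.
    inline void BumpHotness(ArtMethodSketch* method) {
      method->hotness_count_ = static_cast<uint16_t>(method->hotness_count_ + 1u);
    }

The MIPS backends only log a warning for now, as their GenerateFrameEntry hunks below show.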
ExactAssemblyScope aas(GetVIXLAssembler(), @@ -2642,6 +2665,8 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Typ } } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; @@ -2657,6 +2682,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::T case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: + case DataType::Type::kUint32: case DataType::Type::kInt32: { return LocationFrom(r0); } @@ -2665,6 +2691,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::T return LocationFrom(s0); } + case DataType::Type::kUint64: case DataType::Type::kInt64: { return LocationFrom(r0, r1); } @@ -2776,12 +2803,26 @@ void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_poi } void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Push(vixl32::Register(kMethodRegister)); + GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize); + __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp, temp, 1); + __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Pop(vixl32::Register(kMethodRegister)); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -5490,6 +5531,8 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); @@ -5734,6 +5777,8 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << load_type; UNREACHABLE(); @@ -6226,6 +6271,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -6515,6 +6562,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << value_type; UNREACHABLE(); @@ -7172,11 +7221,14 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); - GetAssembler()->LoadFromOffset(kLoadUnsignedByte, - temp, - class_reg, - mirror::Class::StatusOffset().Int32Value()); - __ Cmp(temp, enum_cast<>(ClassStatus::kInitialized)); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); 
+ const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + GetAssembler()->LoadFromOffset(kLoadUnsignedByte, temp, class_reg, status_byte_offset); + __ Cmp(temp, shifted_initialized_value); __ B(lo, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we may be in a situation where caches are not synced // properly. Therefore, we do a memory fence. diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index ebe252a9c8..855da2b18f 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -58,9 +58,11 @@ Location MipsReturnLocation(DataType::Type return_type) { case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: + case DataType::Type::kUint32: case DataType::Type::kInt32: return Location::RegisterLocation(V0); + case DataType::Type::kUint64: case DataType::Type::kInt64: return Location::RegisterPairLocation(V0, V1); @@ -140,6 +142,8 @@ Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(DataType::Type t break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; @@ -391,7 +395,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -1276,6 +1280,10 @@ static dwarf::Reg DWARFReg(Register reg) { void CodeGeneratorMIPS::GenerateFrameEntry() { __ Bind(&frame_entry_label_); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + LOG(WARNING) << "Unimplemented hotness update in mips backend"; + } + bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips) || !IsLeafMethod(); @@ -1915,8 +1923,14 @@ void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool d void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg) { - __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, mirror::Class::StatusOffset().Int32Value()); - __ LoadConst32(AT, enum_cast<>(ClassStatus::kInitialized)); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); + __ LoadConst32(AT, shifted_initialized_value); __ Bltu(TMP, AT, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we need to ensure consistent memory ordering. 
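Each backend's class-initialization check changes identically: the ClassStatus bits no longer start at bit 0 of the 32-bit status word (the SubtypeCheckBits bitstruct now occupies the low bits), so the generated code loads the single byte containing the status and compares it against kInitialized pre-shifted into that byte. The arithmetic, pulled out of the snippets with placeholder constants (the real values come from SubtypeCheckBits::BitStructSizeOf() and the ClassStatus enum):

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kBitsPerByte = 8;

    // Placeholders, NOT the runtime's real values:
    constexpr size_t kStatusLsbPosition = 28;   // assumed SubtypeCheckBits width
    constexpr uint32_t kInitialized = 14;       // assumed ClassStatus::kInitialized

    // Byte within the status word that holds the ClassStatus bits...
    constexpr size_t kStatusByteOffset = kStatusLsbPosition / kBitsPerByte;
    // ...and kInitialized moved to its bit position inside that byte.
    constexpr uint32_t kShiftedInitialized =
        kInitialized << (kStatusLsbPosition % kBitsPerByte);

An unsigned below-comparison of the loaded byte against the shifted value still orders statuses correctly, because the status occupies the high bits of that byte and any SubtypeCheckBits bits below it can only increase the byte's value without reaching the next status.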
__ Sync(0); @@ -2811,6 +2825,8 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -3126,6 +3142,8 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -3265,26 +3283,8 @@ static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { } void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. - break; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - } - + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); @@ -3313,18 +3313,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); MipsLabel done; - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. 
- bool is_type_check_slow_path_fatal = false; - if (!kEmitCompilerReadBarrier) { - is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); - } + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( instruction, is_type_check_slow_path_fatal); @@ -4028,7 +4017,11 @@ void LocationsBuilderMIPS::VisitGoto(HGoto* got) { } void InstructionCodeGeneratorMIPS::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); @@ -6306,6 +6299,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, case DataType::Type::kFloat64: load_type = kLoadDoubleword; break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -6459,6 +6454,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, case DataType::Type::kFloat64: store_type = kStoreDoubleword; break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -7187,11 +7184,12 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: @@ -7239,13 +7237,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Classes must be equal for the instanceof to succeed. 
__ Xor(out, out, cls); __ Sltiu(out, out, 1); @@ -7253,13 +7253,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. MipsLabel loop; @@ -7269,7 +7271,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); __ Bne(out, cls, &loop); @@ -7278,13 +7280,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); @@ -7294,7 +7298,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ Bnez(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -7304,13 +7308,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. MipsLabel success; __ Beq(out, cls, &success); @@ -7320,7 +7326,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. 
__ Beqz(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 3ea7b827bb..8a06061c6a 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -55,8 +55,10 @@ Location Mips64ReturnLocation(DataType::Type return_type) { case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: + case DataType::Type::kUint32: case DataType::Type::kInt32: case DataType::Type::kReference: + case DataType::Type::kUint64: case DataType::Type::kInt64: return Location::RegisterLocation(V0); @@ -350,7 +352,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -1079,6 +1081,10 @@ static dwarf::Reg DWARFReg(FpuRegister reg) { void CodeGeneratorMIPS64::GenerateFrameEntry() { __ Bind(&frame_entry_label_); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + LOG(WARNING) << "Unimplemented hotness update in mips64 backend"; + } + bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips64) || !IsLeafMethod(); @@ -1761,8 +1767,14 @@ void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) { void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg) { - __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, mirror::Class::StatusOffset().Int32Value()); - __ LoadConst32(AT, enum_cast<>(ClassStatus::kInitialized)); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); + __ LoadConst32(AT, shifted_initialized_value); __ Bltuc(TMP, AT, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we need to ensure consistent memory ordering. 
__ Sync(0); @@ -2398,6 +2410,8 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -2701,6 +2715,8 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -2820,26 +2836,8 @@ static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { } void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. - break; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - } - + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); @@ -2868,18 +2866,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); Mips64Label done; - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. 
- bool is_type_check_slow_path_fatal = false; - if (!kEmitCompilerReadBarrier) { - is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); - } + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( instruction, is_type_check_slow_path_fatal); @@ -3556,7 +3543,11 @@ void InstructionCodeGeneratorMIPS64::VisitFloatConstant(HFloatConstant* constant } void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); @@ -4784,6 +4775,8 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, case DataType::Type::kReference: load_type = kLoadUnsignedWord; break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -4877,6 +4870,8 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, case DataType::Type::kFloat64: store_type = kStoreDoubleword; break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -5504,11 +5499,12 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: @@ -5556,13 +5552,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Classes must be equal for the instanceof to succeed. 
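This is the second backend (after MIPS above) whose inline CheckCast logic is replaced by the shared CodeGenerator::GetCheckCastCallKind / CodeGenerator::IsTypeCheckSlowPathFatal helpers. Reconstructed from the deleted code, the predicate being centralized looks like this (a sketch of the deleted per-backend logic, not the helpers' actual source):

    enum class TypeCheckKind {
      kExactCheck, kAbstractClassCheck, kClassHierarchyCheck,
      kArrayObjectCheck, kArrayCheck, kUnresolvedCheck, kInterfaceCheck,
    };

    // A fatal slow path never returns, so it can skip saving live registers.
    // Per the deleted comments: never fatal with read barriers, because the
    // entrypoint may be needed for non-fatal false negatives.
    bool IsTypeCheckSlowPathFatalSketch(TypeCheckKind kind,
                                        bool can_throw_into_catch,
                                        bool emit_read_barrier) {
      if (emit_read_barrier) {
        return false;
      }
      switch (kind) {
        case TypeCheckKind::kExactCheck:
        case TypeCheckKind::kAbstractClassCheck:
        case TypeCheckKind::kClassHierarchyCheck:
        case TypeCheckKind::kArrayObjectCheck:
          return !can_throw_into_catch;
        default:
          return false;
      }
    }

GetCheckCastCallKind presumably folds the same decision into the kNoCall versus kCallOnSlowPath choice; the TypeCheckSlowPath hunks, which now save live registers when !is_fatal_ || CanThrowIntoCatchBlock(), are the other half of the same cleanup.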
__ Xor(out, out, cls); __ Sltiu(out, out, 1); @@ -5570,13 +5568,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. Mips64Label loop; @@ -5586,7 +5586,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqzc(out, &done); __ Bnec(out, cls, &loop); @@ -5595,13 +5595,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. Mips64Label loop, success; __ Bind(&loop); @@ -5611,7 +5613,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ Bnezc(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ Bc(&done); @@ -5621,13 +5623,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. Mips64Label success; __ Beqc(out, cls, &success); @@ -5637,7 +5641,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. 
__ Beqzc(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 152a59c208..174efdf115 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -606,22 +606,20 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H()); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S()); - } else { - __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S()); - } + __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S()); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S()); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: @@ -656,22 +654,20 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H()); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S()); - } else { - __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S()); - } + __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S()); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S()); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index cc470ddb2e..7c3155ab73 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -431,13 +431,13 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vmin(DataTypeValue::S16, dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vmin(DataTypeValue::U32, dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Vmin(DataTypeValue::U32, dst, lhs, rhs); - } else { - __ Vmin(DataTypeValue::S32, dst, lhs, rhs); - } + __ Vmin(DataTypeValue::S32, dst, lhs, rhs); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -471,13 +471,13 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vmax(DataTypeValue::S16, dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vmax(DataTypeValue::U32, dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); - if 
(instruction->IsUnsigned()) { - __ Vmax(DataTypeValue::U32, dst, lhs, rhs); - } else { - __ Vmax(DataTypeValue::S32, dst, lhs, rhs); - } + __ Vmax(DataTypeValue::S32, dst, lhs, rhs); break; default: LOG(FATAL) << "Unsupported SIMD type"; diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index 3cf150a6b8..ed9de96496 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -613,32 +613,30 @@ void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Min_sH(dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Min_uW(dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uW(dst, lhs, rhs); - } else { - __ Min_sW(dst, lhs, rhs); - } + __ Min_sW(dst, lhs, rhs); + break; + case DataType::Type::kUint64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Min_uD(dst, lhs, rhs); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uD(dst, lhs, rhs); - } else { - __ Min_sD(dst, lhs, rhs); - } + __ Min_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmin.df returns other argument, but Java expects a NaN value. // TODO: Fix min(x, NaN) cases for float and double. case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminW(dst, lhs, rhs); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminD(dst, lhs, rhs); break; default: @@ -673,32 +671,30 @@ void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Max_sH(dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Max_uW(dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uW(dst, lhs, rhs); - } else { - __ Max_sW(dst, lhs, rhs); - } + __ Max_sW(dst, lhs, rhs); + break; + case DataType::Type::kUint64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Max_uD(dst, lhs, rhs); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uD(dst, lhs, rhs); - } else { - __ Max_sD(dst, lhs, rhs); - } + __ Max_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmax.df returns other argument, but Java expects a NaN value. // TODO: Fix max(x, NaN) cases for float and double. 
case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxW(dst, lhs, rhs); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxD(dst, lhs, rhs); break; default: diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 2d69533f21..9ea55ec8d7 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -612,32 +612,30 @@ void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Min_sH(dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Min_uW(dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uW(dst, lhs, rhs); - } else { - __ Min_sW(dst, lhs, rhs); - } + __ Min_sW(dst, lhs, rhs); + break; + case DataType::Type::kUint64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Min_uD(dst, lhs, rhs); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uD(dst, lhs, rhs); - } else { - __ Min_sD(dst, lhs, rhs); - } + __ Min_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmin.df returns other argument, but Java expects a NaN value. // TODO: Fix min(x, NaN) cases for float and double. case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminW(dst, lhs, rhs); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminD(dst, lhs, rhs); break; default: @@ -672,32 +670,30 @@ void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ Max_sH(dst, lhs, rhs); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Max_uW(dst, lhs, rhs); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uW(dst, lhs, rhs); - } else { - __ Max_sW(dst, lhs, rhs); - } + __ Max_sW(dst, lhs, rhs); + break; + case DataType::Type::kUint64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Max_uD(dst, lhs, rhs); break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uD(dst, lhs, rhs); - } else { - __ Max_sD(dst, lhs, rhs); - } + __ Max_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmax.df returns other argument, but Java expects a NaN value. // TODO: Fix max(x, NaN) cases for float and double. 
case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxW(dst, lhs, rhs); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxD(dst, lhs, rhs); break; default: diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index ad8128a5b1..f2ffccc887 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -92,8 +92,8 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i __ pshufd(dst, dst, Immediate(0)); break; case DataType::Type::kInt64: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); __ punpckldq(dst, tmp); @@ -101,13 +101,13 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i break; } case DataType::Type::kFloat32: - DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(locations->InAt(0).Equals(locations->Out())); __ shufps(dst, dst, Immediate(0)); break; case DataType::Type::kFloat64: - DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(locations->InAt(0).Equals(locations->Out())); __ shufpd(dst, dst, Immediate(0)); break; default: @@ -160,8 +160,8 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr __ movd(locations->Out().AsRegister<Register>(), src); break; case DataType::Type::kInt64: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ movd(locations->Out().AsRegisterPairLow<Register>(), src); __ pshufd(tmp, src, Immediate(1)); __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp); @@ -640,23 +640,21 @@ void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ pminsw(dst, src); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pminud(dst, src); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pminud(dst, src); - } else { - __ pminsd(dst, src); - } + __ pminsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minpd(dst, src); break; default: @@ -691,23 +689,21 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ pmaxsw(dst, src); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pmaxud(dst, src); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pmaxud(dst, src); - } else { - __ pmaxsd(dst, src); - } + __ pmaxsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. 
case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxpd(dst, src); break; default: @@ -1022,8 +1018,8 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction __ movd(dst, locations->InAt(0).AsRegister<Register>()); break; case DataType::Type::kInt64: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ xorps(tmp, tmp); __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 107030e6c2..e2b0485f89 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -623,23 +623,21 @@ void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ pminsw(dst, src); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pminud(dst, src); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pminud(dst, src); - } else { - __ pminsd(dst, src); - } + __ pminsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minpd(dst, src); break; default: @@ -674,23 +672,21 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ pmaxsw(dst, src); break; + case DataType::Type::kUint32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pmaxud(dst, src); + break; case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pmaxud(dst, src); - } else { - __ pmaxsd(dst, src); - } + __ pmaxsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. 
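Every vector min/max hunk across the backends makes the same substitution: the HVecMin/HVecMax nodes stop carrying an IsUnsigned() flag, and signedness is instead encoded in the packed element type (new kUint32/kUint64 cases alongside kInt32/kInt64), so each backend picks the signed or unsigned machine instruction from the type alone. Schematically, for the ARM64 cases shown above:

    enum class PackedType { kUint32, kInt32 };  // trimmed to the cases above

    // After this change the NEON opcode follows from the element type alone;
    // there is no per-instruction "unsigned" bit to consult (nor to assert
    // on for the float cases, which is where the removed DCHECKs lived).
    const char* Arm64MinOpcode(PackedType type) {
      switch (type) {
        case PackedType::kUint32: return "umin";  // unsigned packed min, V4S
        case PackedType::kInt32:  return "smin";  // signed packed min, V4S
      }
      return nullptr;  // unreachable
    }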
case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxps(dst, src); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxpd(dst, src); break; default: diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 68532386e1..5fede80bc7 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1061,6 +1061,11 @@ void CodeGeneratorX86::GenerateFrameEntry() { IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + __ addw(Address(kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(1)); + } + if (!skip_overflow_check) { size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86); __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes))); @@ -1129,9 +1134,11 @@ Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: + case DataType::Type::kUint32: case DataType::Type::kInt32: return Location::RegisterLocation(EAX); + case DataType::Type::kUint64: case DataType::Type::kInt64: return Location::RegisterPairLocation(EAX, EDX); @@ -1201,6 +1208,8 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type ty } } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; @@ -1347,13 +1356,22 @@ void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* loc } void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + __ pushl(EAX); + __ movl(EAX, Address(ESP, kX86WordSize)); + __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1)); + __ popl(EAX); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -4830,6 +4848,8 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << load_type; UNREACHABLE(); @@ -5003,6 +5023,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); @@ -5306,6 +5328,8 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -5557,6 +5581,8 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: 
case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -6219,8 +6245,13 @@ void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( SlowPathCode* slow_path, Register class_reg) { - __ cmpb(Address(class_reg, mirror::Class::StatusOffset().Int32Value()), - Immediate(enum_cast<>(ClassStatus::kInitialized))); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value)); __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); // No need for memory fence, thanks to the X86 memory model. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 1f8d822507..ae35ab5983 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1268,6 +1268,12 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + __ addw(Address(CpuRegister(kMethodRegisterArgument), + ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(1)); + } + if (!skip_overflow_check) { size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64); __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes))); @@ -1449,13 +1455,21 @@ void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* } void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0)); + __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(1)); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -2259,7 +2273,9 @@ Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Ty case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: + case DataType::Type::kUint32: case DataType::Type::kInt32: + case DataType::Type::kUint64: case DataType::Type::kInt64: return Location::RegisterLocation(RAX); @@ -2328,6 +2344,8 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type } } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; @@ -4293,6 +4311,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " 
<< load_type; UNREACHABLE(); @@ -4456,6 +4476,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); @@ -4749,6 +4771,8 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -4988,6 +5012,8 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -5425,8 +5451,13 @@ void ParallelMoveResolverX86_64::RestoreScratch(int reg) { void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( SlowPathCode* slow_path, CpuRegister class_reg) { - __ cmpb(Address(class_reg, mirror::Class::StatusOffset().Int32Value()), - Immediate(enum_cast<>(ClassStatus::kInitialized))); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value)); __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); // No need for memory fence, thanks to the x86-64 memory model. diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index d8ebac95a8..f4760d661f 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -34,7 +34,9 @@ void CodeSinking::Run() { // TODO(ngeoffray): we do not profile branches yet, so use throw instructions // as an indicator of an uncommon branch. for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) { - if (exit_predecessor->GetLastInstruction()->IsThrow()) { + HInstruction* last = exit_predecessor->GetLastInstruction(); + // Any predecessor of the exit that does not return throws an exception.
+ if (!last->IsReturn() && !last->IsReturnVoid()) { SinkCodeToUncommonBranch(exit_predecessor); } } diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 6eda289861..ba4040acad 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -74,8 +74,8 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { class CodegenTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0); - void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected); + void TestCode(const std::vector<uint16_t>& data, bool has_result = false, int32_t expected = 0); + void TestCodeLong(const std::vector<uint16_t>& data, bool has_result, int64_t expected); void TestComparison(IfCondition condition, int64_t i, int64_t j, @@ -83,7 +83,7 @@ class CodegenTest : public OptimizingUnitTest { const CodegenTargetConfig target_config); }; -void CodegenTest::TestCode(const uint16_t* data, bool has_result, int32_t expected) { +void CodegenTest::TestCode(const std::vector<uint16_t>& data, bool has_result, int32_t expected) { for (const CodegenTargetConfig& target_config : GetTargetConfigs()) { ResetPoolAndAllocator(); HGraph* graph = CreateCFG(data); @@ -93,7 +93,8 @@ void CodegenTest::TestCode(const uint16_t* data, bool has_result, int32_t expect } } -void CodegenTest::TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { +void CodegenTest::TestCodeLong(const std::vector<uint16_t>& data, + bool has_result, int64_t expected) { for (const CodegenTargetConfig& target_config : GetTargetConfigs()) { ResetPoolAndAllocator(); HGraph* graph = CreateCFG(data, DataType::Type::kInt64); @@ -104,12 +105,12 @@ void CodegenTest::TestCodeLong(const uint16_t* data, bool has_result, int64_t ex } TEST_F(CodegenTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID); + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID); TestCode(data); } TEST_F(CodegenTest, CFG1) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -117,7 +118,7 @@ TEST_F(CodegenTest, CFG1) { } TEST_F(CodegenTest, CFG2) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -126,21 +127,21 @@ TEST_F(CodegenTest, CFG2) { } TEST_F(CodegenTest, CFG3) { - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, Instruction::RETURN_VOID, Instruction::GOTO | 0xFF00); TestCode(data1); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); TestCode(data2); - const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); @@ -149,7 +150,7 @@ TEST_F(CodegenTest, CFG3) { } TEST_F(CodegenTest, CFG4) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, Instruction::GOTO | 0x100, Instruction::GOTO | 0xFE00); @@ -158,7 +159,7 @@ 
TEST_F(CodegenTest, CFG4) { } TEST_F(CodegenTest, CFG5) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -168,7 +169,7 @@ TEST_F(CodegenTest, CFG5) { } TEST_F(CodegenTest, IntConstant) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); @@ -176,7 +177,7 @@ TEST_F(CodegenTest, IntConstant) { } TEST_F(CodegenTest, Return1) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN | 0); @@ -184,7 +185,7 @@ TEST_F(CodegenTest, Return1) { } TEST_F(CodegenTest, Return2) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 0 | 1 << 8, Instruction::RETURN | 1 << 8); @@ -193,7 +194,7 @@ TEST_F(CodegenTest, Return2) { } TEST_F(CodegenTest, Return3) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::RETURN | 1 << 8); @@ -202,7 +203,7 @@ TEST_F(CodegenTest, Return3) { } TEST_F(CodegenTest, ReturnIf1) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::IF_EQ, 3, @@ -213,7 +214,7 @@ TEST_F(CodegenTest, ReturnIf1) { } TEST_F(CodegenTest, ReturnIf2) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::IF_EQ | 0 << 4 | 1 << 8, 3, @@ -224,17 +225,17 @@ TEST_F(CodegenTest, ReturnIf2) { } // Exercise bit-wise (one's complement) not-int instruction. -#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ -TEST_F(CodegenTest, TEST_NAME) { \ - const int32_t input = INPUT; \ - const uint16_t input_lo = Low16Bits(input); \ - const uint16_t input_hi = High16Bits(input); \ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( \ - Instruction::CONST | 0 << 8, input_lo, input_hi, \ - Instruction::NOT_INT | 1 << 8 | 0 << 12 , \ - Instruction::RETURN | 1 << 8); \ - \ - TestCode(data, true, EXPECTED_OUTPUT); \ +#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ +TEST_F(CodegenTest, TEST_NAME) { \ + const int32_t input = INPUT; \ + const uint16_t input_lo = Low16Bits(input); \ + const uint16_t input_hi = High16Bits(input); \ + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( \ + Instruction::CONST | 0 << 8, input_lo, input_hi, \ + Instruction::NOT_INT | 1 << 8 | 0 << 12 , \ + Instruction::RETURN | 1 << 8); \ + \ + TestCode(data, true, EXPECTED_OUTPUT); \ } NOT_INT_TEST(ReturnNotIntMinus2, -2, 1) @@ -256,7 +257,7 @@ TEST_F(CodegenTest, TEST_NAME) { \ const uint16_t word1 = High16Bits(Low32Bits(input)); \ const uint16_t word2 = Low16Bits(High32Bits(input)); \ const uint16_t word3 = High16Bits(High32Bits(input)); /* MSW. 
*/ \ - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( \ + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( \ Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3, \ Instruction::NOT_LONG | 2 << 8 | 0 << 12, \ Instruction::RETURN_WIDE | 2 << 8); \ @@ -306,7 +307,7 @@ TEST_F(CodegenTest, IntToLongOfLongToInt) { const uint16_t word1 = High16Bits(Low32Bits(input)); const uint16_t word2 = Low16Bits(High32Bits(input)); const uint16_t word3 = High16Bits(High32Bits(input)); // MSW. - const uint16_t data[] = FIVE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FIVE_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3, Instruction::CONST_WIDE | 2 << 8, 1, 0, 0, 0, Instruction::ADD_LONG | 0, 0 << 8 | 2, // v0 <- 2^32 + 1 @@ -318,7 +319,7 @@ TEST_F(CodegenTest, IntToLongOfLongToInt) { } TEST_F(CodegenTest, ReturnAdd1) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT, 1 << 8 | 0, @@ -328,7 +329,7 @@ TEST_F(CodegenTest, ReturnAdd1) { } TEST_F(CodegenTest, ReturnAdd2) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, @@ -338,7 +339,7 @@ TEST_F(CodegenTest, ReturnAdd2) { } TEST_F(CodegenTest, ReturnAdd3) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); @@ -347,7 +348,7 @@ TEST_F(CodegenTest, ReturnAdd3) { } TEST_F(CodegenTest, ReturnAdd4) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT16, 3, Instruction::RETURN); @@ -356,7 +357,7 @@ TEST_F(CodegenTest, ReturnAdd4) { } TEST_F(CodegenTest, ReturnMulInt) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT, 1 << 8 | 0, @@ -366,7 +367,7 @@ TEST_F(CodegenTest, ReturnMulInt) { } TEST_F(CodegenTest, ReturnMulInt2addr) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT_2ADDR | 1 << 12, @@ -376,7 +377,7 @@ TEST_F(CodegenTest, ReturnMulInt2addr) { } TEST_F(CodegenTest, ReturnMulLong) { - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG, 2 << 8 | 0, @@ -386,7 +387,7 @@ TEST_F(CodegenTest, ReturnMulLong) { } TEST_F(CodegenTest, ReturnMulLong2addr) { - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG_2ADDR | 2 << 12, @@ -396,7 +397,7 @@ TEST_F(CodegenTest, ReturnMulLong2addr) { } TEST_F(CodegenTest, ReturnMulIntLit8) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = 
ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); @@ -405,7 +406,7 @@ TEST_F(CodegenTest, ReturnMulIntLit8) { } TEST_F(CodegenTest, ReturnMulIntLit16) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT16, 3, Instruction::RETURN); @@ -578,7 +579,7 @@ TEST_F(CodegenTest, MaterializedCondition2) { } TEST_F(CodegenTest, ReturnDivIntLit8) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::DIV_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); @@ -587,7 +588,7 @@ TEST_F(CodegenTest, ReturnDivIntLit8) { } TEST_F(CodegenTest, ReturnDivInt2Addr) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0, Instruction::CONST_4 | 2 << 12 | 1 << 8, Instruction::DIV_INT_2ADDR | 1 << 12, diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index e1980e080e..d27104752b 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -36,7 +36,7 @@ class ConstantFoldingTest : public OptimizingUnitTest { public: ConstantFoldingTest() : graph_(nullptr) { } - void TestCode(const uint16_t* data, + void TestCode(const std::vector<uint16_t>& data, const std::string& expected_before, const std::string& expected_after_cf, const std::string& expected_after_dce, @@ -100,7 +100,7 @@ class ConstantFoldingTest : public OptimizingUnitTest { * return v1 2. return v1 */ TEST_F(ConstantFoldingTest, IntConstantFoldingNegation) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::NEG_INT | 1 << 8 | 0 << 12, Instruction::RETURN | 1 << 8); @@ -161,7 +161,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingNegation) { const uint16_t word1 = High16Bits(Low32Bits(input)); const uint16_t word2 = Low16Bits(High32Bits(input)); const uint16_t word3 = High16Bits(High32Bits(input)); // MSW. - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3, Instruction::NEG_LONG | 2 << 8 | 0 << 12, Instruction::RETURN_WIDE | 2 << 8); @@ -219,7 +219,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingNegation) { * return v2 4. return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition1) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, @@ -284,7 +284,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition1) { * return v2 8. return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition2) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::ADD_INT_2ADDR | 0 << 8 | 1 << 12, @@ -369,7 +369,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition2) { * return v2 4. 
return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingOnSubtraction) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 3 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::SUB_INT | 2 << 8, 0 | 1 << 8, @@ -432,7 +432,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnSubtraction) { * return (v4, v5) 6. return-wide v4 */ TEST_F(ConstantFoldingTest, LongConstantFoldingOnAddition) { - const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE_16 | 0 << 8, 1, Instruction::CONST_WIDE_16 | 2 << 8, 2, Instruction::ADD_LONG | 4 << 8, 0 | 2 << 8, @@ -496,7 +496,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnAddition) { * return (v4, v5) 6. return-wide v4 */ TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) { - const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE_16 | 0 << 8, 3, Instruction::CONST_WIDE_16 | 2 << 8, 2, Instruction::SUB_LONG | 4 << 8, 0 | 2 << 8, @@ -569,7 +569,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) { * return v2 13. return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingAndJumps) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, @@ -672,7 +672,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingAndJumps) { * return-void 7. return */ TEST_F(ConstantFoldingTest, ConstantCondition) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::IF_GEZ | 1 << 8, 3, diff --git a/compiler/optimizing/data_type-inl.h b/compiler/optimizing/data_type-inl.h index e389bad3ad..e2cf7a80fe 100644 --- a/compiler/optimizing/data_type-inl.h +++ b/compiler/optimizing/data_type-inl.h @@ -53,7 +53,9 @@ constexpr char DataType::TypeId(DataType::Type type) { case DataType::Type::kInt8: return 'b'; // Java byte (B). case DataType::Type::kUint16: return 'c'; // Java char (C). case DataType::Type::kInt16: return 's'; // Java short (S). + case DataType::Type::kUint32: return 'u'; // Picked 'u' for unsigned. case DataType::Type::kInt32: return 'i'; // Java int (I). + case DataType::Type::kUint64: return 'w'; // Picked 'w' for long unsigned. case DataType::Type::kInt64: return 'j'; // Java long (J). case DataType::Type::kFloat32: return 'f'; // Java float (F). case DataType::Type::kFloat64: return 'd'; // Java double (D). 
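[Editor's sketch] The Uint32/Uint64 additions above, and in the data_type files that follow, have to be threaded through every switch over DataType::Type. As a rough standalone illustration of the invariant these hunks preserve (simplified enum and helpers, not the actual ART sources): an unsigned integral must report the same width, and fuse to the same register kind, as its signed twin.

enum class Type { kBool, kUint8, kInt8, kUint16, kInt16,
                  kUint32, kInt32, kUint64, kInt64, kFloat32, kFloat64 };

// The 64-bit check must include the unsigned variant, or Uint64 values
// would be handed 32-bit locations by the code generators.
constexpr bool Is64BitType(Type type) {
  return type == Type::kUint64 || type == Type::kInt64 || type == Type::kFloat64;
}

// Fuses integer-like types in the spirit of DataType::Kind(): everything
// that fits a 32-bit core register maps to kInt32, 64-bit integrals to kInt64.
constexpr Type Kind(Type type) {
  switch (type) {
    case Type::kBool: case Type::kUint8: case Type::kInt8:
    case Type::kUint16: case Type::kInt16:
    case Type::kUint32: case Type::kInt32:
      return Type::kInt32;
    case Type::kUint64: case Type::kInt64:
      return Type::kInt64;
    default:
      return type;
  }
}

static_assert(Is64BitType(Type::kUint64), "Uint64 needs a 64-bit location");
static_assert(Kind(Type::kUint32) == Kind(Type::kInt32), "same register kind");

Keeping each unsigned variant adjacent to its signed twin in the enum keeps such switches compact, and an unhandled case is flagged by -Wswitch in defaultless switches.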
diff --git a/compiler/optimizing/data_type.cc b/compiler/optimizing/data_type.cc index 3c99a76c17..cb354f46cc 100644 --- a/compiler/optimizing/data_type.cc +++ b/compiler/optimizing/data_type.cc @@ -25,7 +25,9 @@ static const char* kTypeNames[] = { "Int8", "Uint16", "Int16", + "Uint32", "Int32", + "Uint64", "Int64", "Float32", "Float64", diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index 548fe28cee..4a6c91459f 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -34,7 +34,9 @@ class DataType { kInt8, kUint16, kInt16, + kUint32, kInt32, + kUint64, kInt64, kFloat32, kFloat64, @@ -55,9 +57,11 @@ class DataType { case Type::kUint16: case Type::kInt16: return 1; + case Type::kUint32: case Type::kInt32: case Type::kFloat32: return 2; + case Type::kUint64: case Type::kInt64: case Type::kFloat64: return 3; @@ -80,9 +84,11 @@ class DataType { case Type::kUint16: case Type::kInt16: return 2; + case Type::kUint32: case Type::kInt32: case Type::kFloat32: return 4; + case Type::kUint64: case Type::kInt64: case Type::kFloat64: return 8; @@ -107,7 +113,9 @@ class DataType { case Type::kInt8: case Type::kUint16: case Type::kInt16: + case Type::kUint32: case Type::kInt32: + case Type::kUint64: case Type::kInt64: return true; default: @@ -120,11 +128,12 @@ class DataType { } static bool Is64BitType(Type type) { - return type == Type::kInt64 || type == Type::kFloat64; + return type == Type::kUint64 || type == Type::kInt64 || type == Type::kFloat64; } static bool IsUnsignedType(Type type) { - return type == Type::kBool || type == Type::kUint8 || type == Type::kUint16; + return type == Type::kBool || type == Type::kUint8 || type == Type::kUint16 || + type == Type::kUint32 || type == Type::kUint64; } // Return the general kind of `type`, fusing integer-like types as Type::kInt. @@ -133,10 +142,14 @@ class DataType { case Type::kBool: case Type::kUint8: case Type::kInt8: - case Type::kInt16: case Type::kUint16: + case Type::kInt16: + case Type::kUint32: case Type::kInt32: return Type::kInt32; + case Type::kUint64: + case Type::kInt64: + return Type::kInt64; default: return type; } @@ -154,8 +167,12 @@ class DataType { return std::numeric_limits<uint16_t>::min(); case Type::kInt16: return std::numeric_limits<int16_t>::min(); + case Type::kUint32: + return std::numeric_limits<uint32_t>::min(); case Type::kInt32: return std::numeric_limits<int32_t>::min(); + case Type::kUint64: + return std::numeric_limits<uint64_t>::min(); case Type::kInt64: return std::numeric_limits<int64_t>::min(); default: @@ -176,8 +193,12 @@ class DataType { return std::numeric_limits<uint16_t>::max(); case Type::kInt16: return std::numeric_limits<int16_t>::max(); + case Type::kUint32: + return std::numeric_limits<uint32_t>::max(); case Type::kInt32: return std::numeric_limits<int32_t>::max(); + case Type::kUint64: + return std::numeric_limits<uint64_t>::max(); case Type::kInt64: return std::numeric_limits<int64_t>::max(); default: diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 3cc7b0e78d..9fa0f72e80 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -146,6 +146,141 @@ static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstructi } } +static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* throws) { + // Test for an if as last statement. 
+ if (!block->EndsWithIf()) { + return false; + } + HIf* ifs = block->GetLastInstruction()->AsIf(); + // Find either: + // if obj == null + // throws + // else + // not_throws + // or: + // if obj != null + // not_throws + // else + // throws + HInstruction* cond = ifs->InputAt(0); + HBasicBlock* not_throws = nullptr; + if (throws == ifs->IfTrueSuccessor() && cond->IsEqual()) { + not_throws = ifs->IfFalseSuccessor(); + } else if (throws == ifs->IfFalseSuccessor() && cond->IsNotEqual()) { + not_throws = ifs->IfTrueSuccessor(); + } else { + return false; + } + DCHECK(cond->IsEqual() || cond->IsNotEqual()); + HInstruction* obj = cond->InputAt(1); + if (obj->IsNullConstant()) { + obj = cond->InputAt(0); + } else if (!cond->InputAt(0)->IsNullConstant()) { + return false; + } + // Scan all uses of obj and find null check under control dependence. + HBoundType* bound = nullptr; + const HUseList<HInstruction*>& uses = obj->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end;) { + HInstruction* user = it->GetUser(); + ++it; // increment before possibly replacing + if (user->IsNullCheck()) { + HBasicBlock* user_block = user->GetBlock(); + if (user_block != block && + user_block != throws && + block->Dominates(user_block)) { + if (bound == nullptr) { + ReferenceTypeInfo ti = obj->GetReferenceTypeInfo(); + bound = new (obj->GetBlock()->GetGraph()->GetAllocator()) HBoundType(obj); + bound->SetUpperBound(ti, /*can_be_null*/ false); + bound->SetReferenceTypeInfo(ti); + bound->SetCanBeNull(false); + not_throws->InsertInstructionBefore(bound, not_throws->GetFirstInstruction()); + } + user->ReplaceWith(bound); + user_block->RemoveInstruction(user); + } + } + } + return bound != nullptr; +} + +// Simplify the pattern: +// +// B1 +// / \ +// | foo() // always throws +// \ goto B2 +// \ / +// B2 +// +// Into: +// +// B1 +// / \ +// | foo() +// | goto Exit +// | | +// B2 Exit +// +// Rationale: +// Removal of the never taken edge to B2 may expose +// other optimization opportunities, such as code sinking. +bool HDeadCodeElimination::SimplifyAlwaysThrows() { + // Make sure exceptions go to exit. + if (graph_->HasTryCatch()) { + return false; + } + HBasicBlock* exit = graph_->GetExitBlock(); + if (exit == nullptr) { + return false; + } + + bool rerun_dominance_and_loop_analysis = false; + + // Order does not matter, just pick one. + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + HInstruction* first = block->GetFirstInstruction(); + HInstruction* last = block->GetLastInstruction(); + // Ensure only one throwing instruction appears before goto. + if (first->AlwaysThrows() && + first->GetNext() == last && + last->IsGoto() && + block->GetPhis().IsEmpty() && + block->GetPredecessors().size() == 1u) { + DCHECK_EQ(block->GetSuccessors().size(), 1u); + HBasicBlock* pred = block->GetSinglePredecessor(); + HBasicBlock* succ = block->GetSingleSuccessor(); + // Ensure no computations are merged through throwing block. + // This does not prevent the optimization per se, but would + // require an elaborate clean up of the SSA graph. + if (succ != exit && + !block->Dominates(pred) && + pred->Dominates(succ) && + succ->GetPredecessors().size() > 1u && + succ->GetPhis().IsEmpty()) { + block->ReplaceSuccessor(succ, exit); + rerun_dominance_and_loop_analysis = true; + MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke); + // Perform a quick follow up optimization on object != null control dependences + // that is much cheaper to perform now than in a later phase. 
+ if (RemoveNonNullControlDependences(pred, block)) { + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck); + } + } + } + } + + // We need to re-analyze the graph in order to run DCE afterwards. + if (rerun_dominance_and_loop_analysis) { + graph_->ClearLoopInformation(); + graph_->ClearDominanceInformation(); + graph_->BuildDominatorTree(); + return true; + } + return false; +} + // Simplify the pattern: // // B1 B2 ... @@ -381,6 +516,7 @@ void HDeadCodeElimination::Run() { // Simplify graph to generate more dead block patterns. ConnectSuccessiveBlocks(); bool did_any_simplification = false; + did_any_simplification |= SimplifyAlwaysThrows(); did_any_simplification |= SimplifyIfs(); did_any_simplification |= RemoveDeadBlocks(); if (did_any_simplification) { diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 84fd890eee..92a7f562e1 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -40,6 +40,7 @@ class HDeadCodeElimination : public HOptimization { void MaybeRecordSimplifyIf(); bool RemoveDeadBlocks(); void RemoveDeadInstructions(); + bool SimplifyAlwaysThrows(); bool SimplifyIfs(); void ConnectSuccessiveBlocks(); diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index 929572ee3b..adb6ce1187 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -29,12 +29,12 @@ namespace art { class DeadCodeEliminationTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, + void TestCode(const std::vector<uint16_t>& data, const std::string& expected_before, const std::string& expected_after); }; -void DeadCodeEliminationTest::TestCode(const uint16_t* data, +void DeadCodeEliminationTest::TestCode(const std::vector<uint16_t>& data, const std::string& expected_before, const std::string& expected_after) { HGraph* graph = CreateCFG(data); @@ -73,7 +73,7 @@ void DeadCodeEliminationTest::TestCode(const uint16_t* data, * return-void 7. return */ TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::IF_GEZ | 1 << 8, 3, @@ -135,7 +135,7 @@ TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) { * return 13. 
return-void */ TEST_F(DeadCodeEliminationTest, AdditionsAndInconditionalJumps) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 572466eec8..1d72ba116e 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -26,10 +26,12 @@ namespace art { class OptimizerTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, const uint32_t* blocks, size_t blocks_length); + void TestCode(const std::vector<uint16_t>& data, const uint32_t* blocks, size_t blocks_length); }; -void OptimizerTest::TestCode(const uint16_t* data, const uint32_t* blocks, size_t blocks_length) { +void OptimizerTest::TestCode(const std::vector<uint16_t>& data, + const uint32_t* blocks, + size_t blocks_length) { HGraph* graph = CreateCFG(data); ASSERT_EQ(graph->GetBlocks().size(), blocks_length); for (size_t i = 0, e = blocks_length; i < e; ++i) { @@ -49,7 +51,7 @@ void OptimizerTest::TestCode(const uint16_t* data, const uint32_t* blocks, size_ } TEST_F(OptimizerTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID); // Block number 1 const uint32_t dominators[] = { @@ -62,7 +64,7 @@ TEST_F(OptimizerTest, ReturnVoid) { } TEST_F(OptimizerTest, CFG1) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, // Block number 1 Instruction::RETURN_VOID); // Block number 2 @@ -77,7 +79,7 @@ TEST_F(OptimizerTest, CFG1) { } TEST_F(OptimizerTest, CFG2) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, // Block number 1 Instruction::GOTO | 0x100, // Block number 2 Instruction::RETURN_VOID); // Block number 3 @@ -94,7 +96,7 @@ TEST_F(OptimizerTest, CFG2) { } TEST_F(OptimizerTest, CFG3) { - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, // Block number 1 Instruction::RETURN_VOID, // Block number 2 Instruction::GOTO | 0xFF00); // Block number 3 @@ -109,14 +111,14 @@ TEST_F(OptimizerTest, CFG3) { TestCode(data1, dominators, sizeof(dominators) / sizeof(int)); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); TestCode(data2, dominators, sizeof(dominators) / sizeof(int)); - const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); @@ -125,7 +127,7 @@ TEST_F(OptimizerTest, CFG3) { } TEST_F(OptimizerTest, CFG4) { - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::NOP, Instruction::GOTO | 0xFF00); @@ -138,14 +140,14 @@ TEST_F(OptimizerTest, CFG4) { TestCode(data1, dominators, sizeof(dominators) / sizeof(int)); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 0, 0); TestCode(data2, 
dominators, sizeof(dominators) / sizeof(int)); } TEST_F(OptimizerTest, CFG5) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, // Block number 1 Instruction::GOTO | 0x100, // Dead block Instruction::GOTO | 0xFE00); // Block number 2 @@ -162,7 +164,7 @@ TEST_F(OptimizerTest, CFG5) { } TEST_F(OptimizerTest, CFG6) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -181,7 +183,7 @@ TEST_F(OptimizerTest, CFG6) { } TEST_F(OptimizerTest, CFG7) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, // Block number 1 Instruction::GOTO | 0x100, // Block number 2 @@ -201,7 +203,7 @@ TEST_F(OptimizerTest, CFG7) { } TEST_F(OptimizerTest, CFG8) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, // Block number 1 Instruction::GOTO | 0x200, // Block number 2 @@ -222,7 +224,7 @@ TEST_F(OptimizerTest, CFG8) { } TEST_F(OptimizerTest, CFG9) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, // Block number 1 Instruction::GOTO | 0x200, // Block number 2 @@ -243,7 +245,7 @@ TEST_F(OptimizerTest, CFG9) { } TEST_F(OptimizerTest, CFG10) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, // Block number 1 Instruction::IF_EQ, 3, // Block number 2 diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index b799fb4688..75b8e9609e 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -31,7 +31,7 @@ class FindLoopsTest : public OptimizingUnitTest {}; TEST_F(FindLoopsTest, CFG1) { // Constant is not used. 
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); @@ -42,7 +42,7 @@ TEST_F(FindLoopsTest, CFG1) { } TEST_F(FindLoopsTest, CFG2) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -53,7 +53,7 @@ TEST_F(FindLoopsTest, CFG2) { } TEST_F(FindLoopsTest, CFG3) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, @@ -67,7 +67,7 @@ TEST_F(FindLoopsTest, CFG3) { } TEST_F(FindLoopsTest, CFG4) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -82,7 +82,7 @@ TEST_F(FindLoopsTest, CFG4) { } TEST_F(FindLoopsTest, CFG5) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, @@ -126,7 +126,7 @@ TEST_F(FindLoopsTest, Loop1) { // while (a == a) { // } // return; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0xFE00, @@ -150,7 +150,7 @@ TEST_F(FindLoopsTest, Loop2) { // while (a == a) { // } // return a; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x400, Instruction::IF_EQ, 4, @@ -173,7 +173,7 @@ TEST_F(FindLoopsTest, Loop2) { TEST_F(FindLoopsTest, Loop3) { // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -197,7 +197,7 @@ TEST_F(FindLoopsTest, Loop3) { TEST_F(FindLoopsTest, Loop4) { // Test loop with originally two back edges. - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::IF_EQ, 3, @@ -221,7 +221,7 @@ TEST_F(FindLoopsTest, Loop4) { TEST_F(FindLoopsTest, Loop5) { // Test loop with two exit edges. 
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::IF_EQ, 3, @@ -244,7 +244,7 @@ TEST_F(FindLoopsTest, Loop5) { } TEST_F(FindLoopsTest, InnerLoop) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::IF_EQ, 3, @@ -273,7 +273,7 @@ TEST_F(FindLoopsTest, InnerLoop) { } TEST_F(FindLoopsTest, TwoLoops) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0xFE00, // first loop @@ -301,7 +301,7 @@ TEST_F(FindLoopsTest, TwoLoops) { } TEST_F(FindLoopsTest, NonNaturalLoop) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x0100, @@ -317,7 +317,7 @@ TEST_F(FindLoopsTest, NonNaturalLoop) { } TEST_F(FindLoopsTest, DoWhileLoop) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x0100, Instruction::IF_EQ, 0xFFFF, diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index b1ac027a68..c88baa8610 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -31,7 +31,15 @@ namespace art { using android::base::StringPrintf; static bool IsAllowedToJumpToExitBlock(HInstruction* instruction) { - return instruction->IsThrow() || instruction->IsReturn() || instruction->IsReturnVoid(); + // Anything that returns is allowed to jump into the exit block. + if (instruction->IsReturn() || instruction->IsReturnVoid()) { + return true; + } + // Anything that always throws is allowed to jump into the exit block. 
+ if (instruction->IsGoto() && instruction->GetPrevious() != nullptr) { + instruction = instruction->GetPrevious(); + } + return instruction->AlwaysThrows(); } static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) { diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index 9ca3e4953a..08bfa5d80f 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -22,7 +22,7 @@ namespace art { class GraphCheckerTest : public OptimizingUnitTest { protected: HGraph* CreateSimpleCFG(); - void TestCode(const uint16_t* data); + void TestCode(const std::vector<uint16_t>& data); }; /** @@ -48,7 +48,7 @@ HGraph* GraphCheckerTest::CreateSimpleCFG() { return graph; } -void GraphCheckerTest::TestCode(const uint16_t* data) { +void GraphCheckerTest::TestCode(const std::vector<uint16_t>& data) { HGraph* graph = CreateCFG(data); ASSERT_NE(graph, nullptr); @@ -58,14 +58,14 @@ void GraphCheckerTest::TestCode(const uint16_t* data) { } TEST_F(GraphCheckerTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID); TestCode(data); } TEST_F(GraphCheckerTest, CFG1) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -73,7 +73,7 @@ TEST_F(GraphCheckerTest, CFG1) { } TEST_F(GraphCheckerTest, CFG2) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -83,7 +83,7 @@ TEST_F(GraphCheckerTest, CFG2) { } TEST_F(GraphCheckerTest, CFG3) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -128,7 +128,7 @@ TEST_F(GraphCheckerTest, BlockEndingWithNonBranchInstruction) { TEST_F(GraphCheckerTest, SSAPhi) { // This code creates one Phi function during the conversion to SSA form. 
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 12c69889ab..6144162f68 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -533,20 +533,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE { VisitVecBinaryOperation(hadd); - StartAttributeStream("unsigned") << std::boolalpha << hadd->IsUnsigned() << std::noboolalpha; StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha; } - void VisitVecMin(HVecMin* min) OVERRIDE { - VisitVecBinaryOperation(min); - StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha; - } - - void VisitVecMax(HVecMax* max) OVERRIDE { - VisitVecBinaryOperation(max); - StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha; - } - void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE { VisitVecOperation(instruction); StartAttributeStream("kind") << instruction->GetOpKind(); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index b2ad8ec400..035e5ce3e1 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -392,6 +392,35 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { return single_impl; } +static bool AlwaysThrows(ArtMethod* method) + REQUIRES_SHARED(Locks::mutator_lock_) { + CodeItemDataAccessor accessor(method->DexInstructionData()); + // Skip native methods, methods with try blocks, and methods that are too large. + if (!accessor.HasCodeItem() || + accessor.TriesSize() != 0 || + accessor.InsnsSizeInCodeUnits() > kMaximumNumberOfTotalInstructions) { + return false; + } + // Scan for exits. + bool throw_seen = false; + for (const DexInstructionPcPair& pair : accessor) { + switch (pair.Inst().Opcode()) { + case Instruction::RETURN: + case Instruction::RETURN_VOID: + case Instruction::RETURN_WIDE: + case Instruction::RETURN_OBJECT: + case Instruction::RETURN_VOID_NO_BARRIER: + return false; // found regular control flow back + case Instruction::THROW: + throw_seen = true; + break; + default: + break; + } + } + return throw_seen; +} + bool HInliner::TryInline(HInvoke* invoke_instruction) { if (invoke_instruction->IsInvokeUnresolved() || invoke_instruction->IsInvokePolymorphic()) { @@ -431,20 +460,29 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { } if (actual_method != nullptr) { + // Single target. bool result = TryInlineAndReplace(invoke_instruction, actual_method, ReferenceTypeInfo::CreateInvalid(), /* do_rtp */ true, cha_devirtualize); - if (result && !invoke_instruction->IsInvokeStaticOrDirect()) { - if (cha_devirtualize) { - // Add dependency due to devirtulization. We've assumed resolved_method - // has single implementation. - outermost_graph_->AddCHASingleImplementationDependency(resolved_method); - MaybeRecordStat(stats_, MethodCompilationStat::kCHAInline); - } else { - MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); + if (result) { + // Successfully inlined. + if (!invoke_instruction->IsInvokeStaticOrDirect()) { + if (cha_devirtualize) { + // Add dependency due to devirtualization. 
We've assumed resolved_method + // has single implementation. + outermost_graph_->AddCHASingleImplementationDependency(resolved_method); + MaybeRecordStat(stats_, MethodCompilationStat::kCHAInline); + } else { + MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); + } } + } else if (!cha_devirtualize && AlwaysThrows(actual_method)) { + // Set always throws property for non-inlined method call with single target + // (unless it was obtained through CHA, because that would imply we have + // to add the CHA dependency, which seems not worth it). + invoke_instruction->SetAlwaysThrows(true); } return result; } @@ -1381,7 +1419,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile()); - CodeItemDataAccessor accessor(method); + CodeItemDataAccessor accessor(method->DexInstructionData()); if (!accessor.HasCodeItem()) { LOG_FAIL_NO_STAT() @@ -1660,7 +1698,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); uint32_t method_index = resolved_method->GetDexMethodIndex(); - CodeItemDebugInfoAccessor code_item_accessor(callee_dex_file, code_item); + CodeItemDebugInfoAccessor code_item_accessor(resolved_method->DexInstructionDebugInfo()); ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); Handle<mirror::DexCache> dex_cache = NewHandleIfDifferent(resolved_method->GetDexCache(), caller_compilation_unit_.GetDexCache(), diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index e81d97b0a8..02465d37ba 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -18,7 +18,7 @@ #define ART_COMPILER_OPTIMIZING_INLINER_H_ #include "dex/dex_file_types.h" -#include "invoke_type.h" +#include "dex/invoke_type.h" #include "jit/profile_compilation_info.h" #include "optimization.h" diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 72a93c1f77..64a1eccf60 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -49,7 +49,7 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph, const DexCompilationUnit* outer_compilation_unit, CompilerDriver* compiler_driver, CodeGenerator* code_generator, - const uint8_t* interpreter_metadata, + ArrayRef<const uint8_t> interpreter_metadata, OptimizingCompilerStats* compiler_stats, VariableSizedHandleScope* handles, ScopedArenaAllocator* local_allocator) diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 708a09711a..4428c53277 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_ +#include "base/array_ref.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "data_type.h" @@ -57,7 +58,7 @@ class HInstructionBuilder : public ValueObject { const DexCompilationUnit* outer_compilation_unit, CompilerDriver* compiler_driver, CodeGenerator* code_generator, - const uint8_t* interpreter_metadata, + ArrayRef<const uint8_t> interpreter_metadata, OptimizingCompilerStats* compiler_stats, VariableSizedHandleScope* handles, ScopedArenaAllocator* local_allocator); diff --git 
a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 6928b70df7..acb830e524 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -19,9 +19,9 @@ #include "art_field-inl.h" #include "art_method-inl.h" #include "class_linker.h" +#include "dex/invoke_type.h" #include "driver/compiler_driver.h" #include "driver/compiler_options.h" -#include "invoke_type.h" #include "mirror/dex_cache-inl.h" #include "nodes.h" #include "scoped_thread_state_change-inl.h" diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index ca1b451e6b..2f8e33f941 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2011,6 +2011,14 @@ void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 99b8b5df74..830d0403e4 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -2811,6 +2811,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 113c9de5a2..cafa5228d9 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2835,6 +2835,15 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); } +// static double java.lang.Math.pow(double y, double x) +void IntrinsicLocationsBuilderMIPS::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, codegen_, kQuickPow); +} + // static double java.lang.Math.cbrt(double a) void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) { CreateFPToFPCallLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 521bad27e2..89f1818be2 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -2416,6 +2416,15 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); } +// static double java.lang.Math.pow(double y, double x) +void IntrinsicLocationsBuilderMIPS64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, codegen_, kQuickPow); +} + // static double 
java.lang.Math.cbrt(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) { CreateFPToFPCallLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index baa410b884..46b7f3f1ce 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -1105,6 +1105,14 @@ void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 6dd8b8e1f5..6483b7cb2a 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -897,6 +897,14 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) { CreateFPFPToFPCallLocations(allocator_, invoke); } diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 43b63a73ef..9fa5b74c62 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -35,11 +35,12 @@ namespace art { class LinearizeTest : public OptimizingUnitTest { protected: template <size_t number_of_blocks> - void TestCode(const uint16_t* data, const uint32_t (&expected_order)[number_of_blocks]); + void TestCode(const std::vector<uint16_t>& data, + const uint32_t (&expected_order)[number_of_blocks]); }; template <size_t number_of_blocks> -void LinearizeTest::TestCode(const uint16_t* data, +void LinearizeTest::TestCode(const std::vector<uint16_t>& data, const uint32_t (&expected_order)[number_of_blocks]) { HGraph* graph = CreateCFG(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( @@ -68,7 +69,7 @@ TEST_F(LinearizeTest, CFG1) { // + / \ + // Block4 Block8 - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 5, Instruction::IF_EQ, 0xFFFE, @@ -93,7 +94,7 @@ TEST_F(LinearizeTest, CFG2) { // + / \ + // Block5 Block8 - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::RETURN_VOID, @@ -119,7 +120,7 @@ TEST_F(LinearizeTest, CFG3) { // Block6 + Block9 // | + // Block4 ++ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::RETURN_VOID, @@ -149,7 +150,7 @@ TEST_F(LinearizeTest, CFG4) { // + / \ + // Block5 Block11 */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 7, Instruction::IF_EQ, 0xFFFE, @@ -179,7 +180,7 @@ 
TEST_F(LinearizeTest, CFG5) { // +/ \ + // Block6 Block11 */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::RETURN_VOID, @@ -205,7 +206,7 @@ TEST_F(LinearizeTest, CFG6) { // Block5 <- Block9 Block6 + // | // Block7 - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x0100, Instruction::IF_EQ, 0x0004, @@ -233,7 +234,7 @@ TEST_F(LinearizeTest, CFG7) { // | // Block7 // - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x0100, Instruction::IF_EQ, 0x0005, diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index e45d7c820c..66660662e4 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -31,10 +31,10 @@ namespace art { class LiveRangesTest : public OptimizingUnitTest { public: - HGraph* BuildGraph(const uint16_t* data); + HGraph* BuildGraph(const std::vector<uint16_t>& data); }; -HGraph* LiveRangesTest::BuildGraph(const uint16_t* data) { +HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) { HGraph* graph = CreateCFG(data); // Suspend checks implementation may change in the future, and this test relies // on how instructions are ordered. @@ -57,7 +57,7 @@ TEST_F(LiveRangesTest, CFG1) { * | * 12: exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -102,7 +102,7 @@ TEST_F(LiveRangesTest, CFG2) { * | * 26: exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -151,7 +151,7 @@ TEST_F(LiveRangesTest, CFG3) { * | * 28: exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, @@ -225,7 +225,7 @@ TEST_F(LiveRangesTest, Loop1) { * 30: exit */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -304,7 +304,7 @@ TEST_F(LiveRangesTest, Loop2) { * We want to make sure the phi at 10 has a lifetime hole after the add at 20. */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::ADD_INT, 0, 0, @@ -378,7 +378,7 @@ TEST_F(LiveRangesTest, CFG4) { * * We want to make sure the constant0 has a lifetime hole after the 16: add. 
*/ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::IF_EQ, 5, diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 35bc4ff8b3..6621a03568 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -31,7 +31,7 @@ namespace art { class LivenessTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, const char* expected); + void TestCode(const std::vector<uint16_t>& data, const char* expected); }; static void DumpBitVector(BitVector* vector, @@ -46,7 +46,7 @@ static void DumpBitVector(BitVector* vector, buffer << ")\n"; } -void LivenessTest::TestCode(const uint16_t* data, const char* expected) { +void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expected) { HGraph* graph = CreateCFG(data); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); @@ -86,7 +86,7 @@ TEST_F(LivenessTest, CFG1) { " kill: (0)\n"; // Constant is not used. - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); @@ -108,7 +108,7 @@ TEST_F(LivenessTest, CFG2) { " live out: (0)\n" " kill: (0)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -134,7 +134,7 @@ TEST_F(LivenessTest, CFG3) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, @@ -181,7 +181,7 @@ TEST_F(LivenessTest, CFG4) { " live out: (0000)\n" " kill: (0000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -228,7 +228,7 @@ TEST_F(LivenessTest, CFG5) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, @@ -273,7 +273,7 @@ TEST_F(LivenessTest, Loop1) { " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -318,7 +318,7 @@ TEST_F(LivenessTest, Loop3) { " live out: (0000)\n" " kill: (0000)\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -370,7 +370,7 @@ TEST_F(LivenessTest, Loop4) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x500, Instruction::IF_EQ, 5, @@ -425,7 +425,7 @@ TEST_F(LivenessTest, Loop5) { " live out: (0001)\n" " kill: (0001)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, 
Instruction::CONST_4 | 4 << 12 | 0, @@ -475,7 +475,7 @@ TEST_F(LivenessTest, Loop6) { " live out: (0000)\n" " kill: (0000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -530,7 +530,7 @@ TEST_F(LivenessTest, Loop7) { " live out: (00000)\n" " kill: (00000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -580,7 +580,7 @@ TEST_F(LivenessTest, Loop8) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::ADD_INT, 0, 0, diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 88326d321b..8b4eae1780 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -25,6 +25,51 @@ #include <iostream> +/** + * The general algorithm of load-store elimination (LSE). + * Load-store analysis in the previous pass collects a list of heap locations + * and does alias analysis of those heap locations. + * LSE keeps track of a list of heap values corresponding to the heap + * locations. It visits basic blocks in reverse post order and for + * each basic block, visits instructions sequentially, and processes + * instructions as follows: + * - If the instruction is a load, and the heap location for that load has a + * valid heap value, the load can be eliminated. In order to maintain the + * validity of all heap locations during the optimization phase, the real + * elimination is delayed until the end of LSE. + * - If the instruction is a store, it updates the heap value for the heap + * location of the store with the store instruction. The real heap value + * can be fetched from the store instruction. Heap values are invalidated + * for heap locations that may alias with the store instruction's heap + * location. The store instruction can be eliminated unless the value stored + * is later needed, e.g. by a load from the same/aliased heap location, or + * the heap location persists at method return/deoptimization. + * The store instruction is also needed if it's not used to track the heap + * value anymore, e.g. when it fails to merge with the heap values from other + * predecessors. + * - A store that stores the same value as the heap value is eliminated. + * - The lists of heap values are merged at basic block entry from the basic + * block's predecessors. The algorithm is single-pass, so loop side effects are + * used on a best-effort basis to decide whether a heap location is stored into + * inside the loop. + * - A special type of object, called a singleton, is instantiated in the method + * and has a single name, i.e. no aliases. Singletons have exclusive heap + * locations since they have no aliases. Singletons are helpful in narrowing + * down the life span of a heap location such that they do not always + * need to participate in merging heap values. The allocation of a singleton + * can be eliminated if that singleton is not used and does not persist + * at method return/deoptimization. + * - For newly instantiated instances, their heap values are initialized to + * language-defined default values.
+ * - Some instructions such as invokes are treated as loading and invalidating + * all the heap values, depending on the instruction's side effects. + * - Finalizable objects are considered as persisting at method + * return/deoptimization. + * - Currently this LSE algorithm doesn't handle SIMD graphs, e.g. those with + * VecLoad and VecStore instructions. + * - Currently this LSE algorithm doesn't handle graphs with try-catch, due to + * the special block merging structure. + */ + namespace art { // An unknown heap value. Loads with such a value in the heap location cannot be eliminated. @@ -59,8 +104,7 @@ class LSEVisitor : public HGraphDelegateVisitor { removed_loads_(allocator_.Adapter(kArenaAllocLSE)), substitute_instructions_for_loads_(allocator_.Adapter(kArenaAllocLSE)), possibly_removed_stores_(allocator_.Adapter(kArenaAllocLSE)), - singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)), - singleton_new_arrays_(allocator_.Adapter(kArenaAllocLSE)) { + singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) { } void VisitBasicBlock(HBasicBlock* block) OVERRIDE { @@ -88,19 +132,26 @@ class LSEVisitor : public HGraphDelegateVisitor { return type_conversion; } - // Find an instruction's substitute if it should be removed. + // Find an instruction's substitute if it's a removed load. // Return the same instruction if it should not be removed. HInstruction* FindSubstitute(HInstruction* instruction) { + if (!IsLoad(instruction)) { + return instruction; + } size_t size = removed_loads_.size(); for (size_t i = 0; i < size; i++) { if (removed_loads_[i] == instruction) { - return substitute_instructions_for_loads_[i]; + HInstruction* substitute = substitute_instructions_for_loads_[i]; + // The substitute list is a flat hierarchy. + DCHECK_EQ(FindSubstitute(substitute), substitute); + return substitute; } } return instruction; } void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) { + DCHECK(IsLoad(load)); DCHECK_EQ(FindSubstitute(heap_value), heap_value) << "Unexpected heap_value that has a substitute " << heap_value->DebugName(); removed_loads_.push_back(load); @@ -207,28 +258,59 @@ class LSEVisitor : public HGraphDelegateVisitor { new_instance->GetBlock()->RemoveInstruction(new_instance); } } - for (HInstruction* new_array : singleton_new_arrays_) { size_t removed = HConstructorFence::RemoveConstructorFences(new_array); MaybeRecordStat(stats_, MethodCompilationStat::kConstructorFenceRemovedLSE, removed); + } - if (!new_array->HasNonEnvironmentUses()) { - new_array->RemoveEnvironmentUsers(); - new_array->GetBlock()->RemoveInstruction(new_array); - } + private: + static bool IsLoad(HInstruction* instruction) { + if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { + return false; } + // Unresolved load is not treated as a load. + return instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsArrayGet(); } - private: - // If heap_values[index] is an instance field store, need to keep the store. - // This is necessary if a heap value is killed due to merging, or loop side - // effects (which is essentially merging also), since a load later from the - // location won't be eliminated. + static bool IsStore(HInstruction* instruction) { + if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { + return false; + } + // Unresolved store is not treated as a store.
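// Illustration only, not part of this change: a hypothetical input showing the
// behaviors described in the LSE algorithm comment above (all names here are
// made up).
struct Point { int x; };  // stand-in for an object with one heap location

int LseSample(Point* p) {
  p->x = 1;      // Store: becomes the tracked heap value for location p->x.
  int a = p->x;  // Load with a valid heap value: eliminated, replaced by 1.
  p->x = 1;      // Stores the same value as the heap value: eliminated.
  return a;      // Net effect: one store is kept (p is a parameter, not a
                 // singleton, so it may escape), then "return 1".
}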
+ return instruction->IsInstanceFieldSet() || + instruction->IsArraySet() || + instruction->IsStaticFieldSet(); + } + + // Returns the real heap value by finding its substitute or by "peeling" + // a store instruction. + HInstruction* GetRealHeapValue(HInstruction* heap_value) { + if (IsLoad(heap_value)) { + return FindSubstitute(heap_value); + } + if (!IsStore(heap_value)) { + return heap_value; + } + + // We keep track of store instructions as the heap values which might be + // eliminated if the stores are later found not necessary. The real stored + // value needs to be fetched from the store instruction. + if (heap_value->IsInstanceFieldSet()) { + heap_value = heap_value->AsInstanceFieldSet()->GetValue(); + } else if (heap_value->IsStaticFieldSet()) { + heap_value = heap_value->AsStaticFieldSet()->GetValue(); + } else { + DCHECK(heap_value->IsArraySet()); + heap_value = heap_value->AsArraySet()->GetValue(); + } + // heap_value may already be a removed load. + return FindSubstitute(heap_value); + } + + // If heap_value is a store, need to keep the store. + // This is necessary if a heap value is killed or replaced by another value, + // so that the store is no longer used to track the heap value. + void KeepIfIsStore(HInstruction* heap_value) { - if (heap_value == kDefaultHeapValue || - heap_value == kUnknownHeapValue || - !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) { + if (!IsStore(heap_value)) { return; } auto idx = std::find(possibly_removed_stores_.begin(), @@ -239,26 +321,41 @@ class LSEVisitor : public HGraphDelegateVisitor { } } + // If a heap location X may alias with the heap location at `loc_index` + // and the heap value of that heap location X holds a store, keep that store. + // It's needed for a dependent load that's not eliminated, since any store + // that may put a value into the load's heap location needs to be kept. + void KeepStoresIfAliasedToLocation(ScopedArenaVector<HInstruction*>& heap_values, + size_t loc_index) { + for (size_t i = 0; i < heap_values.size(); i++) { + if ((i == loc_index) || heap_location_collector_.MayAlias(i, loc_index)) { + KeepIfIsStore(heap_values[i]); + } + } + } + void HandleLoopSideEffects(HBasicBlock* block) { DCHECK(block->IsLoopHeader()); int block_id = block->GetBlockId(); ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + ScopedArenaVector<HInstruction*>& pre_header_heap_values = + heap_values_for_[pre_header->GetBlockId()]; - // Don't eliminate loads in irreducible loops. This is safe for singletons, because - // they are always used by the non-eliminated loop-phi. + // Don't eliminate loads in irreducible loops. + // Also keep the stores before the loop. if (block->GetLoopInformation()->IsIrreducible()) { if (kIsDebugBuild) { for (size_t i = 0; i < heap_values.size(); i++) { DCHECK_EQ(heap_values[i], kUnknownHeapValue); } } + for (size_t i = 0; i < heap_values.size(); i++) { + KeepIfIsStore(pre_header_heap_values[i]); + } return; } - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - ScopedArenaVector<HInstruction*>& pre_header_heap_values = - heap_values_for_[pre_header->GetBlockId()]; - // Inherit the values from pre-header.
for (size_t i = 0; i < heap_values.size(); i++) { heap_values[i] = pre_header_heap_values[i]; @@ -270,18 +367,17 @@ class LSEVisitor : public HGraphDelegateVisitor { for (size_t i = 0; i < heap_values.size(); i++) { HeapLocation* location = heap_location_collector_.GetHeapLocation(i); ReferenceInfo* ref_info = location->GetReferenceInfo(); - if (ref_info->IsSingletonAndRemovable() && - !location->IsValueKilledByLoopSideEffects()) { - // A removable singleton's field that's not stored into inside a loop is + if (ref_info->IsSingleton() && !location->IsValueKilledByLoopSideEffects()) { + // A singleton's field that's not stored into inside a loop is // invariant throughout the loop. Nothing to do. } else { - // heap value is killed by loop side effects (stored into directly, or - // due to aliasing). Or the heap value may be needed after method return - // or deoptimization. + // heap value is killed by loop side effects. KeepIfIsStore(pre_header_heap_values[i]); heap_values[i] = kUnknownHeapValue; } } + } else { + // The loop doesn't kill any value. } } @@ -300,45 +396,73 @@ class LSEVisitor : public HGraphDelegateVisitor { ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { HInstruction* merged_value = nullptr; + // If we can merge the store itself from the predecessors, we keep + // the store as the heap value as long as possible. In case we cannot + // merge the store, we try to merge the values of the stores. + HInstruction* merged_store_value = nullptr; // Whether merged_value is a result that's merged from all predecessors. bool from_all_predecessors = true; ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); + HInstruction* ref = ref_info->GetReference(); HInstruction* singleton_ref = nullptr; if (ref_info->IsSingleton()) { - // We do more analysis of liveness when merging heap values for such - // cases since stores into such references may potentially be eliminated. - singleton_ref = ref_info->GetReference(); + // We do more analysis based on singleton's liveness when merging + // heap values for such cases. + singleton_ref = ref; } for (HBasicBlock* predecessor : predecessors) { HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i]; + if (!IsStore(pred_value)) { + pred_value = FindSubstitute(pred_value); + } + DCHECK(pred_value != nullptr); + HInstruction* pred_store_value = GetRealHeapValue(pred_value); if ((singleton_ref != nullptr) && !singleton_ref->GetBlock()->Dominates(predecessor)) { - // singleton_ref is not live in this predecessor. Skip this predecessor since - // it does not really have the location. + // singleton_ref is not live in this predecessor. No need to merge + // since singleton_ref is not live at the beginning of this block. DCHECK_EQ(pred_value, kUnknownHeapValue); from_all_predecessors = false; - continue; + break; } if (merged_value == nullptr) { // First seen heap value. + DCHECK(pred_value != nullptr); merged_value = pred_value; } else if (pred_value != merged_value) { // There are conflicting values. merged_value = kUnknownHeapValue; + // We may still be able to merge store values. + } + + // Conflicting stores may be storing the same value. We do another merge + // of real stored values. + if (merged_store_value == nullptr) { + // First seen store value. 
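// Illustration only, not part of this change (hypothetical input): why the
// merge tracks the stored values in addition to the store instructions.
struct S { int f; };

int MergeSample(S* s, bool cond) {
  if (cond) {
    s->f = 0;  // store A
  } else {
    s->f = 0;  // store B
  }
  // At the merge point, merged_value conflicts (store A != store B), but both
  // stores put the constant 0 into s->f, so merged_store_value is 0 and the
  // load below can still be eliminated.
  return s->f;
}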
+ DCHECK(pred_store_value != nullptr); + merged_store_value = pred_store_value; + } else if (pred_store_value != merged_store_value) { + // There are conflicting store values. + merged_store_value = kUnknownHeapValue; + // There must be conflicting stores also. + DCHECK_EQ(merged_value, kUnknownHeapValue); + // No need to merge anymore. break; } } - if (ref_info->IsSingleton()) { - if (ref_info->IsSingletonAndNonRemovable() || - (merged_value == kUnknownHeapValue && - !block->IsSingleReturnOrReturnVoidAllowingPhis())) { - // The heap value may be needed after method return or deoptimization, - // or there are conflicting heap values from different predecessors and - // this block is not a single return, - // keep the last store in each predecessor since future loads may not - // be eliminated. + if (merged_value == nullptr) { + DCHECK(!from_all_predecessors); + DCHECK(singleton_ref != nullptr); + } + if (from_all_predecessors) { + if (ref_info->IsSingletonAndRemovable() && + block->IsSingleReturnOrReturnVoidAllowingPhis()) { + // Values in the singleton are not needed anymore. + } else if (!IsStore(merged_value)) { + // We don't track merged value as a store anymore. We have to + // hold the stores in predecessors live here. for (HBasicBlock* predecessor : predecessors) { ScopedArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()]; @@ -346,18 +470,33 @@ class LSEVisitor : public HGraphDelegateVisitor { } } } else { - // Currenctly we don't eliminate stores to non-singletons. + DCHECK(singleton_ref != nullptr); + // singleton_ref is non-existing at the beginning of the block. There is + // no need to keep the stores. } - if ((merged_value == nullptr) || !from_all_predecessors) { + if (!from_all_predecessors) { DCHECK(singleton_ref != nullptr); DCHECK((singleton_ref->GetBlock() == block) || - !singleton_ref->GetBlock()->Dominates(block)); + !singleton_ref->GetBlock()->Dominates(block)) + << "method: " << GetGraph()->GetMethodName(); // singleton_ref is not defined before block or defined only in some of its // predecessors, so block doesn't really have the location at its entry. heap_values[i] = kUnknownHeapValue; - } else { + } else if (predecessors.size() == 1) { + // Inherit heap value from the single predecessor. + DCHECK_EQ(heap_values_for_[predecessors[0]->GetBlockId()][i], merged_value); heap_values[i] = merged_value; + } else { + DCHECK(merged_value == kUnknownHeapValue || + merged_value == kDefaultHeapValue || + merged_value->GetBlock()->Dominates(block)); + if (merged_value != kUnknownHeapValue) { + heap_values[i] = merged_value; + } else { + // Stores in different predecessors may be storing the same value. + heap_values[i] = merged_store_value; + } } } } @@ -423,23 +562,12 @@ class LSEVisitor : public HGraphDelegateVisitor { heap_values[idx] = constant; return; } - if (heap_value != kUnknownHeapValue) { - if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) { - HInstruction* store = heap_value; - // This load must be from a singleton since it's from the same - // field/element that a "removed" store puts the value. That store - // must be to a singleton's field/element. - DCHECK(ref_info->IsSingleton()); - // Get the real heap value of the store. - heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2); - // heap_value may already have a substitute. 
- heap_value = FindSubstitute(heap_value); - } - } + heap_value = GetRealHeapValue(heap_value); if (heap_value == kUnknownHeapValue) { // Load isn't eliminated. Put the load as the value into the HeapLocation. // This acts like GVN but with better aliasing analysis. heap_values[idx] = instruction; + KeepStoresIfAliasedToLocation(heap_values, idx); } else { if (DataType::Kind(heap_value->GetType()) != DataType::Kind(instruction->GetType())) { // The only situation where the same heap location has different type is when @@ -452,6 +580,10 @@ class LSEVisitor : public HGraphDelegateVisitor { DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName(); DCHECK(instruction->IsArrayGet()) << instruction->DebugName(); } + // Load isn't eliminated. Put the load as the value into the HeapLocation. + // This acts like GVN but with better aliasing analysis. + heap_values[idx] = instruction; + KeepStoresIfAliasedToLocation(heap_values, idx); return; } AddRemovedLoad(instruction, heap_value); @@ -460,12 +592,21 @@ class LSEVisitor : public HGraphDelegateVisitor { } bool Equal(HInstruction* heap_value, HInstruction* value) { + DCHECK(!IsStore(value)) << value->DebugName(); + if (heap_value == kUnknownHeapValue) { + // Don't compare kUnknownHeapValue with other values. + return false; + } if (heap_value == value) { return true; } if (heap_value == kDefaultHeapValue && GetDefaultValue(value->GetType()) == value) { return true; } + HInstruction* real_heap_value = GetRealHeapValue(heap_value); + if (real_heap_value != heap_value) { + return Equal(real_heap_value, value); + } return false; } @@ -476,6 +617,7 @@ class LSEVisitor : public HGraphDelegateVisitor { size_t vector_length, int16_t declaring_class_def_index, HInstruction* value) { + DCHECK(!IsStore(value)) << value->DebugName(); // value may already have a substitute. value = FindSubstitute(value); HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); @@ -486,59 +628,47 @@ class LSEVisitor : public HGraphDelegateVisitor { ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; - bool same_value = false; bool possibly_redundant = false; + if (Equal(heap_value, value)) { // Store into the heap location with the same value. - same_value = true; - } else if (index != nullptr && - heap_location_collector_.GetHeapLocation(idx)->HasAliasedLocations()) { - // For array element, don't eliminate stores if the location can be aliased - // (due to either ref or index aliasing). - } else if (ref_info->IsSingleton()) { - // Store into a field/element of a singleton. The value cannot be killed due to - // aliasing/invocation. It can be redundant since future loads can - // directly get the value set by this instruction. The value can still be killed due to - // merging or loop side effects. Stores whose values are killed due to merging/loop side - // effects later will be removed from possibly_removed_stores_ when that is detected. - // Stores whose values may be needed after method return or deoptimization - // are also removed from possibly_removed_stores_ when that is detected. - possibly_redundant = true; + // This store can be eliminated right away. + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } else { HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); - if (loop_info != nullptr) { - // instruction is a store in the loop so the loop must does write. 
+ if (loop_info == nullptr) { + // Store is not in a loop. We try to precisely track the heap value via + // the store. + possibly_redundant = true; + } else if (!loop_info->IsIrreducible()) { + // instruction is a store in the loop, so the loop must write. DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); - - if (loop_info->IsDefinedOutOfTheLoop(original_ref)) { - DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); - // Keep the store since its value may be needed at the loop header. - possibly_redundant = false; - } else { - // The singleton is created inside the loop. Value stored to it isn't needed at + if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(original_ref)) { + // original_ref is created inside the loop. Value stored to it isn't needed at // the loop header. This is true for outer loops also. + possibly_redundant = true; + } else { + // Keep the store since its value may be needed at the loop header. } + } else { + // Keep the store inside irreducible loops. } } - if (same_value || possibly_redundant) { + if (possibly_redundant) { possibly_removed_stores_.push_back(instruction); } - if (!same_value) { - if (possibly_redundant) { - DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet()); - // Put the store as the heap value. If the value is loaded from heap - // by a load later, this store isn't really redundant. - heap_values[idx] = instruction; - } else { - heap_values[idx] = value; - } - } + // Put the store as the heap value. If the value is loaded or needed after + // return/deoptimization later, this store isn't really redundant. + heap_values[idx] = instruction; + // This store may kill values in other heap locations due to aliasing. for (size_t i = 0; i < heap_values.size(); i++) { if (i == idx) { continue; } - if (heap_values[i] == value) { + if (Equal(heap_values[i], value)) { // Same value should be kept even if aliasing happens. continue; } @@ -547,7 +677,9 @@ class LSEVisitor : public HGraphDelegateVisitor { continue; } if (heap_location_collector_.MayAlias(i, idx)) { - // Kill heap locations that may alias. + // Kill heap locations that may alias; as a result, if the heap value + // is a store, the store needs to be kept. + KeepIfIsStore(heap_values[i]); heap_values[i] = kUnknownHeapValue; } } @@ -633,24 +765,35 @@ class LSEVisitor : public HGraphDelegateVisitor { const ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; for (HInstruction* heap_value : heap_values) { - // Filter out fake instructions before checking instruction kind below. - if (heap_value == kUnknownHeapValue || heap_value == kDefaultHeapValue) { - continue; - } // A store is kept as the heap value for possibly removed stores. + // The value stored is generally observable after deoptimization, except + // for singletons that don't escape after deoptimization.
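// Illustration only, not part of this change (hypothetical input): a store
// into a singleton that is visible to a deoptimization environment must stay,
// because the interpreter reads the field after deoptimizing.
struct PointS { int x; };

int DeoptSample(bool rare) {
  PointS p;  // singleton: never escapes this method
  p.x = 42;  // otherwise removable, but kept if a deoptimization point below
             // holds `p` in its environment
  if (rare) {
    // conceptual HDeoptimize point: the interpreter must observe p.x == 42
  }
  return p.x;
}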
+ if (IsStore(heap_value)) { + if (heap_value->IsStaticFieldSet()) { + KeepIfIsStore(heap_value); + continue; + } HInstruction* reference = heap_value->InputAt(0); - DCHECK(heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()); - for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) { - HEnvironment* user = use.GetUser(); - if (user->GetHolder() == instruction) { - // The singleton for the store is visible at this deoptimization - // point. Need to keep the store so that the heap value is - // seen by the interpreter. + if (heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()) { + if (reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable()) { + // Finalizable objects always escape. KeepIfIsStore(heap_value); + continue; + } + // Check whether the reference for a store is used by an environment local of + // HDeoptimize. If not, the singleton is not observed after + // deoptimization. + for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) { + HEnvironment* user = use.GetUser(); + if (user->GetHolder() == instruction) { + // The singleton for the store is visible at this deoptimization + // point. Need to keep the store so that the heap value is + // seen by the interpreter. + KeepIfIsStore(heap_value); + } } + } else { + KeepIfIsStore(heap_value); } } } @@ -691,9 +834,12 @@ class LSEVisitor : public HGraphDelegateVisitor { // Singleton references cannot be seen by the callee. } else { if (side_effects.DoesAnyRead()) { + // Invocation may read the heap value. KeepIfIsStore(heap_values[i]); } if (side_effects.DoesAnyWrite()) { + // Keep the store since it's not used to track the heap value anymore. + KeepIfIsStore(heap_values[i]); heap_values[i] = kUnknownHeapValue; } } @@ -758,7 +904,7 @@ class LSEVisitor : public HGraphDelegateVisitor { return; } if (ref_info->IsSingletonAndRemovable()) { - singleton_new_arrays_.push_back(new_array); + singleton_new_instances_.push_back(new_array); } ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[new_array->GetBlock()->GetBlockId()]; @@ -791,7 +937,6 @@ class LSEVisitor : public HGraphDelegateVisitor { ScopedArenaVector<HInstruction*> possibly_removed_stores_; ScopedArenaVector<HInstruction*> singleton_new_instances_; - ScopedArenaVector<HInstruction*> singleton_new_arrays_; DISALLOW_COPY_AND_ASSIGN(LSEVisitor); }; diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 3dc1ef7534..899496328e 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -30,46 +30,6 @@ namespace art { -// TODO: Clean up the packed type detection so that we have the right type straight away -// and do not need to go through this normalization. -static inline void NormalizePackedType(/* inout */ DataType::Type* type, - /* inout */ bool* is_unsigned) { - switch (*type) { - case DataType::Type::kBool: - DCHECK(!*is_unsigned); - break; - case DataType::Type::kUint8: - case DataType::Type::kInt8: - if (*is_unsigned) { - *is_unsigned = false; - *type = DataType::Type::kUint8; - } else { - *type = DataType::Type::kInt8; - } - break; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - if (*is_unsigned) { - *is_unsigned = false; - *type = DataType::Type::kUint16; - } else { - *type = DataType::Type::kInt16; - } - break; - case DataType::Type::kInt32: - case DataType::Type::kInt64: - // We do not have kUint32 and kUint64 at the moment.
- break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - DCHECK(!*is_unsigned); - break; - default: - LOG(FATAL) << "Unexpected type " << *type; - UNREACHABLE(); - } -} - // Enables vectorization (SIMDization) in the loop optimizer. static constexpr bool kEnableVectorization = true; @@ -1362,8 +1322,10 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } if (VectorizeUse(node, r, generate_code, type, restrictions)) { if (generate_code) { - NormalizePackedType(&type, &is_unsigned); - GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type); + GenerateVecOp(instruction, + vector_map_->Get(r), + nullptr, + HVecOperation::ToProperType(type, is_unsigned)); } return true; } @@ -1865,18 +1827,26 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, case Intrinsics::kMathMinLongLong: case Intrinsics::kMathMinFloatFloat: case Intrinsics::kMathMinDoubleDouble: { - NormalizePackedType(&type, &is_unsigned); vector = new (global_allocator_) - HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned, dex_pc); + HVecMin(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc); break; } case Intrinsics::kMathMaxIntInt: case Intrinsics::kMathMaxLongLong: case Intrinsics::kMathMaxFloatFloat: case Intrinsics::kMathMaxDoubleDouble: { - NormalizePackedType(&type, &is_unsigned); vector = new (global_allocator_) - HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned, dex_pc); + HVecMax(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc); break; } default: @@ -1987,15 +1957,13 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, VectorizeUse(node, s, generate_code, type, restrictions)) { if (generate_code) { if (vector_mode_ == kVector) { - NormalizePackedType(&type, &is_unsigned); vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd( global_allocator_, vector_map_->Get(r), vector_map_->Get(s), - type, + HVecOperation::ToProperType(type, is_unsigned), vector_length_, is_rounded, - is_unsigned, kNoDexPc)); MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); } else { @@ -2086,7 +2054,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, VectorizeUse(node, r, generate_code, sub_type, restrictions) && VectorizeUse(node, s, generate_code, sub_type, restrictions)) { if (generate_code) { - NormalizePackedType(&reduction_type, &is_unsigned); + reduction_type = HVecOperation::ToProperType(reduction_type, is_unsigned); if (vector_mode_ == kVector) { vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate( global_allocator_, diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 727431a493..91e475d737 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -865,6 +865,15 @@ void HLoopInformation::Populate() { graph->SetHasLoops(true); } +void HLoopInformation::PopulateInnerLoopUpwards(HLoopInformation* inner_loop) { + DCHECK(inner_loop->GetPreHeader()->GetLoopInformation() == this); + blocks_.Union(&inner_loop->blocks_); + HLoopInformation* outer_loop = GetPreHeader()->GetLoopInformation(); + if (outer_loop != nullptr) { + outer_loop->PopulateInnerLoopUpwards(this); + } +} + HBasicBlock* HLoopInformation::GetPreHeader() const { HBasicBlock* block = header_->GetPredecessors()[0]; DCHECK(irreducible_ || (block == header_->GetDominator())); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 
d4382c6b4c..43ca2cf874 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -32,11 +32,11 @@ #include "deoptimization_kind.h" #include "dex/dex_file.h" #include "dex/dex_file_types.h" +#include "dex/invoke_type.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "handle.h" #include "handle_scope.h" #include "intrinsics_enum.h" -#include "invoke_type.h" #include "locations.h" #include "method_reference.h" #include "mirror/class.h" @@ -826,6 +826,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { // Finds blocks that are part of this loop. void Populate(); + // Updates the block population of the loop and all of its outer loops recursively after the + // population of the inner loop is updated. + void PopulateInnerLoopUpwards(HLoopInformation* inner_loop); + // Returns whether this loop information contains `block`. // Note that this loop information *must* be populated before entering this function. bool Contains(const HBasicBlock& block) const; @@ -856,6 +860,12 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { bool HasExitEdge() const; + // Resets back edge and blocks-in-loop data. + void ResetBasicBlockData() { + back_edges_.clear(); + ClearAllBlocks(); + } + private: // Internal recursive implementation of `Populate`. void PopulateRecursive(HBasicBlock* block); @@ -998,6 +1008,18 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { loop_information_->AddBackEdge(back_edge); } + // Registers a back edge; if the block was not a loop header before the call, associates a newly + // created loop info with it. + // + // Used in SuperblockCloner to preserve the LoopInformation object instead of resetting loop + // info for all blocks during back edge recalculation. + void AddBackEdgeWhileUpdating(HBasicBlock* back_edge) { + if (loop_information_ == nullptr || loop_information_->GetHeader() != this) { + loop_information_ = new (graph_->GetAllocator()) HLoopInformation(this, graph_); + } + loop_information_->AddBackEdge(back_edge); + } + HGraph* GetGraph() const { return graph_; } void SetGraph(HGraph* graph) { graph_ = graph; } @@ -2018,6 +2040,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // TODO: We should rename to CanVisiblyThrow, as some instructions (like HNewInstance), // could throw OOME, but it is still OK to remove them if they are unused. virtual bool CanThrow() const { return false; } + + // Does the instruction always throw an exception unconditionally?
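// (Illustrative note, not part of this change: HThrow overrides this to return
// true, and HInvoke gains a SetAlwaysThrows() flag further below. A plausible
// client, hypothetical here, is a pass that treats code dominated by an
// always-throwing instruction as unreachable:
//   if (instruction->AlwaysThrows()) { /* the following code is dead */ })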
+ virtual bool AlwaysThrows() const { return false; } + bool CanThrowIntoCatchBlock() const { return CanThrow() && block_->IsTryBlock(); } bool HasSideEffects() const { return side_effects_.HasSideEffects(); } @@ -4169,6 +4195,10 @@ class HInvoke : public HVariableInputSizeInstruction { bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); } + void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); } + + bool AlwaysThrows() const OVERRIDE { return GetPackedFlag<kFlagAlwaysThrows>(); } + bool CanBeMoved() const OVERRIDE { return IsIntrinsic() && !DoesAnyWrite(); } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { @@ -4199,7 +4229,8 @@ class HInvoke : public HVariableInputSizeInstruction { static constexpr size_t kFieldReturnTypeSize = MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); static constexpr size_t kFlagCanThrow = kFieldReturnType + kFieldReturnTypeSize; - static constexpr size_t kNumberOfInvokePackedBits = kFlagCanThrow + 1; + static constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1; + static constexpr size_t kNumberOfInvokePackedBits = kFlagAlwaysThrows + 1; static_assert(kNumberOfInvokePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>; using ReturnTypeField = BitField<DataType::Type, kFieldReturnType, kFieldReturnTypeSize>; @@ -6575,6 +6606,8 @@ class HThrow FINAL : public HTemplateInstruction<1> { bool CanThrow() const OVERRIDE { return true; } + bool AlwaysThrows() const OVERRIDE { return true; } + DECLARE_INSTRUCTION(Throw); protected: @@ -7298,19 +7331,19 @@ HInstruction* ReplaceInstrOrPhiByClone(HInstruction* instr); class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor { public: explicit CloneAndReplaceInstructionVisitor(HGraph* graph) - : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count(0) {} + : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {} void VisitInstruction(HInstruction* instruction) OVERRIDE { if (instruction->IsClonable()) { ReplaceInstrOrPhiByClone(instruction); - instr_replaced_by_clones_count++; + instr_replaced_by_clones_count_++; } } - size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count; } + size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count_; } private: - size_t instr_replaced_by_clones_count; + size_t instr_replaced_by_clones_count_; DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor); }; diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 87dff8403b..ecabdf3b76 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -131,8 +131,6 @@ class HVecOperation : public HVariableInputSizeInstruction { } // Maps an integral type to the same-size signed type and leaves other types alone. - // Can be used to test relaxed type consistency in which packed same-size integral - // types can co-exist, but other type mixes are an error. static DataType::Type ToSignedType(DataType::Type type) { switch (type) { case DataType::Type::kBool: // 1-byte storage unit @@ -160,6 +158,11 @@ class HVecOperation : public HVariableInputSizeInstruction { } } + // Maps an integral type to the same-size (un)signed type. Leaves other types alone. + static DataType::Type ToProperType(DataType::Type type, bool is_unsigned) { + return is_unsigned ? 
ToUnsignedType(type) : ToSignedType(type); + } + // Helper method to determine if an instruction returns a SIMD value. // TODO: This method is needed until we introduce SIMD as proper type. static bool ReturnsSIMDValue(HInstruction* instruction) { @@ -286,6 +289,8 @@ class HVecMemoryOperation : public HVecOperation { }; // Packed type consistency checker ("same vector length" integral types may mix freely). +// Tests relaxed type consistency in which packed same-size integral types can co-exist, +// but other type mixes are an error. inline static bool HasConsistentPackedTypes(HInstruction* input, DataType::Type type) { if (input->IsPhi()) { return input->GetType() == HVecOperation::kSIMDType; // carries SIMD @@ -518,7 +523,7 @@ class HVecAdd FINAL : public HVecBinaryOperation { // Performs halving add on every component in the two vectors, viz. // rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] // truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ] -// for either both signed or both unsigned operands x, y. +// for either both signed or both unsigned operands x, y (reflected in packed_type). class HVecHalvingAdd FINAL : public HVecBinaryOperation { public: HVecHalvingAdd(ArenaAllocator* allocator, @@ -527,21 +532,13 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { DataType::Type packed_type, size_t vector_length, bool is_rounded, - bool is_unsigned, uint32_t dex_pc) : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { - // The `is_unsigned` flag should be used exclusively with the Int32 or Int64. - // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types. - DCHECK(!is_unsigned || - packed_type == DataType::Type::kInt32 || - packed_type == DataType::Type::kInt64) << packed_type; DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); - SetPackedFlag<kFieldHAddIsUnsigned>(is_unsigned); SetPackedFlag<kFieldHAddIsRounded>(is_rounded); } - bool IsUnsigned() const { return GetPackedFlag<kFieldHAddIsUnsigned>(); } bool IsRounded() const { return GetPackedFlag<kFieldHAddIsRounded>(); } bool CanBeMoved() const OVERRIDE { return true; } @@ -549,9 +546,7 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { DCHECK(other->IsVecHalvingAdd()); const HVecHalvingAdd* o = other->AsVecHalvingAdd(); - return HVecOperation::InstructionDataEquals(o) && - IsUnsigned() == o->IsUnsigned() && - IsRounded() == o->IsRounded(); + return HVecOperation::InstructionDataEquals(o) && IsRounded() == o->IsRounded(); } DECLARE_INSTRUCTION(VecHalvingAdd); @@ -561,8 +556,7 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { private: // Additional packed bits. - static constexpr size_t kFieldHAddIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; - static constexpr size_t kFieldHAddIsRounded = kFieldHAddIsUnsigned + 1; + static constexpr size_t kFieldHAddIsRounded = HVecOperation::kNumberOfVectorOpPackedBits; static constexpr size_t kNumberOfHAddPackedBits = kFieldHAddIsRounded + 1; static_assert(kNumberOfHAddPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); }; @@ -638,7 +632,7 @@ class HVecDiv FINAL : public HVecBinaryOperation { // Takes minimum of every component in the two vectors, // viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. 
, min(xn, yn) ] -// for either both signed or both unsigned operands x, y. +// for either both signed or both unsigned operands x, y (reflected in packed_type). class HVecMin FINAL : public HVecBinaryOperation { public: HVecMin(ArenaAllocator* allocator, @@ -646,44 +640,23 @@ class HVecMin FINAL : public HVecBinaryOperation { HInstruction* right, DataType::Type packed_type, size_t vector_length, - bool is_unsigned, uint32_t dex_pc) : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { - // The `is_unsigned` flag should be used exclusively with the Int32 or Int64. - // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types. - DCHECK(!is_unsigned || - packed_type == DataType::Type::kInt32 || - packed_type == DataType::Type::kInt64) << packed_type; DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); - SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned); } - bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { - DCHECK(other->IsVecMin()); - const HVecMin* o = other->AsVecMin(); - return HVecOperation::InstructionDataEquals(o) && IsUnsigned() == o->IsUnsigned(); - } - DECLARE_INSTRUCTION(VecMin); protected: DEFAULT_COPY_CONSTRUCTOR(VecMin); - - private: - // Additional packed bits. - static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; - static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1; - static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); }; // Takes maximum of every component in the two vectors, // viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ] -// for either both signed or both unsigned operands x, y. +// for either both signed or both unsigned operands x, y (reflected in packed_type). class HVecMax FINAL : public HVecBinaryOperation { public: HVecMax(ArenaAllocator* allocator, @@ -691,39 +664,18 @@ class HVecMax FINAL : public HVecBinaryOperation { HInstruction* right, DataType::Type packed_type, size_t vector_length, - bool is_unsigned, uint32_t dex_pc) : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) { - // The `is_unsigned` flag should be used exclusively with the Int32 or Int64. - // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types. - DCHECK(!is_unsigned || - packed_type == DataType::Type::kInt32 || - packed_type == DataType::Type::kInt64) << packed_type; DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); - SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned); } - bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { - DCHECK(other->IsVecMax()); - const HVecMax* o = other->AsVecMax(); - return HVecOperation::InstructionDataEquals(o) && IsUnsigned() == o->IsUnsigned(); - } - DECLARE_INSTRUCTION(VecMax); protected: DEFAULT_COPY_CONSTRUCTOR(VecMax); - - private: - // Additional packed bits. 
- static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; - static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1; - static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); }; // Bitwise-ands every component in the two vectors, diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc index ab9d7594d9..af13449646 100644 --- a/compiler/optimizing/nodes_vector_test.cc +++ b/compiler/optimizing/nodes_vector_test.cc @@ -282,143 +282,53 @@ TEST_F(NodesVectorTest, VectorAlignmentMattersOnStore) { EXPECT_FALSE(v0->Equals(v1)); // no longer equal } -TEST_F(NodesVectorTest, VectorSignMattersOnMin) { - HVecOperation* p0 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); - HVecOperation* p1 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 4, kNoDexPc); - HVecOperation* p2 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 4, kNoDexPc); - - HVecMin* v0 = new (GetAllocator()) HVecMin( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ true, kNoDexPc); - HVecMin* v1 = new (GetAllocator()) HVecMin( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ false, kNoDexPc); - HVecMin* v2 = new (GetAllocator()) HVecMin( - GetAllocator(), p0, p0, DataType::Type::kInt32, 2, /*is_unsigned*/ true, kNoDexPc); - HVecMin* v3 = new (GetAllocator()) HVecMin( - GetAllocator(), p1, p1, DataType::Type::kUint8, 16, /*is_unsigned*/ false, kNoDexPc); - HVecMin* v4 = new (GetAllocator()) HVecMin( - GetAllocator(), p1, p1, DataType::Type::kInt8, 16, /*is_unsigned*/ false, kNoDexPc); - HVecMin* v5 = new (GetAllocator()) HVecMin( - GetAllocator(), p2, p2, DataType::Type::kUint16, 8, /*is_unsigned*/ false, kNoDexPc); - HVecMin* v6 = new (GetAllocator()) HVecMin( - GetAllocator(), p2, p2, DataType::Type::kInt16, 8, /*is_unsigned*/ false, kNoDexPc); - HVecMin* min_insns[] = { v0, v1, v2, v3, v4, v5, v6 }; - - EXPECT_FALSE(p0->CanBeMoved()); - EXPECT_FALSE(p1->CanBeMoved()); - EXPECT_FALSE(p2->CanBeMoved()); - - for (HVecMin* min_insn : min_insns) { - EXPECT_TRUE(min_insn->CanBeMoved()); - } - - // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64. 
- EXPECT_TRUE(v0->IsUnsigned()); - EXPECT_FALSE(v1->IsUnsigned()); - EXPECT_TRUE(v2->IsUnsigned()); - - for (HVecMin* min_insn1 : min_insns) { - for (HVecMin* min_insn2 : min_insns) { - EXPECT_EQ(min_insn1 == min_insn2, min_insn1->Equals(min_insn2)); - } - } -} - -TEST_F(NodesVectorTest, VectorSignMattersOnMax) { - HVecOperation* p0 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); - HVecOperation* p1 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 4, kNoDexPc); - HVecOperation* p2 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 4, kNoDexPc); - - HVecMax* v0 = new (GetAllocator()) HVecMax( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ true, kNoDexPc); - HVecMax* v1 = new (GetAllocator()) HVecMax( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ false, kNoDexPc); - HVecMax* v2 = new (GetAllocator()) HVecMax( - GetAllocator(), p0, p0, DataType::Type::kInt32, 2, /*is_unsigned*/ true, kNoDexPc); - HVecMax* v3 = new (GetAllocator()) HVecMax( - GetAllocator(), p1, p1, DataType::Type::kUint8, 16, /*is_unsigned*/ false, kNoDexPc); - HVecMax* v4 = new (GetAllocator()) HVecMax( - GetAllocator(), p1, p1, DataType::Type::kInt8, 16, /*is_unsigned*/ false, kNoDexPc); - HVecMax* v5 = new (GetAllocator()) HVecMax( - GetAllocator(), p2, p2, DataType::Type::kUint16, 8, /*is_unsigned*/ false, kNoDexPc); - HVecMax* v6 = new (GetAllocator()) HVecMax( - GetAllocator(), p2, p2, DataType::Type::kInt16, 8, /*is_unsigned*/ false, kNoDexPc); - HVecMax* max_insns[] = { v0, v1, v2, v3, v4, v5, v6 }; - - EXPECT_FALSE(p0->CanBeMoved()); - EXPECT_FALSE(p1->CanBeMoved()); - EXPECT_FALSE(p2->CanBeMoved()); - - for (HVecMax* max_insn : max_insns) { - EXPECT_TRUE(max_insn->CanBeMoved()); - } - - // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64. 
- EXPECT_TRUE(v0->IsUnsigned()); - EXPECT_FALSE(v1->IsUnsigned()); - EXPECT_TRUE(v2->IsUnsigned()); - - for (HVecMax* max_insn1 : max_insns) { - for (HVecMax* max_insn2 : max_insns) { - EXPECT_EQ(max_insn1 == max_insn2, max_insn1->Equals(max_insn2)); - } - } -} - TEST_F(NodesVectorTest, VectorAttributesMatterOnHalvingAdd) { + HVecOperation* u0 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kUint32, 4, kNoDexPc); + HVecOperation* u1 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kUint16, 8, kNoDexPc); + HVecOperation* u2 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kUint8, 16, kNoDexPc); + HVecOperation* p0 = new (GetAllocator()) HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); HVecOperation* p1 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 4, kNoDexPc); + HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 8, kNoDexPc); HVecOperation* p2 = new (GetAllocator()) - HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 4, kNoDexPc); + HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 16, kNoDexPc); HVecHalvingAdd* v0 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, - /*is_rounded*/ true, /*is_unsigned*/ true, kNoDexPc); + GetAllocator(), u0, u0, DataType::Type::kUint32, 4, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v1 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, - /*is_rounded*/ false, /*is_unsigned*/ true, kNoDexPc); + GetAllocator(), u0, u0, DataType::Type::kUint32, 4, /*is_rounded*/ false, kNoDexPc); HVecHalvingAdd* v2 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, - /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v3 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p0, p0, DataType::Type::kInt32, 4, - /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ false, kNoDexPc); + HVecHalvingAdd* v4 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p0, p0, DataType::Type::kInt32, 2, - /*is_rounded*/ true, /*is_unsigned*/ true, kNoDexPc); + GetAllocator(), u1, u1, DataType::Type::kUint16, 8, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v5 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p1, p1, DataType::Type::kUint8, 16, - /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), u1, u1, DataType::Type::kUint16, 8, /*is_rounded*/ false, kNoDexPc); HVecHalvingAdd* v6 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p1, p1, DataType::Type::kUint8, 16, - /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), p1, p1, DataType::Type::kInt16, 8, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v7 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p1, p1, DataType::Type::kInt8, 16, - /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), p1, p1, DataType::Type::kInt16, 8, /*is_rounded*/ false, kNoDexPc); + HVecHalvingAdd* v8 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p1, p1, DataType::Type::kInt8, 16, - /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), 
u2, u2, DataType::Type::kUint8, 16, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v9 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p2, p2, DataType::Type::kUint16, 8, - /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), u2, u2, DataType::Type::kUint8, 16, /*is_rounded*/ false, kNoDexPc); HVecHalvingAdd* v10 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p2, p2, DataType::Type::kUint16, 8, - /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc); + GetAllocator(), p2, p2, DataType::Type::kInt8, 16, /*is_rounded*/ true, kNoDexPc); HVecHalvingAdd* v11 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p2, p2, DataType::Type::kInt16, 2, - /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc); - HVecHalvingAdd* v12 = new (GetAllocator()) HVecHalvingAdd( - GetAllocator(), p2, p2, DataType::Type::kInt16, 2, - /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc); - HVecHalvingAdd* hadd_insns[] = { v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12 }; + GetAllocator(), p2, p2, DataType::Type::kInt8, 16, /*is_rounded*/ false, kNoDexPc); + HVecHalvingAdd* hadd_insns[] = { v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 }; + + EXPECT_FALSE(u0->CanBeMoved()); + EXPECT_FALSE(u1->CanBeMoved()); + EXPECT_FALSE(u2->CanBeMoved()); EXPECT_FALSE(p0->CanBeMoved()); EXPECT_FALSE(p1->CanBeMoved()); EXPECT_FALSE(p2->CanBeMoved()); @@ -427,26 +337,18 @@ TEST_F(NodesVectorTest, VectorAttributesMatterOnHalvingAdd) { EXPECT_TRUE(hadd_insn->CanBeMoved()); } - // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64. - EXPECT_TRUE(v0->IsUnsigned()); - EXPECT_TRUE(v1->IsUnsigned()); - EXPECT_TRUE(!v2->IsUnsigned()); - EXPECT_TRUE(!v3->IsUnsigned()); - EXPECT_TRUE(v4->IsUnsigned()); - EXPECT_TRUE(v0->IsRounded()); EXPECT_TRUE(!v1->IsRounded()); EXPECT_TRUE(v2->IsRounded()); EXPECT_TRUE(!v3->IsRounded()); EXPECT_TRUE(v4->IsRounded()); - EXPECT_TRUE(v5->IsRounded()); - EXPECT_TRUE(!v6->IsRounded()); - EXPECT_TRUE(v7->IsRounded()); - EXPECT_TRUE(!v8->IsRounded()); - EXPECT_TRUE(v9->IsRounded()); - EXPECT_TRUE(!v10->IsRounded()); - EXPECT_TRUE(v11->IsRounded()); - EXPECT_TRUE(!v12->IsRounded()); + EXPECT_TRUE(!v5->IsRounded()); + EXPECT_TRUE(v6->IsRounded()); + EXPECT_TRUE(!v7->IsRounded()); + EXPECT_TRUE(v8->IsRounded()); + EXPECT_TRUE(!v9->IsRounded()); + EXPECT_TRUE(v10->IsRounded()); + EXPECT_TRUE(!v11->IsRounded()); for (HVecHalvingAdd* hadd_insn1 : hadd_insns) { for (HVecHalvingAdd* hadd_insn2 : hadd_insns) { diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index e2b2106f65..d20b681b49 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -41,7 +41,7 @@ namespace art { // Run the tests only on host. #ifndef ART_TARGET_ANDROID -class OptimizingCFITest : public CFITest { +class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { public: // Enable this flag to generate the expected outputs. static constexpr bool kGenerateExpected = false; @@ -63,7 +63,7 @@ class OptimizingCFITest : public CFITest { // Setup simple context. std::string error; isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error); - graph_ = CreateGraph(&pool_and_allocator_); + graph_ = CreateGraph(); // Generate simple frame with some spills. 
code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_); code_gen_->GetAssembler()->cfi().SetEnabled(true); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index f4115f7e7b..b3f23a0dcd 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -382,6 +382,9 @@ class OptimizingCompiler FINAL : public Compiler { PassObserver* pass_observer, VariableSizedHandleScope* handles) const; + void GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo method_debug_info) + REQUIRES_SHARED(Locks::mutator_lock_); + std::unique_ptr<OptimizingCompilerStats> compilation_stats_; std::unique_ptr<std::ostream> visualizer_output_; @@ -772,7 +775,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, return nullptr; } - CodeItemDebugInfoAccessor code_item_accessor(dex_file, code_item); + CodeItemDebugInfoAccessor code_item_accessor(dex_file, code_item, method_idx); HGraph* graph = new (allocator) HGraph( allocator, arena_stack, @@ -783,7 +786,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, compiler_driver->GetCompilerOptions().GetDebuggable(), osr); - const uint8_t* interpreter_metadata = nullptr; + ArrayRef<const uint8_t> interpreter_metadata; // For AOT compilation, we may not get a method, for example if its class is erroneous. // JIT should always have a method. DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr); @@ -940,7 +943,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( compiler_driver, codegen.get(), compilation_stats_.get(), - /* interpreter_metadata */ nullptr, + /* interpreter_metadata */ ArrayRef<const uint8_t>(), handles); builder.BuildIntrinsicGraph(method); } @@ -1230,7 +1233,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; - DCHECK(info.trampoline_name.empty()); + DCHECK(info.custom_name.empty()); info.dex_file = dex_file; info.class_def_index = class_def_idx; info.dex_method_index = method_idx; @@ -1246,14 +1249,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = nullptr; info.cfi = jni_compiled_method.GetCfi(); - // If both flags are passed, generate full debug info. 
- const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); - std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( - GetCompilerDriver()->GetInstructionSet(), - GetCompilerDriver()->GetInstructionSetFeatures(), - mini_debug_info, - info); - CreateJITCodeEntryForAddress(code_address, std::move(elf_file)); + GenerateJitDebugInfo(method, info); } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); @@ -1361,7 +1357,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; - DCHECK(info.trampoline_name.empty()); + DCHECK(info.custom_name.empty()); info.dex_file = dex_file; info.class_def_index = class_def_idx; info.dex_method_index = method_idx; @@ -1377,14 +1373,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = stack_map_size == 0 ? nullptr : stack_map_data; info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); - // If both flags are passed, generate full debug info. - const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); - std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( - GetCompilerDriver()->GetInstructionSet(), - GetCompilerDriver()->GetInstructionSetFeatures(), - mini_debug_info, - info); - CreateJITCodeEntryForAddress(code_address, std::move(elf_file)); + GenerateJitDebugInfo(method, info); } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); @@ -1408,4 +1397,27 @@ bool OptimizingCompiler::JitCompile(Thread* self, return true; } +void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo info) { + const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); + DCHECK(compiler_options.GenerateAnyDebugInfo()); + + // If both flags are passed, generate full debug info. + const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); + + // Create entry for the single method that we just compiled. 
+ std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( + GetCompilerDriver()->GetInstructionSet(), + GetCompilerDriver()->GetInstructionSetFeatures(), + mini_debug_info, + ArrayRef<const debug::MethodDebugInfo>(&info, 1)); + MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_); + JITCodeEntry* entry = CreateJITCodeEntry(elf_file); + IncrementJITCodeEntryRefcount(entry, info.code_address); + + VLOG(jit) + << "JIT mini-debug-info added for " << ArtMethod::PrettyMethod(method) + << " size=" << PrettySize(elf_file.size()) + << " total_size=" << PrettySize(GetJITCodeEntryMemUsage()); +} + } // namespace art diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 32a94ab5e4..0023265e50 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -75,6 +75,7 @@ enum class MethodCompilationStat { kImplicitNullCheckGenerated, kExplicitNullCheckGenerated, kSimplifyIf, + kSimplifyThrowingInvoke, kInstructionSunk, kNotInlinedUnresolvedEntrypoint, kNotInlinedDexCache, diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 8c97d57f4a..6dcbadba6e 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -17,12 +17,16 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ +#include <memory> +#include <vector> + #include "base/scoped_arena_allocator.h" #include "builder.h" #include "common_compiler_test.h" #include "dex/code_item_accessors-inl.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" +#include "dex/standard_dex_file.h" #include "driver/dex_compilation_unit.h" #include "handle_scope-inl.h" #include "mirror/class_loader.h" @@ -99,18 +103,11 @@ class ArenaPoolAndAllocator { ScopedArenaAllocator scoped_allocator_; }; -inline HGraph* CreateGraph(ArenaPoolAndAllocator* pool_and_allocator) { - return new (pool_and_allocator->GetAllocator()) HGraph( - pool_and_allocator->GetAllocator(), - pool_and_allocator->GetArenaStack(), - *reinterpret_cast<DexFile*>(pool_and_allocator->GetAllocator()->Alloc(sizeof(DexFile))), - /*method_idx*/-1, - kRuntimeISA); -} - -class OptimizingUnitTest : public CommonCompilerTest { - protected: - OptimizingUnitTest() : pool_and_allocator_(new ArenaPoolAndAllocator()) { } +// Have a separate helper so the OptimizingCFITest can inherit it without causing +// multiple inheritance errors from having gtest as a parent twice. +class OptimizingUnitTestHelper { + public: + OptimizingUnitTestHelper() : pool_and_allocator_(new ArenaPoolAndAllocator()) { } ArenaAllocator* GetAllocator() { return pool_and_allocator_->GetAllocator(); } ArenaStack* GetArenaStack() { return pool_and_allocator_->GetArenaStack(); } @@ -122,14 +119,42 @@ class OptimizingUnitTest : public CommonCompilerTest { } HGraph* CreateGraph() { - return art::CreateGraph(pool_and_allocator_.get()); + ArenaAllocator* const allocator = pool_and_allocator_->GetAllocator(); + + // Reserve a big array of 0s so the dex file constructor can read offsets from the header. + static constexpr size_t kDexDataSize = 4 * KB; + const uint8_t* dex_data = reinterpret_cast<uint8_t*>(allocator->Alloc(kDexDataSize)); + + // Create the dex file based on the fake data. Call the constructor so that we can use virtual + // functions. Don't use the arena for the StandardDexFile, otherwise the dex location leaks.
+ dex_files_.emplace_back(new StandardDexFile( + dex_data, + sizeof(StandardDexFile::Header), + "no_location", + /*location_checksum*/ 0, + /*oat_dex_file*/ nullptr, + /*container*/ nullptr)); + + return new (allocator) HGraph( + allocator, + pool_and_allocator_->GetArenaStack(), + *dex_files_.back(), + /*method_idx*/-1, + kRuntimeISA); } // Create a control-flow graph from Dex instructions. - HGraph* CreateCFG(const uint16_t* data, DataType::Type return_type = DataType::Type::kInt32) { - const DexFile::CodeItem* code_item = reinterpret_cast<const DexFile::CodeItem*>(data); + HGraph* CreateCFG(const std::vector<uint16_t>& data, + DataType::Type return_type = DataType::Type::kInt32) { HGraph* graph = CreateGraph(); + // The code item data might not be aligned to 4 bytes; copy it to ensure that it is. + const size_t code_item_size = data.size() * sizeof(data.front()); + void* aligned_data = GetAllocator()->Alloc(code_item_size); + memcpy(aligned_data, &data[0], code_item_size); + CHECK_ALIGNED(aligned_data, StandardDexFile::CodeItem::kAlignment); + const DexFile::CodeItem* code_item = reinterpret_cast<const DexFile::CodeItem*>(aligned_data); + { ScopedObjectAccess soa(Thread::Current()); if (handles_ == nullptr) { @@ -146,7 +171,7 @@ class OptimizingUnitTest : public CommonCompilerTest { /* access_flags */ 0u, /* verified_method */ nullptr, handles_->NewHandle<mirror::DexCache>(nullptr)); - CodeItemDebugInfoAccessor accessor(graph->GetDexFile(), code_item); + CodeItemDebugInfoAccessor accessor(graph->GetDexFile(), code_item, /*dex_method_idx*/ 0u); HGraphBuilder builder(graph, dex_compilation_unit, accessor, handles_.get(), return_type); bool graph_built = (builder.BuildGraph() == kAnalysisSuccess); return graph_built ? graph : nullptr; @@ -154,10 +179,13 @@ class OptimizingUnitTest : public CommonCompilerTest { } private: + std::vector<std::unique_ptr<const StandardDexFile>> dex_files_; std::unique_ptr<ArenaPoolAndAllocator> pool_and_allocator_; std::unique_ptr<VariableSizedHandleScope> handles_; }; +class OptimizingUnitTest : public CommonCompilerTest, public OptimizingUnitTestHelper {}; + // Naive string diff data type.
typedef std::list<std::pair<std::string, std::string>> diff_t; diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 4fc7fe9427..6ef386b4a5 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -29,10 +29,10 @@ namespace art { class PrettyPrinterTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, const char* expected); + void TestCode(const std::vector<uint16_t>& data, const char* expected); }; -void PrettyPrinterTest::TestCode(const uint16_t* data, const char* expected) { +void PrettyPrinterTest::TestCode(const std::vector<uint16_t>& data, const char* expected) { HGraph* graph = CreateCFG(data); StringPrettyPrinter printer(graph); printer.VisitInsertionOrder(); @@ -40,7 +40,7 @@ void PrettyPrinterTest::TestCode(const uint16_t* data, const char* expected) { } TEST_F(PrettyPrinterTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID); const char* expected = @@ -67,7 +67,7 @@ TEST_F(PrettyPrinterTest, CFG1) { "BasicBlock 3, pred: 2\n" " 4: Exit\n"; - const uint16_t data[] = + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -89,7 +89,7 @@ TEST_F(PrettyPrinterTest, CFG2) { "BasicBlock 4, pred: 3\n" " 5: Exit\n"; - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -111,21 +111,21 @@ TEST_F(PrettyPrinterTest, CFG3) { "BasicBlock 4, pred: 2\n" " 5: Exit\n"; - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, Instruction::RETURN_VOID, Instruction::GOTO | 0xFF00); TestCode(data1, expected); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); TestCode(data2, expected); - const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); @@ -144,13 +144,13 @@ TEST_F(PrettyPrinterTest, CFG4) { "BasicBlock 3, pred: 0, succ: 1\n" " 0: Goto 1\n"; - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::NOP, Instruction::GOTO | 0xFF00); TestCode(data1, expected); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 0, 0); TestCode(data2, expected); @@ -166,7 +166,7 @@ TEST_F(PrettyPrinterTest, CFG5) { "BasicBlock 3, pred: 1\n" " 3: Exit\n"; - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, Instruction::GOTO | 0x100, Instruction::GOTO | 0xFE00); @@ -192,7 +192,7 @@ TEST_F(PrettyPrinterTest, CFG6) { "BasicBlock 5, pred: 1, succ: 3\n" " 0: Goto 3\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -220,7 +220,7 @@ TEST_F(PrettyPrinterTest, CFG7) { "BasicBlock 6, pred: 1, succ: 2\n" " 1: Goto 2\n"; - const 
uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -240,7 +240,7 @@ TEST_F(PrettyPrinterTest, IntConstant) { "BasicBlock 2, pred: 1\n" " 4: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 1d3fe0334d..27f9ac3990 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -103,6 +103,7 @@ void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoint case DataType::Type::kFloat64: slot += long_spill_slots; FALLTHROUGH_INTENDED; + case DataType::Type::kUint64: case DataType::Type::kInt64: slot += float_spill_slots; FALLTHROUGH_INTENDED; @@ -110,6 +111,7 @@ void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoint slot += int_spill_slots; FALLTHROUGH_INTENDED; case DataType::Type::kReference: + case DataType::Type::kUint32: case DataType::Type::kInt32: case DataType::Type::kUint16: case DataType::Type::kUint8: diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index ad5248e982..fa7ad82316 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -1972,6 +1972,8 @@ void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* case DataType::Type::kInt16: int_intervals.push_back(parent); break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType(); UNREACHABLE(); diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index cfe63bd758..216fb57a96 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -1131,6 +1131,8 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) { case DataType::Type::kInt16: spill_slots = &int_spill_slots_; break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected type for interval " << interval->GetType(); } diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 3748d599a3..a70b0664dc 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -46,7 +46,7 @@ class RegisterAllocatorTest : public OptimizingUnitTest { void ExpectedInRegisterHint(Strategy strategy); // Helper functions that make use of the OptimizingUnitTest's members. 
- bool Check(const uint16_t* data, Strategy strategy); + bool Check(const std::vector<uint16_t>& data, Strategy strategy); void CFG1(Strategy strategy); void Loop1(Strategy strategy); void Loop2(Strategy strategy); @@ -79,7 +79,7 @@ TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\ test_name(Strategy::kRegisterAllocatorGraphColor);\ } -bool RegisterAllocatorTest::Check(const uint16_t* data, Strategy strategy) { +bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy strategy) { HGraph* graph = CreateCFG(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); @@ -185,7 +185,7 @@ void RegisterAllocatorTest::CFG1(Strategy strategy) { * | * exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -222,7 +222,7 @@ void RegisterAllocatorTest::Loop1(Strategy strategy) { * exit */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -268,7 +268,7 @@ void RegisterAllocatorTest::Loop2(Strategy strategy) { * exit */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 8 << 12 | 1 << 8, Instruction::IF_EQ | 1 << 8, 7, @@ -314,7 +314,7 @@ void RegisterAllocatorTest::Loop3(Strategy strategy) { * exit */ - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8, Instruction::CONST_4 | 5 << 12 | 2 << 8, @@ -351,7 +351,7 @@ void RegisterAllocatorTest::Loop3(Strategy strategy) { TEST_ALL_STRATEGIES(Loop3); TEST_F(RegisterAllocatorTest, FirstRegisterUse) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8, Instruction::XOR_INT_LIT8 | 0 << 8, 1 << 8, @@ -402,7 +402,7 @@ void RegisterAllocatorTest::DeadPhi(Strategy strategy) { * } while (true); */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 0, Instruction::IF_NE | 1 << 8 | 1 << 12, 3, @@ -432,7 +432,7 @@ TEST_ALL_STRATEGIES(DeadPhi); * This test only applies to the linear scan allocator. 
*/ TEST_F(RegisterAllocatorTest, FreeUntil) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index 104ebc79c2..fb15fc8975 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -182,7 +182,9 @@ class SchedulerTest : public OptimizingUnitTest { scheduler->Schedule(graph_); } - void CompileWithRandomSchedulerAndRun(const uint16_t* data, bool has_result, int expected) { + void CompileWithRandomSchedulerAndRun(const std::vector<uint16_t>& data, + bool has_result, + int expected) { for (CodegenTargetConfig target_config : GetTargetConfigs()) { HGraph* graph = CreateCFG(data); @@ -393,7 +395,7 @@ TEST_F(SchedulerTest, RandomScheduling) { // } // return result; // - const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 12 | 2 << 8, // const/4 v2, #int 0 Instruction::CONST_HIGH16 | 0 << 8, 0x4120, // const/high16 v0, #float 10.0 // #41200000 Instruction::CONST_4 | 1 << 12 | 1 << 8, // const/4 v1, #int 1 diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 77e70d733e..85ed06eb9b 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -31,7 +31,7 @@ namespace art { class SsaTest : public OptimizingUnitTest { protected: - void TestCode(const uint16_t* data, const char* expected); + void TestCode(const std::vector<uint16_t>& data, const char* expected); }; class SsaPrettyPrinter : public HPrettyPrinter { @@ -80,7 +80,7 @@ static void ReNumberInstructions(HGraph* graph) { } } -void SsaTest::TestCode(const uint16_t* data, const char* expected) { +void SsaTest::TestCode(const std::vector<uint16_t>& data, const char* expected) { HGraph* graph = CreateCFG(data); // Suspend checks implementation may change in the future, and this test relies // on how instructions are ordered. 
@@ -119,7 +119,7 @@ TEST_F(SsaTest, CFG1) { "BasicBlock 5, pred: 1, succ: 3\n" " 7: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -150,7 +150,7 @@ TEST_F(SsaTest, CFG2) { "BasicBlock 5, pred: 1, succ: 3\n" " 9: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, @@ -181,7 +181,7 @@ TEST_F(SsaTest, CFG3) { "BasicBlock 5, pred: 4\n" " 10: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -214,7 +214,7 @@ TEST_F(SsaTest, Loop1) { "BasicBlock 6, pred: 5\n" " 10: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -245,7 +245,7 @@ TEST_F(SsaTest, Loop2) { "BasicBlock 5, pred: 4\n" " 9: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -276,7 +276,7 @@ TEST_F(SsaTest, Loop3) { "BasicBlock 5, pred: 4\n" " 10: Exit\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -310,7 +310,7 @@ TEST_F(SsaTest, Loop4) { "BasicBlock 6, pred: 5\n" " 10: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x500, Instruction::IF_EQ, 5, @@ -351,7 +351,7 @@ TEST_F(SsaTest, Loop5) { " 13: Phi(2, 1) [11, 8, 8]\n" " 14: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -390,7 +390,7 @@ TEST_F(SsaTest, Loop6) { "BasicBlock 7, pred: 6\n" " 13: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -432,7 +432,7 @@ TEST_F(SsaTest, Loop7) { "BasicBlock 8, pred: 2, succ: 6\n" " 15: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -456,7 +456,7 @@ TEST_F(SsaTest, DeadLocal) { "BasicBlock 2, pred: 1\n" " 3: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); @@ -484,7 +484,7 @@ TEST_F(SsaTest, LocalInIf) { "BasicBlock 5, pred: 1, succ: 3\n" " 8: Goto\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 1 << 8, @@ -520,7 +520,7 @@ TEST_F(SsaTest, MultiplePredecessors) { "BasicBlock 7, pred: 3, succ: 5\n" " 12: Goto\n"; - const 
uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 5, Instruction::ADD_INT_LIT8 | 1 << 8, 0 << 8, diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc new file mode 100644 index 0000000000..a7c23bef7e --- /dev/null +++ b/compiler/optimizing/superblock_cloner.cc @@ -0,0 +1,704 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "superblock_cloner.h" + +#include "common_dominator.h" +#include "graph_checker.h" + +#include <iostream> + +namespace art { + +using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; +using HInstructionMap = SuperblockCloner::HInstructionMap; +using HBasicBlockSet = SuperblockCloner::HBasicBlockSet; +using HEdgeSet = SuperblockCloner::HEdgeSet; + +void HEdge::Dump(std::ostream& stream) const { + stream << "(" << from_ << "->" << to_ << ")"; +} + +// +// Static helper methods. +// + +// Returns whether the instruction has any uses (regular or environmental) outside the region +// defined by the basic block set. +static bool IsUsedOutsideRegion(const HInstruction* instr, const HBasicBlockSet& bb_set) { + auto& uses = instr->GetUses(); + for (auto use_node = uses.begin(), e = uses.end(); use_node != e; ++use_node) { + HInstruction* user = use_node->GetUser(); + if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) { + return true; + } + } + + auto& env_uses = instr->GetEnvUses(); + for (auto use_node = env_uses.begin(), e = env_uses.end(); use_node != e; ++use_node) { + HInstruction* user = use_node->GetUser()->GetHolder(); + if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) { + return true; + } + } + + return false; +} + +// Returns whether all of the phi's inputs are the same HInstruction. +static bool ArePhiInputsTheSame(const HPhi* phi) { + HInstruction* first_input = phi->InputAt(0); + for (size_t i = 1, e = phi->InputCount(); i < e; i++) { + if (phi->InputAt(i) != first_input) { + return false; + } + } + + return true; +} + +// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole +// graph; a nullptr argument stands for the whole graph. +static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) { + if (loop1 == nullptr || loop2 == nullptr) { + return nullptr; + } + + if (loop1->IsIn(*loop2)) { + return loop2; + } else if (loop2->IsIn(*loop1)) { + return loop1; + } + HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader()); + return block->GetLoopInformation(); +} + +// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph. +static void OrderLoopsHeadersPredecessors(HGraph* graph) { + for (HBasicBlock* block : graph->GetPostOrder()) { + if (block->IsLoopHeader()) { + graph->OrderLoopHeaderPredecessors(block); + } + } +} + +// +// Helpers for CloneBasicBlock.
+// + +void SuperblockCloner::ReplaceInputsWithCopies(HInstruction* copy_instr) { + DCHECK(!copy_instr->IsPhi()); + for (size_t i = 0, e = copy_instr->InputCount(); i < e; i++) { + // Copy instruction holds the same input as the original instruction holds. + HInstruction* orig_input = copy_instr->InputAt(i); + if (!IsInOrigBBSet(orig_input->GetBlock())) { + // Defined outside the subgraph. + continue; + } + HInstruction* copy_input = GetInstrCopy(orig_input); + // copy_instr will be registered as a user of copy_inputs after returning from this function: + // 'copy_block->AddInstruction(copy_instr)'. + copy_instr->SetRawInputAt(i, copy_input); + } +} + +void SuperblockCloner::DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, + const HEnvironment* orig_env) { + if (orig_env->GetParent() != nullptr) { + DeepCloneEnvironmentWithRemapping(copy_instr, orig_env->GetParent()); + } + HEnvironment* copy_env = new (arena_) HEnvironment(arena_, *orig_env, copy_instr); + + for (size_t i = 0; i < orig_env->Size(); i++) { + HInstruction* env_input = orig_env->GetInstructionAt(i); + if (env_input != nullptr && IsInOrigBBSet(env_input->GetBlock())) { + env_input = GetInstrCopy(env_input); + DCHECK(env_input != nullptr && env_input->GetBlock() != nullptr); + } + copy_env->SetRawEnvAt(i, env_input); + if (env_input != nullptr) { + env_input->AddEnvUseAt(copy_env, i); + } + } + // InsertRawEnvironment assumes that the instruction already has an environment, which is why we + // use SetRawEnvironment in the 'else' case. + // As this function calls itself recursively with the same copy_instr, this copy_instr may + // have a partially copied chain of HEnvironments. + if (copy_instr->HasEnvironment()) { + copy_instr->InsertRawEnvironment(copy_env); + } else { + copy_instr->SetRawEnvironment(copy_env); + } +} + +// +// Helpers for RemapEdgesSuccessors. +// + +void SuperblockCloner::RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, + HBasicBlock* orig_succ) { + DCHECK(IsInOrigBBSet(orig_succ)); + HBasicBlock* copy_succ = GetBlockCopy(orig_succ); + + size_t this_index = orig_succ->GetPredecessorIndexOf(orig_block); + size_t phi_input_count = 0; + // This flag reflects whether the original successor has at least one phi and this phi + // has already been processed in the loop. Used for validation in a DCHECK to check that + // in the end all of the phis in the copy successor have the same number of inputs - the number + // of the copy successor's predecessors. + bool first_phi_met = false; + for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) { + HPhi* orig_phi = it.Current()->AsPhi(); + HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); + HInstruction* orig_phi_input = orig_phi->InputAt(this_index); + // Remove corresponding input for original phi. + orig_phi->RemoveInputAt(this_index); + // Copy phi doesn't yet have either orig_block as predecessor or the input that corresponds + // to orig_block, so add the input at the end of the list. + copy_phi->AddInput(orig_phi_input); + if (!first_phi_met) { + phi_input_count = copy_phi->InputCount(); + first_phi_met = true; + } else { + DCHECK_EQ(phi_input_count, copy_phi->InputCount()); + } + } + // orig_block will be put at the end of the copy_succ's predecessors list; that corresponds + // to the previously added phi inputs position.
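+ // For example, if at this point copy_succ has predecessors [A, B] and a phi with inputs + // (a, b), the loop above has appended orig_phi_input as the third phi input and the + // ReplaceSuccessor call below appends orig_block as the third predecessor, so phi input + // indices and predecessor indices stay in sync.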
+ orig_block->ReplaceSuccessor(orig_succ, copy_succ); + DCHECK(!first_phi_met || copy_succ->GetPredecessors().size() == phi_input_count); +} + +void SuperblockCloner::AddCopyInternalEdge(HBasicBlock* orig_block, + HBasicBlock* orig_succ) { + DCHECK(IsInOrigBBSet(orig_succ)); + HBasicBlock* copy_block = GetBlockCopy(orig_block); + HBasicBlock* copy_succ = GetBlockCopy(orig_succ); + copy_block->AddSuccessor(copy_succ); + + size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block); + for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) { + HPhi* orig_phi = it.Current()->AsPhi(); + HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); + HInstruction* orig_phi_input = orig_phi->InputAt(orig_index); + copy_phi->AddInput(orig_phi_input); + } +} + +void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block, + HBasicBlock* orig_succ) { + DCHECK(IsInOrigBBSet(orig_succ)); + HBasicBlock* copy_block = GetBlockCopy(orig_block); + copy_block->AddSuccessor(orig_succ); + DCHECK(copy_block->HasSuccessor(orig_succ)); + + size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block); + for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) { + HPhi* orig_phi = it.Current()->AsPhi(); + HInstruction* orig_phi_input = orig_phi->InputAt(orig_index); + orig_phi->AddInput(orig_phi_input); + } +} + +// +// Local versions of CF calculation/adjustment routines. +// + +// TODO: merge with the original version in nodes.cc. The concern is that we don't want to affect +// the performance of the base version by checking the local set. +// TODO: this version works when updating the back edges info for natural loop-based local_set. +// Check exactly which types of subgraphs can be analysed, or rename it to +// FindBackEdgesInTheNaturalLoop. +void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set) { + ArenaBitVector visited(arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + // "visited" must be empty on entry; it's an output argument for all visited (i.e. live) blocks. + DCHECK_EQ(visited.GetHighestBitSet(), -1); + + // Nodes that we're currently visiting, indexed by block id. + ArenaBitVector visiting(arena_, graph_->GetBlocks().size(), false, kArenaAllocGraphBuilder); + // Number of successors visited from a given node, indexed by block id. + ArenaVector<size_t> successors_visited(graph_->GetBlocks().size(), + 0u, + arena_->Adapter(kArenaAllocGraphBuilder)); + // Stack of nodes that we're currently visiting (same as marked in "visiting" above).
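+ // The traversal below is an iterative DFS restricted to the blocks whose ids are set in + // local_set; an edge to a block that is currently marked in "visiting" is a back edge.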
+ ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder)); + constexpr size_t kDefaultWorklistSize = 8; + worklist.reserve(kDefaultWorklistSize); + + visited.SetBit(entry_block->GetBlockId()); + visiting.SetBit(entry_block->GetBlockId()); + worklist.push_back(entry_block); + + while (!worklist.empty()) { + HBasicBlock* current = worklist.back(); + uint32_t current_id = current->GetBlockId(); + if (successors_visited[current_id] == current->GetSuccessors().size()) { + visiting.ClearBit(current_id); + worklist.pop_back(); + } else { + HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++]; + uint32_t successor_id = successor->GetBlockId(); + if (!local_set->IsBitSet(successor_id)) { + continue; + } + + if (visiting.IsBitSet(successor_id)) { + DCHECK(ContainsElement(worklist, successor)); + successor->AddBackEdgeWhileUpdating(current); + } else if (!visited.IsBitSet(successor_id)) { + visited.SetBit(successor_id); + visiting.SetBit(successor_id); + worklist.push_back(successor); + } + } + } +} + +void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) { + // TODO: DCHECK that after the transformation the graph is connected. + HBasicBlock* block_entry = nullptr; + + if (outer_loop_ == nullptr) { + for (auto block : graph_->GetBlocks()) { + if (block != nullptr) { + outer_loop_bb_set->SetBit(block->GetBlockId()); + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr) { + info->ResetBasicBlockData(); + } + } + } + block_entry = graph_->GetEntryBlock(); + } else { + outer_loop_bb_set->Copy(&outer_loop_bb_set_); + block_entry = outer_loop_->GetHeader(); + + // Add newly created copy blocks. + for (auto entry : *bb_map_) { + outer_loop_bb_set->SetBit(entry.second->GetBlockId()); + } + + // Clear loop_info for the whole outer loop. + for (uint32_t idx : outer_loop_bb_set->Indexes()) { + HBasicBlock* block = GetBlockById(idx); + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr) { + info->ResetBasicBlockData(); + } + } + } + + FindBackEdgesLocal(block_entry, outer_loop_bb_set); + + for (uint32_t idx : outer_loop_bb_set->Indexes()) { + HBasicBlock* block = GetBlockById(idx); + HLoopInformation* info = block->GetLoopInformation(); + // Reset LoopInformation for regular blocks and old headers which are no longer loop headers. + if (info != nullptr && + (info->GetHeader() != block || info->NumberOfBackEdges() == 0)) { + block->SetLoopInformation(nullptr); + } + } +} + +// This is a modified version of HGraph::AnalyzeLoops. +GraphAnalysisResult SuperblockCloner::AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set) { + // We iterate post order to ensure we visit inner loops before outer loops. + // `PopulateRecursive` needs this guarantee to know whether a natural loop + // contains an irreducible loop. + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) { + continue; + } + if (block->IsLoopHeader()) { + if (block->IsCatchBlock()) { + // TODO: Dealing with exceptional back edges could be tricky because + // they only approximate the real control flow. Bail out for now. 
+ return kAnalysisFailThrowCatchLoop; + } + block->GetLoopInformation()->Populate(); + } + } + + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) { + continue; + } + if (block->IsLoopHeader()) { + HLoopInformation* cur_loop = block->GetLoopInformation(); + HLoopInformation* outer_loop = cur_loop->GetPreHeader()->GetLoopInformation(); + if (outer_loop != nullptr) { + outer_loop->PopulateInnerLoopUpwards(cur_loop); + } + } + } + + return kAnalysisSuccess; +} + +void SuperblockCloner::CleanUpControlFlow() { + // TODO: full control flow clean up for now, optimize it. + graph_->ClearDominanceInformation(); + + ArenaBitVector outer_loop_bb_set( + arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + RecalculateBackEdgesInfo(&outer_loop_bb_set); + + // TODO: do it locally. + graph_->SimplifyCFG(); + graph_->ComputeDominanceInformation(); + + // AnalyzeLoopsLocally requires correct post-ordering information, which was calculated just + // before in ComputeDominanceInformation. + GraphAnalysisResult result = AnalyzeLoopsLocally(&outer_loop_bb_set); + DCHECK_EQ(result, kAnalysisSuccess); + + // TODO: do it locally + OrderLoopsHeadersPredecessors(graph_); + + graph_->ComputeTryBlockInformation(); +} + +// +// Helpers for ResolveDataFlow +// + +void SuperblockCloner::ResolvePhi(HPhi* phi) { + HBasicBlock* phi_block = phi->GetBlock(); + for (size_t i = 0, e = phi->InputCount(); i < e; i++) { + HInstruction* input = phi->InputAt(i); + HBasicBlock* input_block = input->GetBlock(); + + // Originally defined outside the region. + if (!IsInOrigBBSet(input_block)) { + continue; + } + HBasicBlock* corresponding_block = phi_block->GetPredecessors()[i]; + if (!IsInOrigBBSet(corresponding_block)) { + phi->ReplaceInput(GetInstrCopy(input), i); + } + } +} + +// +// Main algorithm methods. +// + +void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) { + DCHECK(exits->empty()); + for (uint32_t block_id : orig_bb_set_.Indexes()) { + HBasicBlock* block = GetBlockById(block_id); + for (HBasicBlock* succ : block->GetSuccessors()) { + if (!IsInOrigBBSet(succ)) { + exits->push_back(succ); + } + } + } +} + +void SuperblockCloner::FindAndSetLocalAreaForAdjustments() { + DCHECK(outer_loop_ == nullptr); + ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner)); + SearchForSubgraphExits(&exits); + + // For a reducible graph we need to update back-edges and dominance information only for + // the outermost loop which is affected by the transformation - it can be found by picking + // the outermost common loop of the loops to which the subgraph exit blocks belong. + // Note: it can be a loop or the whole graph (outer_loop_ will be nullptr in this case). + for (HBasicBlock* exit : exits) { + HLoopInformation* loop_exit_loop_info = exit->GetLoopInformation(); + if (loop_exit_loop_info == nullptr) { + outer_loop_ = nullptr; + break; + } + outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info); + } + + if (outer_loop_ != nullptr) { + // Save the loop population info as it will be changed later. + outer_loop_bb_set_.Copy(&outer_loop_->GetBlocks()); + } +} + +void SuperblockCloner::RemapEdgesSuccessors() { + // Redirect incoming edges. + for (HEdge e : *remap_incoming_) { + HBasicBlock* orig_block = GetBlockById(e.GetFrom()); + HBasicBlock* orig_succ = GetBlockById(e.GetTo()); + RemapOrigInternalOrIncomingEdge(orig_block, orig_succ); + } + + // Redirect internal edges.
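+ // For each internal edge (orig_block, orig_succ) the copy edge either keeps the internal shape + // (AddCopyInternalEdge) or is redirected to the original successor (RemapCopyInternalEdge), + // and the original edge is redirected to the copy successor only when it is listed in + // remap_orig_internal_; outgoing edges are simply duplicated for the copy block.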
+ for (uint32_t orig_block_id : orig_bb_set_.Indexes()) { + HBasicBlock* orig_block = GetBlockById(orig_block_id); + + for (HBasicBlock* orig_succ : orig_block->GetSuccessors()) { + uint32_t orig_succ_id = orig_succ->GetBlockId(); + + // Check for outgoing edge. + if (!IsInOrigBBSet(orig_succ)) { + HBasicBlock* copy_block = GetBlockCopy(orig_block); + copy_block->AddSuccessor(orig_succ); + continue; + } + + auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id)); + auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id)); + + // By construction, all successors of a copied block were initially set to the originals. + if (copy_redir != remap_copy_internal_->end()) { + RemapCopyInternalEdge(orig_block, orig_succ); + } else { + AddCopyInternalEdge(orig_block, orig_succ); + } + + if (orig_redir != remap_orig_internal_->end()) { + RemapOrigInternalOrIncomingEdge(orig_block, orig_succ); + } + } + } +} + +void SuperblockCloner::AdjustControlFlowInfo() { + ArenaBitVector outer_loop_bb_set( + arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + RecalculateBackEdgesInfo(&outer_loop_bb_set); + + graph_->ClearDominanceInformation(); + // TODO: Do it locally. + graph_->ComputeDominanceInformation(); +} + +// TODO: Current FastCase restriction guarantees that instructions' inputs are already mapped to +// the valid values; only phis' inputs must be adjusted. +void SuperblockCloner::ResolveDataFlow() { + for (auto entry : *bb_map_) { + HBasicBlock* orig_block = entry.first; + + for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* orig_phi = it.Current()->AsPhi(); + HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); + ResolvePhi(orig_phi); + ResolvePhi(copy_phi); + } + if (kIsDebugBuild) { + // Inputs of instruction copies must already be mapped to the corresponding input copies. + for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) { + CheckInstructionInputsRemapping(it.Current()); + } + } + } +} + +// +// Debug and logging methods. +// + +void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) { + DCHECK(!orig_instr->IsPhi()); + HInstruction* copy_instr = GetInstrCopy(orig_instr); + for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) { + HInstruction* orig_input = orig_instr->InputAt(i); + DCHECK(orig_input->GetBlock()->Dominates(orig_instr->GetBlock())); + + // If the original input is defined outside the region then it will remain the same for both + // the original instruction and the copy after the transformation. + if (!IsInOrigBBSet(orig_input->GetBlock())) { + continue; + } + HInstruction* copy_input = GetInstrCopy(orig_input); + DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock())); + } + + // Resolve environment. + if (orig_instr->HasEnvironment()) { + HEnvironment* orig_env = orig_instr->GetEnvironment(); + + for (size_t i = 0, e = orig_env->Size(); i < e; ++i) { + HInstruction* orig_input = orig_env->GetInstructionAt(i); + + // If the original input is defined outside the region then it will remain the same for both + // the original instruction and the copy after the transformation. + if (orig_input == nullptr || !IsInOrigBBSet(orig_input->GetBlock())) { + continue; + } + + HInstruction* copy_input = GetInstrCopy(orig_input); + DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock())); + } + } +} + +// +// Public methods.
+// + +SuperblockCloner::SuperblockCloner(HGraph* graph, + const HBasicBlockSet* orig_bb_set, + HBasicBlockMap* bb_map, + HInstructionMap* hir_map) + : graph_(graph), + arena_(graph->GetAllocator()), + orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner), + remap_orig_internal_(nullptr), + remap_copy_internal_(nullptr), + remap_incoming_(nullptr), + bb_map_(bb_map), + hir_map_(hir_map), + outer_loop_(nullptr), + outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) { + orig_bb_set_.Copy(orig_bb_set); +} + +void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, + const HEdgeSet* remap_copy_internal, + const HEdgeSet* remap_incoming) { + remap_orig_internal_ = remap_orig_internal; + remap_copy_internal_ = remap_copy_internal; + remap_incoming_ = remap_incoming; +} + +bool SuperblockCloner::IsSubgraphClonable() const { + // TODO: Support irreducible graphs and graphs with try-catch. + if (graph_->HasIrreducibleLoops() || graph_->HasTryCatch()) { + return false; + } + + // Check that there are no instructions defined in the subgraph and used outside. + // TODO: Improve this by accepting graphs with such uses but only one exit. + for (uint32_t idx : orig_bb_set_.Indexes()) { + HBasicBlock* block = GetBlockById(idx); + + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (!instr->IsClonable() || + IsUsedOutsideRegion(instr, orig_bb_set_)) { + return false; + } + } + + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (!instr->IsClonable() || + IsUsedOutsideRegion(instr, orig_bb_set_)) { + return false; + } + } + } + + return true; +} + +void SuperblockCloner::Run() { + DCHECK(bb_map_ != nullptr); + DCHECK(hir_map_ != nullptr); + DCHECK(remap_orig_internal_ != nullptr && + remap_copy_internal_ != nullptr && + remap_incoming_ != nullptr); + DCHECK(IsSubgraphClonable()); + + // Find an area in the graph for which control flow information should be adjusted. + FindAndSetLocalAreaForAdjustments(); + // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be + // adjusted. + CloneBasicBlocks(); + // Connect the blocks together/remap successors and fix phis which are directly affected by the + // remapping. + RemapEdgesSuccessors(); + // Recalculate dominance and backedge information which is required by the next stage. + AdjustControlFlowInfo(); + // Fix data flow of the graph. + ResolveDataFlow(); +} + +void SuperblockCloner::CleanUp() { + CleanUpControlFlow(); + + // Remove phis whose inputs are all the same. + // When a block has a single predecessor it must not have any phis. However, after the + // transformation such a block can end up with a phi that has a single input. + // As this case needs to be handled anyway, we also simplify phis with multiple identical + // inputs here.
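+ // For example, a phi of the form phi(v) or phi(v, v) is replaced with v and removed from its + // block by the loops below.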
+ for (auto entry : *bb_map_) { + HBasicBlock* orig_block = entry.first; + for (HInstructionIterator inst_it(orig_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { + HPhi* phi = inst_it.Current()->AsPhi(); + if (ArePhiInputsTheSame(phi)) { + phi->ReplaceWith(phi->InputAt(0)); + orig_block->RemovePhi(phi); + } + } + + HBasicBlock* copy_block = GetBlockCopy(orig_block); + for (HInstructionIterator inst_it(copy_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { + HPhi* phi = inst_it.Current()->AsPhi(); + if (ArePhiInputsTheSame(phi)) { + phi->ReplaceWith(phi->InputAt(0)); + copy_block->RemovePhi(phi); + } + } + } +} + +HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) { + HGraph* graph = orig_block->GetGraph(); + HBasicBlock* copy_block = new (arena_) HBasicBlock(graph, orig_block->GetDexPc()); + graph->AddBlock(copy_block); + + // Clone all the phis and add them to the map. + for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* orig_instr = it.Current(); + HInstruction* copy_instr = orig_instr->Clone(arena_); + copy_block->AddPhi(copy_instr->AsPhi()); + copy_instr->AsPhi()->RemoveAllInputs(); + DCHECK(!orig_instr->HasEnvironment()); + hir_map_->Put(orig_instr, copy_instr); + } + + // Clone all the instructions and add them to the map. + for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* orig_instr = it.Current(); + HInstruction* copy_instr = orig_instr->Clone(arena_); + ReplaceInputsWithCopies(copy_instr); + copy_block->AddInstruction(copy_instr); + if (orig_instr->HasEnvironment()) { + DeepCloneEnvironmentWithRemapping(copy_instr, orig_instr->GetEnvironment()); + } + hir_map_->Put(orig_instr, copy_instr); + } + + return copy_block; +} + +void SuperblockCloner::CloneBasicBlocks() { + // By this time ReversePostOrder must be valid: in 'CloneBasicBlock' inputs of the copied + // instructions might be replaced by copies of the original inputs (depending on where those + // inputs are defined). So the definitions of the original inputs must be visited before their + // original uses. The property of reducible graphs "if 'A' dom 'B' then rpo_num('A') <= + // rpo_num('B')" guarantees that. + for (HBasicBlock* orig_block : graph_->GetReversePostOrder()) { + if (!IsInOrigBBSet(orig_block)) { + continue; + } + HBasicBlock* copy_block = CloneBasicBlock(orig_block); + bb_map_->Put(orig_block, copy_block); + if (kSuperblockClonerLogging) { + std::cout << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId() << + std::endl; + } + } +} + +} // namespace art diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h new file mode 100644 index 0000000000..23de692673 --- /dev/null +++ b/compiler/optimizing/superblock_cloner.h @@ -0,0 +1,323 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ +#define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ + +#include "base/arena_bit_vector.h" +#include "base/arena_containers.h" +#include "base/bit_vector-inl.h" +#include "nodes.h" + +namespace art { + +static const bool kSuperblockClonerLogging = false; +static const bool kSuperblockClonerVerify = false; + +// Represents an edge between two HBasicBlocks. +// +// Note: objects of this class are small - pass them by value. +class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> { + public: + HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) { + DCHECK_NE(to_, kInvalidBlockId); + DCHECK_NE(from_, kInvalidBlockId); + } + HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) { + DCHECK_NE(to_, kInvalidBlockId); + DCHECK_NE(from_, kInvalidBlockId); + } + HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {} + + uint32_t GetFrom() const { return from_; } + uint32_t GetTo() const { return to_; } + + bool operator==(const HEdge& other) const { + return this->from_ == other.from_ && this->to_ == other.to_; + } + + bool operator!=(const HEdge& other) const { return !operator==(other); } + void Dump(std::ostream& stream) const; + + // Returns whether both block ids of the edge are set (i.e. the edge has been initialized); + // use IsEdgeValid below to check whether the edge actually exists in a CF graph. + bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; } + + private: + // Predecessor block id. + uint32_t from_; + // Successor block id. + uint32_t to_; +}; + +// Returns whether a HEdge edge corresponds to an existing edge in the graph. +inline bool IsEdgeValid(HEdge edge, HGraph* graph) { + if (!edge.IsValid()) { + return false; + } + uint32_t from = edge.GetFrom(); + uint32_t to = edge.GetTo(); + if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) { + return false; + } + + HBasicBlock* block_from = graph->GetBlocks()[from]; + HBasicBlock* block_to = graph->GetBlocks()[to]; + if (block_from == nullptr || block_to == nullptr) { + return false; + } + + return block_from->HasSuccessor(block_to, 0); +} + +// SuperblockCloner provides a way to clone subgraphs in a smart, high-level way without +// fine-grained manipulation of the IR; data flow and graph properties are resolved/adjusted +// automatically. The clone transformation is defined by specifying a set of basic blocks to copy +// and a set of rules for how to treat edges (how to remap their successors). Using this approach, +// such optimizations as Branch Target Expansion, Loop Peeling and Loop Unrolling can be +// implemented. +// +// The idea of the transformation is based on the "superblock cloning" technique described in the +// book "Engineering a Compiler", 2nd edition, Keith D. Cooper, Linda Torczon, Morgan Kaufmann. +// The original paper is "The Superblock: An Effective Technique for VLIW and Superscalar +// Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al. +// J Supercomput (1993) 7: 229. doi:10.1007/BF01205185. +// +// There are two states of the IR graph: original graph (before the transformation) and +// copy graph (after). +// +// Before the transformation: +// Defining a set of basic blocks to copy (orig_bb_set) partitions all of the edges in the original +// graph into 4 categories/sets (use the following notation for edges: "(pred, succ)", +// where pred and succ are basic blocks): +// - internal - pred, succ are members of ‘orig_bb_set’.
+// - outside - pred, succ are not members of ‘orig_bb_set’. +// - incoming - pred is not a member of ‘orig_bb_set’, succ is. +// - outgoing - pred is a member of ‘orig_bb_set’, succ is not. +// +// Transformation: +// +// 1. Initial cloning: +// 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks +// form ‘copy_bb_set’. +// 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the +// copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge +// set. +// 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y) where X_1 is the copy +// of X (Y is not copied, so it has no copy); these new edges form ‘copy_outgoing’ edge +// set. +// 2. Successors remapping. +// 2.1. ‘remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should +// be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)). +// 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors +// should be remapped to original nodes: ((X_1, Y_1) will be transformed into (X_1, Y)). +// 2.3. ‘remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph +// whose successors should be remapped to copy nodes: ((X, Y) will be transformed into +// (X, Y_1)). +// 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc). +// 4. Fix/resolve data flow. +// 5. Do cleanups (DCE, critical edge splitting, etc). +// +// For example, one way to implement loop peeling for a loop consisting of a single header block +// H is: copy H (orig_bb_set = {H}), remap the incoming edge (preheader, H) to (preheader, H_1) +// via ‘remap_incoming’ and remap the copy back edge (H_1, H_1) to (H_1, H) via +// ‘remap_copy_internal’, so that H_1 executes the first iteration and then control passes to the +// original loop. +// +class SuperblockCloner : public ValueObject { + public: + // TODO: Investigate optimal types for the containers. + using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>; + using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>; + using HBasicBlockSet = ArenaBitVector; + using HEdgeSet = ArenaHashSet<HEdge>; + + SuperblockCloner(HGraph* graph, + const HBasicBlockSet* orig_bb_set, + HBasicBlockMap* bb_map, + HInstructionMap* hir_map); + + // Sets edge successor remapping info specified by the corresponding edge sets. + void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, + const HEdgeSet* remap_copy_internal, + const HEdgeSet* remap_incoming); + + // Returns whether the specified subgraph is copyable. + // TODO: Start from a small range of graph patterns, then extend it. + bool IsSubgraphClonable() const; + + // Runs the copy algorithm according to the description. + void Run(); + + // Cleans up the graph after transformation: splits critical edges, recalculates control flow + // information (back-edges, dominators, loop info, etc), eliminates redundant phis. + void CleanUp(); + + // Returns a clone of a basic block (orig_block). + // + // - The copy block will have no successors/predecessors; they should be set up manually. + // - For each instruction in the orig_block a copy is created and inserted into the copy block; + // this correspondence is recorded in the map (old instruction, new instruction). + // - Graph HIR is not valid after this transformation: all of the HIRs have the same inputs as + // in the original block, and PHIs do not reflect a correct correspondence between a value and + // its predecessors (as the copy block has no predecessors by now), etc. + HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block); + + // Creates a clone for each basic block in orig_bb_set, adding corresponding entries into bb_map_ + // and hir_map_.
+  void CloneBasicBlocks();
+
+  HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
+    auto copy_input_iter = hir_map_->find(orig_instr);
+    DCHECK(copy_input_iter != hir_map_->end());
+    return copy_input_iter->second;
+  }
+
+  HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
+    HBasicBlock* block = bb_map_->Get(orig_block);
+    DCHECK(block != nullptr);
+    return block;
+  }
+
+  // Note: performs a linear search over the map; returns nullptr if no original is found.
+  HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
+    for (const auto& it : *hir_map_) {
+      if (it.second == copy_instr) {
+        return it.first;
+      }
+    }
+    return nullptr;
+  }
+
+  bool IsInOrigBBSet(uint32_t block_id) const {
+    return orig_bb_set_.IsBitSet(block_id);
+  }
+
+  bool IsInOrigBBSet(const HBasicBlock* block) const {
+    return IsInOrigBBSet(block->GetBlockId());
+  }
+
+ private:
+  // Fills the 'exits' vector with the subgraph exits.
+  void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
+
+  // Finds and records information about the area in the graph for which control-flow (back edges,
+  // loops, dominators) needs to be adjusted.
+  void FindAndSetLocalAreaForAdjustments();
+
+  // Remaps edges' successors according to the info specified in the edge sets.
+  //
+  // Only edge successors/predecessors and phis' input records (to have a correspondence between
+  // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
+  // phis' nor instructions' input values are resolved.
+  void RemapEdgesSuccessors();
+
+  // Adjusts control-flow (back edges, loops, dominators) for the local area defined by
+  // FindAndSetLocalAreaForAdjustments.
+  void AdjustControlFlowInfo();
+
+  // Resolves data flow - adjusts phis' and instructions' inputs in order to have a valid graph in
+  // SSA form.
+  void ResolveDataFlow();
+
+  //
+  // Helpers for CloneBasicBlock.
+  //
+
+  // Adjusts the copy instruction's inputs: if an input of the original instruction is defined in
+  // the orig_bb_set, replaces it with the corresponding copy; otherwise leaves it as in the
+  // original.
+  void ReplaceInputsWithCopies(HInstruction* copy_instr);
+
+  // Recursively clones the environment for the copy instruction. If an input of the original
+  // environment is defined in the orig_bb_set, replaces it with the corresponding copy; otherwise
+  // leaves it as in the original.
+  void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
+
+  //
+  // Helpers for RemapEdgesSuccessors.
+  //
+
+  // Remaps an incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ
+  // and copy_succ.
+  void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  // Adds a copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
+  void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  // Remaps a copy internal edge back to its original successor, adjusts the phi inputs in
+  // orig_succ.
+  void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  //
+  // Local versions of control flow calculation/adjustment routines.
+  //
+
+  void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
+  void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
+  GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
+  void CleanUpControlFlow();
+
+  //
+  // Helpers for ResolveDataFlow.
+  //
+
+  // Resolves the inputs of the phi.
+  void ResolvePhi(HPhi* phi);
+
+  //
+  // Debug and logging methods.
+  //
+  void CheckInstructionInputsRemapping(HInstruction* orig_instr);
+
+  HBasicBlock* GetBlockById(uint32_t block_id) const {
+    DCHECK_LT(block_id, graph_->GetBlocks().size());
+    HBasicBlock* block = graph_->GetBlocks()[block_id];
+    DCHECK(block != nullptr);
+    return block;
+  }
+
+  HGraph* const graph_;
+  ArenaAllocator* const arena_;
+
+  // Set of basic blocks in the original graph to be copied.
+  HBasicBlockSet orig_bb_set_;
+
+  // Sets of edges which require successor remapping.
+  const HEdgeSet* remap_orig_internal_;
+  const HEdgeSet* remap_copy_internal_;
+  const HEdgeSet* remap_incoming_;
+
+  // Correspondence map for blocks: (original block, copy block).
+  HBasicBlockMap* bb_map_;
+  // Correspondence map for instructions: (original HInstruction, copy HInstruction).
+  HInstructionMap* hir_map_;
+  // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted.
+  HLoopInformation* outer_loop_;
+  HBasicBlockSet outer_loop_bb_set_;
+
+  ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
+
+  DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
+};
+
+}  // namespace art
+
+namespace std {
+
+template <>
+struct hash<art::HEdge> {
+  size_t operator()(art::HEdge const& x) const noexcept {
+    // Use the Cantor pairing function as the hash function.
+    uint32_t a = x.GetFrom();
+    uint32_t b = x.GetTo();
+    return (a + b) * (a + b + 1) / 2 + b;
+  }
+};
+
+}  // namespace std
+
+#endif  // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index fd77eb81fc..f1b7bffdf5 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -17,11 +17,15 @@
 #include "graph_checker.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
+#include "superblock_cloner.h"
 #include "gtest/gtest.h"
 namespace art {
+using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
+using HInstructionMap = SuperblockCloner::HInstructionMap;
+
 // This class provides methods and helpers for testing various cloning and copying routines:
 // individual instruction cloning and cloning of the more coarse-grain structures.
 class SuperblockClonerTest : public OptimizingUnitTest {
@@ -182,4 +186,121 @@ TEST_F(SuperblockClonerTest, IndividualInstrCloner) {
   EXPECT_NE(new_suspend_check, nullptr);
 }
+// Tests SuperblockCloner::CloneBasicBlocks - checks instruction cloning and initial remapping of
+// instructions' inputs.
+TEST_F(SuperblockClonerTest, CloneBasicBlocks) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+  ArenaAllocator* arena = graph_->GetAllocator();
+
+  CreateBasicLoopControlFlow(&header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+  ASSERT_TRUE(CheckGraph());
+
+  ArenaBitVector orig_bb_set(
+      arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+  HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+  orig_bb_set.Union(&loop_info->GetBlocks());
+
+  SuperblockCloner cloner(graph_,
+                          &orig_bb_set,
+                          &bb_map,
+                          &hir_map);
+  EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+  cloner.CloneBasicBlocks();
+
+  EXPECT_EQ(bb_map.size(), 2u);
+  EXPECT_EQ(hir_map.size(), 12u);
+
+  for (const auto& it : hir_map) {
+    HInstruction* orig_instr = it.first;
+    HInstruction* copy_instr = it.second;
+
+    EXPECT_EQ(cloner.GetBlockCopy(orig_instr->GetBlock()), copy_instr->GetBlock());
+    EXPECT_EQ(orig_instr->GetKind(), copy_instr->GetKind());
+    EXPECT_EQ(orig_instr->GetType(), copy_instr->GetType());
+
+    if (orig_instr->IsPhi()) {
+      continue;
+    }
+
+    EXPECT_EQ(orig_instr->InputCount(), copy_instr->InputCount());
+
+    // Check that inputs match.
+    for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
+      HInstruction* orig_input = orig_instr->InputAt(i);
+      HInstruction* copy_input = copy_instr->InputAt(i);
+      if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+        EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+      } else {
+        EXPECT_EQ(orig_input, copy_input);
+      }
+    }
+
+    EXPECT_EQ(orig_instr->HasEnvironment(), copy_instr->HasEnvironment());
+
+    // Check that environments match.
+    if (orig_instr->HasEnvironment()) {
+      HEnvironment* orig_env = orig_instr->GetEnvironment();
+      HEnvironment* copy_env = copy_instr->GetEnvironment();
+
+      EXPECT_EQ(copy_env->GetParent(), nullptr);
+      EXPECT_EQ(orig_env->Size(), copy_env->Size());
+
+      for (size_t i = 0, e = orig_env->Size(); i < e; i++) {
+        HInstruction* orig_input = orig_env->GetInstructionAt(i);
+        HInstruction* copy_input = copy_env->GetInstructionAt(i);
+        if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+          EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+        } else {
+          EXPECT_EQ(orig_input, copy_input);
+        }
+      }
+    }
+  }
+}
+
+// Tests SuperblockCloner::CleanUpControlFlow - checks the algorithms for local adjustment of the
+// control flow.
+TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+  ArenaAllocator* arena = graph_->GetAllocator();
+
+  CreateBasicLoopControlFlow(&header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+  ASSERT_TRUE(CheckGraph());
+
+  ArenaBitVector orig_bb_set(
+      arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+  orig_bb_set.Union(&loop_info->GetBlocks());
+
+  SuperblockCloner cloner(graph_,
+                          &orig_bb_set,
+                          nullptr,
+                          nullptr);
+  EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+  cloner.FindAndSetLocalAreaForAdjustments();
+  cloner.CleanUpControlFlow();
+
+  EXPECT_TRUE(CheckGraph());
+
+  EXPECT_TRUE(entry_block_->Dominates(header));
+  EXPECT_TRUE(entry_block_->Dominates(exit_block_));
+
+  EXPECT_EQ(header->GetLoopInformation(), loop_info);
+  EXPECT_EQ(loop_info->GetHeader(), header);
+  EXPECT_TRUE(loop_info->Contains(*loop_body));
+  EXPECT_TRUE(loop_info->IsBackEdge(*loop_body));
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index 7e83f8ce5f..33823e2a11 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -30,10 +30,10 @@ namespace art {
 class SuspendCheckTest : public OptimizingUnitTest {
  protected:
-  void TestCode(const uint16_t* data);
+  void TestCode(const std::vector<uint16_t>& data);
 };
-void SuspendCheckTest::TestCode(const uint16_t* data) {
+void SuspendCheckTest::TestCode(const std::vector<uint16_t>& data) {
   HGraph* graph = CreateCFG(data);
   HBasicBlock* first_block = graph->GetEntryBlock()->GetSingleSuccessor();
   HBasicBlock* loop_header = first_block->GetSingleSuccessor();
@@ -43,7 +43,7 @@ void SuspendCheckTest::TestCode(const uint16_t* data) {
 }
 TEST_F(SuspendCheckTest, CFG1) {
-  const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+  const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
     Instruction::NOP,
     Instruction::GOTO | 0xFF00);
@@ -51,14 +51,14 @@ TEST_F(SuspendCheckTest, CFG1) {
 }
 TEST_F(SuspendCheckTest, CFG2) {
-  const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+  const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_32, 0, 0);
   TestCode(data);
 }
 TEST_F(SuspendCheckTest, CFG3) {
-  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+  const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 0xFFFF,
     Instruction::RETURN_VOID);
@@ -67,7 +67,7 @@ TEST_F(SuspendCheckTest, CFG3) {
 }
 TEST_F(SuspendCheckTest, CFG4) {
-  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+  const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_NE, 0xFFFF,
     Instruction::RETURN_VOID);
@@ -76,7 +76,7 @@ TEST_F(SuspendCheckTest, CFG4) {
 }
 TEST_F(SuspendCheckTest, CFG5) {
-  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+  const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQZ, 0xFFFF,
     Instruction::RETURN_VOID);
@@ -85,7 +85,7 @@ TEST_F(SuspendCheckTest, CFG5) {
 }
 TEST_F(SuspendCheckTest, CFG6) {
-  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+  const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_NEZ, 0xFFFF,
     Instruction::RETURN_VOID);
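
To make the four-way edge classification and the Cantor-pairing hash above concrete, here is a
minimal standalone C++ sketch. This is not ART code: BlockId, Edge, Classify, and CantorHash are
hypothetical stand-ins for HBasicBlock ids, HEdge, and the partitioning that superblock_cloner.h
describes in its "Before the transformation" comment; only the hash formula itself is taken
verbatim from std::hash<art::HEdge>.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <set>
#include <utility>
#include <vector>

// Hypothetical stand-ins: blocks are plain ids, the graph is an edge list.
using BlockId = uint32_t;
using Edge = std::pair<BlockId, BlockId>;  // (pred, succ).

enum class EdgeKind { kInternal, kOutside, kIncoming, kOutgoing };

// Partitions an edge against orig_bb_set exactly as the header comment
// describes: both ends in the set -> internal, neither end -> outside,
// only succ in the set -> incoming, only pred in the set -> outgoing.
EdgeKind Classify(const Edge& e, const std::set<BlockId>& orig_bb_set) {
  bool pred_in = orig_bb_set.count(e.first) != 0;
  bool succ_in = orig_bb_set.count(e.second) != 0;
  if (pred_in && succ_in) return EdgeKind::kInternal;
  if (!pred_in && !succ_in) return EdgeKind::kOutside;
  return succ_in ? EdgeKind::kIncoming : EdgeKind::kOutgoing;
}

// The same Cantor pairing used by std::hash<art::HEdge>:
// (a, b) -> (a + b) * (a + b + 1) / 2 + b; e.g. (2, 3) -> 5 * 6 / 2 + 3 = 18.
size_t CantorHash(const Edge& e) {
  size_t a = e.first;
  size_t b = e.second;
  return (a + b) * (a + b + 1) / 2 + b;
}

int main() {
  // A tiny CFG: 0 (entry) -> 1 (loop header) <-> 2 (loop body), 1 -> 3 -> 4.
  std::vector<Edge> edges = {{0, 1}, {1, 2}, {2, 1}, {1, 3}, {3, 4}};
  std::set<BlockId> orig_bb_set = {1, 2};  // Clone the loop {header, body}.

  for (const Edge& e : edges) {
    const char* kind = nullptr;
    switch (Classify(e, orig_bb_set)) {
      case EdgeKind::kInternal: kind = "internal"; break;
      case EdgeKind::kOutside:  kind = "outside";  break;
      case EdgeKind::kIncoming: kind = "incoming"; break;
      case EdgeKind::kOutgoing: kind = "outgoing"; break;
    }
    std::cout << "(" << e.first << ", " << e.second << "): " << kind
              << ", hash " << CantorHash(e) << "\n";
  }
  return 0;
}

For this graph the sketch reports (0, 1) as incoming, (1, 2) and (2, 1) as internal, (1, 3) as
outgoing, and (3, 4) as outside - the categories from which the remap_orig_internal /
remap_copy_internal / remap_incoming inputs to SetSuccessorRemappingInfo are chosen when a client
such as loop peeling or unrolling drives the cloner.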