Diffstat (limited to 'compiler')
35 files changed, 2321 insertions, 396 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index c59e36b597..d57f301ff9 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -351,6 +351,7 @@ art_cc_test { "optimizing/pretty_printer_test.cc", "optimizing/reference_type_propagation_test.cc", "optimizing/side_effects_test.cc", + "optimizing/ssa_liveness_analysis_test.cc", "optimizing/ssa_test.cc", "optimizing/stack_map_test.cc", "optimizing/suspend_check_test.cc", diff --git a/compiler/dex/dex_to_dex_decompiler.cc b/compiler/dex/dex_to_dex_decompiler.cc index 53601033da..85d5784c7a 100644 --- a/compiler/dex/dex_to_dex_decompiler.cc +++ b/compiler/dex/dex_to_dex_decompiler.cc @@ -185,7 +185,7 @@ bool DexDecompiler::Decompile() { } if (quickened_info_ptr_ != quickened_info_end_) { - LOG(ERROR) << "Failed to use all values in quickening info." + LOG(FATAL) << "Failed to use all values in quickening info." << " Actual: " << std::hex << quickened_info_ptr_ << " Expected: " << quickened_info_end_; return false; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 057e3c9960..995098799c 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -480,7 +480,9 @@ static void Unquicken(const std::vector<const DexFile*>& dex_files, DCHECK(!it.HasNext()); } } - DCHECK_EQ(quickening_info_ptr, quickening_info_end) << "Failed to use all quickening info"; + if (quickening_info_ptr != quickening_info_end) { + LOG(FATAL) << "Failed to use all quickening info"; + } } void CompilerDriver::CompileAll(jobject class_loader, diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 1e5c43d833..cbde587241 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -355,6 +355,10 @@ class CompilerDriver { return current_dex_to_dex_methods_; } + const ProfileCompilationInfo* GetProfileCompilationInfo() const { + return profile_compilation_info_; + } + private: // Can `referrer_class` access the resolved `member`? 
// Dispatch call to mirror::Class::CanAccessResolvedField or diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 562f97b3ae..35aa1eef2d 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -133,9 +133,10 @@ TEST_F(CompilerDriverTest, DISABLED_LARGE_CompileDexLibCore) { << " " << dex.GetMethodDeclaringClassDescriptor(dex.GetMethodId(i)) << " " << dex.GetMethodName(dex.GetMethodId(i)); } - EXPECT_EQ(dex.NumFieldIds(), dex_cache->NumResolvedFields()); + EXPECT_TRUE(dex_cache->StaticArtFieldSize() == dex_cache->NumResolvedFields() + || dex.NumFieldIds() == dex_cache->NumResolvedFields()); for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) { - ArtField* field = cl->GetResolvedField(i, dex_cache); + ArtField* field = dex_cache->GetResolvedField(i, cl->GetImagePointerSize()); EXPECT_TRUE(field != nullptr) << "field_idx=" << i << " " << dex.GetFieldDeclaringClassDescriptor(dex.GetFieldId(i)) << " " << dex.GetFieldName(dex.GetFieldId(i)); diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 65d82ed980..aa734561b6 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -968,11 +968,12 @@ void ImageWriter::PruneNonImageClasses() { << Class::PrettyClass(declaring_class) << " not in class linker table"; } } - ArtField** resolved_fields = dex_cache->GetResolvedFields(); + mirror::FieldDexCacheType* resolved_fields = dex_cache->GetResolvedFields(); for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) { - ArtField* field = mirror::DexCache::GetElementPtrSize(resolved_fields, i, target_ptr_size_); + auto pair = mirror::DexCache::GetNativePairPtrSize(resolved_fields, i, target_ptr_size_); + ArtField* field = pair.object; if (field != nullptr && !KeepClass(field->GetDeclaringClass().Ptr())) { - dex_cache->SetResolvedField(i, nullptr, target_ptr_size_); + dex_cache->ClearResolvedField(pair.index, target_ptr_size_); } } // Clean the dex field. It might have been populated during the initialization phase, but @@ -1577,10 +1578,8 @@ void ImageWriter::CalculateNewObjectOffsets() { } // Calculate the size of the class table. ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_); - CHECK_EQ(class_loaders_.size(), compile_app_image_ ? 1u : 0u); - mirror::ClassLoader* class_loader = compile_app_image_ ? *class_loaders_.begin() : nullptr; - DCHECK_EQ(image_info.class_table_->NumZygoteClasses(class_loader), 0u); - if (image_info.class_table_->NumNonZygoteClasses(class_loader) != 0u) { + DCHECK_EQ(image_info.class_table_->NumReferencedZygoteClasses(), 0u); + if (image_info.class_table_->NumReferencedNonZygoteClasses() != 0u) { image_info.class_table_bytes_ += image_info.class_table_->WriteToMemory(nullptr); } } @@ -1596,7 +1595,7 @@ void ImageWriter::CalculateNewObjectOffsets() { break; } case kBinDexCacheArray: - bin_offset = RoundUp(bin_offset, DexCacheArraysLayout::Alignment()); + bin_offset = RoundUp(bin_offset, DexCacheArraysLayout::Alignment(target_ptr_size_)); break; case kBinImTable: case kBinIMTConflictTable: { @@ -1925,9 +1924,8 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) { // above comment for intern tables. 
ClassTable temp_class_table; temp_class_table.ReadFromMemory(class_table_memory_ptr); - ObjPtr<mirror::ClassLoader> class_loader = GetClassLoader(); - CHECK_EQ(temp_class_table.NumZygoteClasses(class_loader), - table->NumNonZygoteClasses(class_loader) + table->NumZygoteClasses(class_loader)); + CHECK_EQ(temp_class_table.NumReferencedZygoteClasses(), + table->NumReferencedNonZygoteClasses() + table->NumReferencedZygoteClasses()); UnbufferedRootVisitor visitor(&root_visitor, RootInfo(kRootUnknown)); temp_class_table.VisitRoots(visitor); } @@ -2236,16 +2234,17 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, mirror::DexCache::SetElementPtrSize(copy_methods, i, copy, target_ptr_size_); } } - ArtField** orig_fields = orig_dex_cache->GetResolvedFields(); + mirror::FieldDexCacheType* orig_fields = orig_dex_cache->GetResolvedFields(); if (orig_fields != nullptr) { copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedFieldsOffset(), NativeLocationInImage(orig_fields), PointerSize::k64); - ArtField** copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache); + mirror::FieldDexCacheType* copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache); for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) { - ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_); - ArtField* copy = NativeLocationInImage(orig); - mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_); + mirror::FieldDexCachePair orig = + mirror::DexCache::GetNativePairPtrSize(orig_fields, i, target_ptr_size_); + mirror::FieldDexCachePair copy(NativeLocationInImage(orig.object), orig.index); + mirror::DexCache::SetNativePairPtrSize(copy_fields, i, copy, target_ptr_size_); } } mirror::MethodTypeDexCacheType* orig_method_types = orig_dex_cache->GetResolvedMethodTypes(); diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 43f606af65..afcdf5ea17 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -2270,28 +2270,11 @@ bool OatWriter::LayoutAndWriteDexFile(OutputStream* out, OatDexFile* oat_dex_fil /* verify */ true, /* verify_checksum */ true, &error_msg); - } else if (oat_dex_file->source_.IsRawFile()) { - File* raw_file = oat_dex_file->source_.GetRawFile(); - dex_file = DexFile::OpenDex(raw_file->Fd(), location, /* verify_checksum */ true, &error_msg); } else { - // The source data is a vdex file. - CHECK(oat_dex_file->source_.IsRawData()) + CHECK(oat_dex_file->source_.IsRawFile()) << static_cast<size_t>(oat_dex_file->source_.GetType()); - const uint8_t* raw_dex_file = oat_dex_file->source_.GetRawData(); - // Note: The raw data has already been checked to contain the header - // and all the data that the header specifies as the file size. - DCHECK(raw_dex_file != nullptr); - DCHECK(ValidateDexFileHeader(raw_dex_file, oat_dex_file->GetLocation())); - const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file); - // Since the source may have had its layout changed, don't verify the checksum. 
- dex_file = DexFile::Open(raw_dex_file, - header->file_size_, - location, - oat_dex_file->dex_file_location_checksum_, - nullptr, - /* verify */ true, - /* verify_checksum */ false, - &error_msg); + File* raw_file = oat_dex_file->source_.GetRawFile(); + dex_file = DexFile::OpenDex(raw_file->Fd(), location, /* verify_checksum */ true, &error_msg); } if (dex_file == nullptr) { LOG(ERROR) << "Failed to open dex file for layout: " << error_msg; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index bab626f5ae..e34f116b75 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -5304,18 +5304,29 @@ bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, return true; } Opcode neg_opcode = kNoOperand; + uint32_t neg_value = 0; switch (opcode) { - case AND: neg_opcode = BIC; value = ~value; break; - case ORR: neg_opcode = ORN; value = ~value; break; - case ADD: neg_opcode = SUB; value = -value; break; - case ADC: neg_opcode = SBC; value = ~value; break; - case SUB: neg_opcode = ADD; value = -value; break; - case SBC: neg_opcode = ADC; value = ~value; break; - case MOV: neg_opcode = MVN; value = ~value; break; + case AND: neg_opcode = BIC; neg_value = ~value; break; + case ORR: neg_opcode = ORN; neg_value = ~value; break; + case ADD: neg_opcode = SUB; neg_value = -value; break; + case ADC: neg_opcode = SBC; neg_value = ~value; break; + case SUB: neg_opcode = ADD; neg_value = -value; break; + case SBC: neg_opcode = ADC; neg_value = ~value; break; + case MOV: neg_opcode = MVN; neg_value = ~value; break; default: return false; } - return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so); + + if (assembler->ShifterOperandCanHold(kNoRegister, + kNoRegister, + neg_opcode, + neg_value, + set_cc, + &so)) { + return true; + } + + return opcode == AND && IsPowerOfTwo(value + 1); } void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, @@ -6217,21 +6228,59 @@ void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) { caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + + HInstruction* index = instruction->InputAt(0); + HInstruction* length = instruction->InputAt(1); + // If both index and length are constants we can statically check the bounds. But if at least one + // of them is not encodable ArmEncodableConstantOrRegister will create + // Location::RequiresRegister() which is not desired to happen. Instead we create constant + // locations. + bool both_const = index->IsConstant() && length->IsConstant(); + locations->SetInAt(0, both_const + ? Location::ConstantLocation(index->AsConstant()) + : ArmEncodableConstantOrRegister(index, CMP)); + locations->SetInAt(1, both_const + ? 
Location::ConstantLocation(length->AsConstant()) + : ArmEncodableConstantOrRegister(length, CMP)); } void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) { LocationSummary* locations = instruction->GetLocations(); - SlowPathCodeARM* slow_path = - new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction); - codegen_->AddSlowPath(slow_path); - - Register index = locations->InAt(0).AsRegister<Register>(); - Register length = locations->InAt(1).AsRegister<Register>(); + Location index_loc = locations->InAt(0); + Location length_loc = locations->InAt(1); + + if (length_loc.IsConstant()) { + int32_t length = helpers::Int32ConstantFrom(length_loc); + if (index_loc.IsConstant()) { + // BCE will remove the bounds check if we are guaranteed to pass. + int32_t index = helpers::Int32ConstantFrom(index_loc); + if (index < 0 || index >= length) { + SlowPathCodeARM* slow_path = + new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel()); + } else { + // Some optimization after BCE may have generated this, and we should not + // generate a bounds check if it is a valid range. + } + return; + } - __ cmp(index, ShifterOperand(length)); - __ b(slow_path->GetEntryLabel(), HS); + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction); + __ cmp(index_loc.AsRegister<Register>(), ShifterOperand(length)); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel(), HS); + } else { + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction); + if (index_loc.IsConstant()) { + int32_t index = helpers::Int32ConstantFrom(index_loc); + __ cmp(length_loc.AsRegister<Register>(), ShifterOperand(index)); + } else { + __ cmp(length_loc.AsRegister<Register>(), ShifterOperand(index_loc.AsRegister<Register>())); + } + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel(), LS); + } } void CodeGeneratorARM::MarkGCCard(Register temp, @@ -7571,9 +7620,11 @@ void InstructionCodeGeneratorARM::GenerateAndConst(Register out, Register first, ShifterOperand so; if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, AND, value, &so)) { __ and_(out, first, so); - } else { - DCHECK(__ ShifterOperandCanHold(kNoRegister, kNoRegister, BIC, ~value, &so)); + } else if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, BIC, ~value, &so)) { __ bic(out, first, ShifterOperand(~value)); + } else { + DCHECK(IsPowerOfTwo(value + 1)); + __ ubfx(out, first, 0, WhichPowerOf2(value + 1)); } } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 97b61edbb9..28cc942dfb 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -2395,7 +2395,7 @@ void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } default: @@ -2565,7 +2565,7 @@ void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instr new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction)); - 
locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index a1f30cd2b2..f5ada5224b 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -5315,18 +5315,24 @@ bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(uint32_t value, return true; } Opcode neg_opcode = kNoOperand; + uint32_t neg_value = 0; switch (opcode) { - case AND: neg_opcode = BIC; value = ~value; break; - case ORR: neg_opcode = ORN; value = ~value; break; - case ADD: neg_opcode = SUB; value = -value; break; - case ADC: neg_opcode = SBC; value = ~value; break; - case SUB: neg_opcode = ADD; value = -value; break; - case SBC: neg_opcode = ADC; value = ~value; break; - case MOV: neg_opcode = MVN; value = ~value; break; + case AND: neg_opcode = BIC; neg_value = ~value; break; + case ORR: neg_opcode = ORN; neg_value = ~value; break; + case ADD: neg_opcode = SUB; neg_value = -value; break; + case ADC: neg_opcode = SBC; neg_value = ~value; break; + case SUB: neg_opcode = ADD; neg_value = -value; break; + case SBC: neg_opcode = ADC; neg_value = ~value; break; + case MOV: neg_opcode = MVN; neg_value = ~value; break; default: return false; } - return assembler->ShifterOperandCanHold(neg_opcode, value, set_cc); + + if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, set_cc)) { + return true; + } + + return opcode == AND && IsPowerOfTwo(value + 1); } void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, @@ -6264,20 +6270,56 @@ void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) { caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1))); LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + + HInstruction* index = instruction->InputAt(0); + HInstruction* length = instruction->InputAt(1); + // If both index and length are constants we can statically check the bounds. But if at least one + // of them is not encodable ArmEncodableConstantOrRegister will create + // Location::RequiresRegister() which is not desired to happen. Instead we create constant + // locations. + bool both_const = index->IsConstant() && length->IsConstant(); + locations->SetInAt(0, both_const + ? Location::ConstantLocation(index->AsConstant()) + : ArmEncodableConstantOrRegister(index, CMP)); + locations->SetInAt(1, both_const + ? 
Location::ConstantLocation(length->AsConstant()) + : ArmEncodableConstantOrRegister(length, CMP)); } void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) { - SlowPathCodeARMVIXL* slow_path = - new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction); - codegen_->AddSlowPath(slow_path); - - vixl32::Register index = InputRegisterAt(instruction, 0); - vixl32::Register length = InputRegisterAt(instruction, 1); + LocationSummary* locations = instruction->GetLocations(); + Location index_loc = locations->InAt(0); + Location length_loc = locations->InAt(1); + + if (length_loc.IsConstant()) { + int32_t length = Int32ConstantFrom(length_loc); + if (index_loc.IsConstant()) { + // BCE will remove the bounds check if we are guaranteed to pass. + int32_t index = Int32ConstantFrom(index_loc); + if (index < 0 || index >= length) { + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + } else { + // Some optimization after BCE may have generated this, and we should not + // generate a bounds check if it is a valid range. + } + return; + } - __ Cmp(index, length); - __ B(hs, slow_path->GetEntryLabel()); + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction); + __ Cmp(RegisterFrom(index_loc), length); + codegen_->AddSlowPath(slow_path); + __ B(hs, slow_path->GetEntryLabel()); + } else { + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction); + __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0)); + codegen_->AddSlowPath(slow_path); + __ B(ls, slow_path->GetEntryLabel()); + } } void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, @@ -7631,10 +7673,12 @@ void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out, return; } if (GetAssembler()->ShifterOperandCanHold(AND, value)) { - __ And(out, first, value); + __ And(out, first, value); + } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) { + __ Bic(out, first, ~value); } else { - DCHECK(GetAssembler()->ShifterOperandCanHold(BIC, ~value)); - __ Bic(out, first, ~value); + DCHECK(IsPowerOfTwo(value + 1)); + __ Ubfx(out, first, 0, WhichPowerOf2(value + 1)); } } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 781027ab30..ef01a478f4 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -35,11 +35,11 @@ #include "aarch32/macro-assembler-aarch32.h" #pragma GCC diagnostic pop -// Default to use the VIXL-based backend on ARM. -#ifdef ART_USE_OLD_ARM_BACKEND -static constexpr bool kArmUseVIXL32 = false; -#else +// True if VIXL32 should be used for codegen on ARM. +#ifdef ART_USE_VIXL_ARM_BACKEND static constexpr bool kArmUseVIXL32 = true; +#else +static constexpr bool kArmUseVIXL32 = false; #endif namespace art { diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 02c3ad6e39..4814b224ad 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1681,6 +1681,25 @@ void InstructionCodeGeneratorMIPS64::VisitArrayLength(HArrayLength* instruction) } } +Location LocationsBuilderMIPS64::RegisterOrZeroConstant(HInstruction* instruction) { + return (instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern()) + ? 
Location::ConstantLocation(instruction->AsConstant()) + : Location::RequiresRegister(); +} + +Location LocationsBuilderMIPS64::FpuRegisterOrConstantForStore(HInstruction* instruction) { + // We can store 0.0 directly (from the ZERO register) without loading it into an FPU register. + // We can store a non-zero float or double constant without first loading it into the FPU, + // but we should only prefer this if the constant has a single use. + if (instruction->IsConstant() && + (instruction->AsConstant()->IsZeroBitPattern() || + instruction->GetUses().HasExactlyOneElement())) { + return Location::ConstantLocation(instruction->AsConstant()); + // Otherwise fall through and require an FPU register for the constant. + } + return Location::RequiresFpuRegister(); +} + void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) { bool needs_runtime_call = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( @@ -1695,9 +1714,9 @@ void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { - locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); } else { - locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); } } } @@ -1706,24 +1725,29 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); Location index = locations->InAt(1); + Location value_location = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); bool needs_runtime_call = locations->WillCall(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); auto null_checker = GetImplicitNullChecker(instruction, codegen_); + GpuRegister base_reg = index.IsConstant() ? 
obj : TMP; switch (value_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>(); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ StoreToOffset(kStoreByte, value, obj, offset, null_checker); + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1; } else { - __ Daddu(TMP, obj, index.AsRegister<GpuRegister>()); - __ StoreToOffset(kStoreByte, value, TMP, data_offset, null_checker); + __ Daddu(base_reg, obj, index.AsRegister<GpuRegister>()); + } + if (value_location.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreByte, value, base_reg, data_offset, TMP, null_checker); + } else { + GpuRegister value = value_location.AsRegister<GpuRegister>(); + __ StoreToOffset(kStoreByte, value, base_reg, data_offset, null_checker); } break; } @@ -1731,15 +1755,18 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimShort: case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>(); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ StoreToOffset(kStoreHalfword, value, obj, offset, null_checker); + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2; } else { - __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2); - __ Daddu(TMP, obj, TMP); - __ StoreToOffset(kStoreHalfword, value, TMP, data_offset, null_checker); + __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_2); + __ Daddu(base_reg, obj, base_reg); + } + if (value_location.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreHalfword, value, base_reg, data_offset, TMP, null_checker); + } else { + GpuRegister value = value_location.AsRegister<GpuRegister>(); + __ StoreToOffset(kStoreHalfword, value, base_reg, data_offset, null_checker); } break; } @@ -1748,54 +1775,57 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimNot: { if (!needs_runtime_call) { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - GpuRegister base_reg; - GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; - base_reg = obj; } else { DCHECK(index.IsRegister()) << index; - __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); - __ Daddu(TMP, obj, TMP); - base_reg = TMP; + __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4); + __ Daddu(base_reg, obj, base_reg); } - if (kPoisonHeapReferences && needs_write_barrier) { - // Note that in the case where `value` is a null reference, - // we do not enter this block, as a null reference does not - // need poisoning. - DCHECK_EQ(value_type, Primitive::kPrimNot); - // Use Sw() instead of StoreToOffset() in order to be able to - // hold the poisoned reference in AT and thus avoid allocating - // yet another temporary register. 
- if (index.IsConstant()) { - if (!IsInt<16>(static_cast<int32_t>(data_offset))) { - int16_t low16 = Low16Bits(data_offset); - // For consistency with StoreToOffset() and such treat data_offset as int32_t. - uint64_t high48 = static_cast<uint64_t>(static_cast<int32_t>(data_offset)) - low16; - int16_t upper16 = High16Bits(high48); - // Allow the full [-2GB,+2GB) range in case `low16` is negative and needs a - // compensatory 64KB added, which may push `high48` above 2GB and require - // the dahi instruction. - int16_t higher16 = High32Bits(high48) + ((upper16 < 0) ? 1 : 0); - __ Daui(TMP, obj, upper16); - if (higher16 != 0) { - __ Dahi(TMP, higher16); + if (value_location.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); + DCHECK(!needs_write_barrier); + } else { + GpuRegister value = value_location.AsRegister<GpuRegister>(); + if (kPoisonHeapReferences && needs_write_barrier) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. + DCHECK_EQ(value_type, Primitive::kPrimNot); + // Use Sw() instead of StoreToOffset() in order to be able to + // hold the poisoned reference in AT and thus avoid allocating + // yet another temporary register. + if (index.IsConstant()) { + if (!IsInt<16>(static_cast<int32_t>(data_offset))) { + int16_t low16 = Low16Bits(data_offset); + // For consistency with StoreToOffset() and such treat data_offset as int32_t. + uint64_t high48 = static_cast<uint64_t>(static_cast<int32_t>(data_offset)) - low16; + int16_t upper16 = High16Bits(high48); + // Allow the full [-2GB,+2GB) range in case `low16` is negative and needs a + // compensatory 64KB added, which may push `high48` above 2GB and require + // the dahi instruction. + int16_t higher16 = High32Bits(high48) + ((upper16 < 0) ? 
1 : 0); + __ Daui(TMP, obj, upper16); + if (higher16 != 0) { + __ Dahi(TMP, higher16); + } + base_reg = TMP; + data_offset = low16; } - base_reg = TMP; - data_offset = low16; + } else { + DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))); } + __ PoisonHeapReference(AT, value); + __ Sw(AT, base_reg, data_offset); + null_checker(); } else { - DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))); + __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); + } + if (needs_write_barrier) { + DCHECK_EQ(value_type, Primitive::kPrimNot); + codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); } - __ PoisonHeapReference(AT, value); - __ Sw(AT, base_reg, data_offset); - null_checker(); - } else { - __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); - } - if (needs_write_barrier) { - DCHECK_EQ(value_type, Primitive::kPrimNot); - codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); } } else { DCHECK_EQ(value_type, Primitive::kPrimNot); @@ -1809,47 +1839,54 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>(); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreToOffset(kStoreDoubleword, value, obj, offset, null_checker); + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; } else { - __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8); - __ Daddu(TMP, obj, TMP); - __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset, null_checker); + __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8); + __ Daddu(base_reg, obj, base_reg); + } + if (value_location.IsConstant()) { + int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker); + } else { + GpuRegister value = value_location.AsRegister<GpuRegister>(); + __ StoreToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker); } break; } case Primitive::kPrimFloat: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>(); - DCHECK(locations->InAt(2).IsFpuRegister()); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreFpuToOffset(kStoreWord, value, obj, offset, null_checker); + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; } else { - __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); - __ Daddu(TMP, obj, TMP); - __ StoreFpuToOffset(kStoreWord, value, TMP, data_offset, null_checker); + __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4); + __ Daddu(base_reg, obj, base_reg); + } + if (value_location.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); + } else { + FpuRegister value = value_location.AsFpuRegister<FpuRegister>(); + __ StoreFpuToOffset(kStoreWord, value, base_reg, data_offset, null_checker); } break; } case Primitive::kPrimDouble: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>(); - 
DCHECK(locations->InAt(2).IsFpuRegister()); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreFpuToOffset(kStoreDoubleword, value, obj, offset, null_checker); + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; } else { - __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8); - __ Daddu(TMP, obj, TMP); - __ StoreFpuToOffset(kStoreDoubleword, value, TMP, data_offset, null_checker); + __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8); + __ Daddu(base_reg, obj, base_reg); + } + if (value_location.IsConstant()) { + int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker); + } else { + FpuRegister value = value_location.AsFpuRegister<FpuRegister>(); + __ StoreFpuToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker); } break; } @@ -3326,9 +3363,9 @@ void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction, new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, FpuRegisterOrConstantForStore(instruction->InputAt(1))); } else { - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, RegisterOrZeroConstant(instruction->InputAt(1))); } } @@ -3338,6 +3375,7 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, Primitive::Type type = field_info.GetFieldType(); LocationSummary* locations = instruction->GetLocations(); GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + Location value_location = locations->InAt(1); StoreOperandType store_type = kStoreByte; uint32_t offset = field_info.GetFieldOffset().Uint32Value(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1)); @@ -3365,29 +3403,34 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } - if (!Primitive::IsFloatingPointType(type)) { - DCHECK(locations->InAt(1).IsRegister()); - GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>(); - if (kPoisonHeapReferences && needs_write_barrier) { - // Note that in the case where `value` is a null reference, - // we do not enter this block, as a null reference does not - // need poisoning. - DCHECK_EQ(type, Primitive::kPrimNot); - __ PoisonHeapReference(TMP, src); - __ StoreToOffset(store_type, TMP, obj, offset, null_checker); + + if (value_location.IsConstant()) { + int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker); + } else { + if (!Primitive::IsFloatingPointType(type)) { + DCHECK(value_location.IsRegister()); + GpuRegister src = value_location.AsRegister<GpuRegister>(); + if (kPoisonHeapReferences && needs_write_barrier) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. 
+ DCHECK_EQ(type, Primitive::kPrimNot); + __ PoisonHeapReference(TMP, src); + __ StoreToOffset(store_type, TMP, obj, offset, null_checker); + } else { + __ StoreToOffset(store_type, src, obj, offset, null_checker); + } } else { - __ StoreToOffset(store_type, src, obj, offset, null_checker); + DCHECK(value_location.IsFpuRegister()); + FpuRegister src = value_location.AsFpuRegister<FpuRegister>(); + __ StoreFpuToOffset(store_type, src, obj, offset, null_checker); } - } else { - DCHECK(locations->InAt(1).IsFpuRegister()); - FpuRegister src = locations->InAt(1).AsFpuRegister<FpuRegister>(); - __ StoreFpuToOffset(store_type, src, obj, offset, null_checker); } - // TODO: memory barriers? if (needs_write_barrier) { - DCHECK(locations->InAt(1).IsRegister()); - GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>(); + DCHECK(value_location.IsRegister()); + GpuRegister src = value_location.AsRegister<GpuRegister>(); codegen_->MarkGCCard(obj, src, value_can_be_null); } } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 3056f7f464..6040dc9492 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -189,6 +189,8 @@ class LocationsBuilderMIPS64 : public HGraphVisitor { void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + Location RegisterOrZeroConstant(HInstruction* instruction); + Location FpuRegisterOrConstantForStore(HInstruction* instruction); InvokeDexCallingConventionVisitorMIPS64 parameter_visitor_; diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc index 0c832a5c35..cfcb276a98 100644 --- a/compiler/optimizing/dex_cache_array_fixups_arm.cc +++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc @@ -17,23 +17,23 @@ #include "dex_cache_array_fixups_arm.h" #include "base/arena_containers.h" -#ifdef ART_USE_OLD_ARM_BACKEND -#include "code_generator_arm.h" -#include "intrinsics_arm.h" -#else +#ifdef ART_USE_VIXL_ARM_BACKEND #include "code_generator_arm_vixl.h" #include "intrinsics_arm_vixl.h" +#else +#include "code_generator_arm.h" +#include "intrinsics_arm.h" #endif #include "utils/dex_cache_arrays_layout-inl.h" namespace art { namespace arm { -#ifdef ART_USE_OLD_ARM_BACKEND -typedef CodeGeneratorARM CodeGeneratorARMType; -typedef IntrinsicLocationsBuilderARM IntrinsicLocationsBuilderARMType; -#else +#ifdef ART_USE_VIXL_ARM_BACKEND typedef CodeGeneratorARMVIXL CodeGeneratorARMType; typedef IntrinsicLocationsBuilderARMVIXL IntrinsicLocationsBuilderARMType; +#else +typedef CodeGeneratorARM CodeGeneratorARMType; +typedef IntrinsicLocationsBuilderARM IntrinsicLocationsBuilderARMType; #endif /** diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 0b96005a17..583008bbe8 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -249,20 +249,25 @@ class ScopedProfilingInfoInlineUse { ProfilingInfo* const profiling_info_; }; -static bool IsMonomorphic(Handle<mirror::ObjectArray<mirror::Class>> classes) - REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK_GE(InlineCache::kIndividualCacheSize, 2); - return classes->Get(0) != nullptr && classes->Get(1) == nullptr; -} - -static bool IsMegamorphic(Handle<mirror::ObjectArray<mirror::Class>> classes) - REQUIRES_SHARED(Locks::mutator_lock_) { - for (size_t i = 0; i < 
InlineCache::kIndividualCacheSize; ++i) { - if (classes->Get(i) == nullptr) { - return false; +HInliner::InlineCacheType HInliner::GetInlineCacheType( + const Handle<mirror::ObjectArray<mirror::Class>>& classes) + REQUIRES_SHARED(Locks::mutator_lock_) { + uint8_t number_of_types = 0; + for (; number_of_types < InlineCache::kIndividualCacheSize; ++number_of_types) { + if (classes->Get(number_of_types) == nullptr) { + break; } } - return true; + + if (number_of_types == 0) { + return kInlineCacheUninitialized; + } else if (number_of_types == 1) { + return kInlineCacheMonomorphic; + } else if (number_of_types == InlineCache::kIndividualCacheSize) { + return kInlineCacheMegamorphic; + } else { + return kInlineCachePolymorphic; + } } static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes) @@ -271,18 +276,6 @@ static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Clas return classes->Get(0); } -static bool IsUninitialized(Handle<mirror::ObjectArray<mirror::Class>> classes) - REQUIRES_SHARED(Locks::mutator_lock_) { - return classes->Get(0) == nullptr; -} - -static bool IsPolymorphic(Handle<mirror::ObjectArray<mirror::Class>> classes) - REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK_GE(InlineCache::kIndividualCacheSize, 3); - return classes->Get(1) != nullptr && - classes->Get(InlineCache::kIndividualCacheSize - 1) == nullptr; -} - ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { if (!resolved_method->HasSingleImplementation()) { return nullptr; @@ -353,67 +346,209 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { } return result; } - DCHECK(!invoke_instruction->IsInvokeStaticOrDirect()); - // Check if we can use an inline cache. - ArtMethod* caller = graph_->GetArtMethod(); - if (Runtime::Current()->UseJitCompilation()) { - // Under JIT, we should always know the caller. - DCHECK(caller != nullptr); - ScopedProfilingInfoInlineUse spiis(caller, soa.Self()); - ProfilingInfo* profiling_info = spiis.GetProfilingInfo(); - if (profiling_info != nullptr) { - StackHandleScope<1> hs(soa.Self()); - ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); - Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs.NewHandle( - mirror::ObjectArray<mirror::Class>::Alloc( - soa.Self(), - class_linker->GetClassRoot(ClassLinker::kClassArrayClass), - InlineCache::kIndividualCacheSize)); - if (inline_cache == nullptr) { - // We got an OOME. Just clear the exception, and don't inline. - DCHECK(soa.Self()->IsExceptionPending()); - soa.Self()->ClearException(); - VLOG(compiler) << "Out of memory in the compiler when trying to inline"; - return false; + // Try using inline caches. + return TryInlineFromInlineCache(caller_dex_file, invoke_instruction, resolved_method); +} + +static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder( + const DexCompilationUnit& compilation_unit, + StackHandleScope<1>* hs) + REQUIRES_SHARED(Locks::mutator_lock_) { + Thread* self = Thread::Current(); + ClassLinker* class_linker = compilation_unit.GetClassLinker(); + Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs->NewHandle( + mirror::ObjectArray<mirror::Class>::Alloc( + self, + class_linker->GetClassRoot(ClassLinker::kClassArrayClass), + InlineCache::kIndividualCacheSize)); + if (inline_cache == nullptr) { + // We got an OOME. Just clear the exception, and don't inline. 
+ DCHECK(self->IsExceptionPending()); + self->ClearException(); + VLOG(compiler) << "Out of memory in the compiler when trying to inline"; + } + return inline_cache; +} + +bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, + HInvoke* invoke_instruction, + ArtMethod* resolved_method) + REQUIRES_SHARED(Locks::mutator_lock_) { + StackHandleScope<1> hs(Thread::Current()); + Handle<mirror::ObjectArray<mirror::Class>> inline_cache; + InlineCacheType inline_cache_type = Runtime::Current()->IsAotCompiler() + ? GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache) + : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache); + + switch (inline_cache_type) { + case kInlineCacheNoData: + break; + + case kInlineCacheUninitialized: + VLOG(compiler) << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is not hit and not inlined"; + return false; + + case kInlineCacheMonomorphic: + MaybeRecordStat(kMonomorphicCall); + if (outermost_graph_->IsCompilingOsr()) { + // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the + // interpreter and it may have seen different receiver types. + return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache); } else { - Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto( - *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()), - inline_cache); - if (IsUninitialized(inline_cache)) { - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(method_index) - << " is not hit and not inlined"; - return false; - } else if (IsMonomorphic(inline_cache)) { - MaybeRecordStat(kMonomorphicCall); - if (outermost_graph_->IsCompilingOsr()) { - // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the - // interpreter and it may have seen different receiver types. 
- return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache); - } else { - return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache); - } - } else if (IsPolymorphic(inline_cache)) { - MaybeRecordStat(kPolymorphicCall); - return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache); - } else { - DCHECK(IsMegamorphic(inline_cache)); - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(method_index) - << " is megamorphic and not inlined"; - MaybeRecordStat(kMegamorphicCall); - return false; - } + return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache); } + + case kInlineCachePolymorphic: + MaybeRecordStat(kPolymorphicCall); + return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache); + + case kInlineCacheMegamorphic: + VLOG(compiler) << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is megamorphic and not inlined"; + MaybeRecordStat(kMegamorphicCall); + return false; + + case kInlineCacheMissingTypes: + VLOG(compiler) << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is missing types and not inlined"; + return false; + } + UNREACHABLE(); +} + +HInliner::InlineCacheType HInliner::GetInlineCacheJIT( + HInvoke* invoke_instruction, + StackHandleScope<1>* hs, + /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(Runtime::Current()->UseJitCompilation()); + + ArtMethod* caller = graph_->GetArtMethod(); + // Under JIT, we should always know the caller. + DCHECK(caller != nullptr); + ScopedProfilingInfoInlineUse spiis(caller, Thread::Current()); + ProfilingInfo* profiling_info = spiis.GetProfilingInfo(); + + if (profiling_info == nullptr) { + return kInlineCacheNoData; + } + + *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs); + if (inline_cache->Get() == nullptr) { + // We can't extract any data if we failed to allocate; + return kInlineCacheNoData; + } else { + Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto( + *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()), + *inline_cache); + return GetInlineCacheType(*inline_cache); + } +} + +HInliner::InlineCacheType HInliner::GetInlineCacheAOT( + const DexFile& caller_dex_file, + HInvoke* invoke_instruction, + StackHandleScope<1>* hs, + /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(Runtime::Current()->IsAotCompiler()); + const ProfileCompilationInfo* pci = compiler_driver_->GetProfileCompilationInfo(); + if (pci == nullptr) { + return kInlineCacheNoData; + } + + ProfileCompilationInfo::OfflineProfileMethodInfo offline_profile; + bool found = pci->GetMethod(caller_dex_file.GetLocation(), + caller_dex_file.GetLocationChecksum(), + caller_compilation_unit_.GetDexMethodIndex(), + &offline_profile); + if (!found) { + return kInlineCacheNoData; // no profile information for this invocation. 
+ } + + *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs); + if (inline_cache == nullptr) { + // We can't extract any data if we failed to allocate; + return kInlineCacheNoData; + } else { + return ExtractClassesFromOfflineProfile(invoke_instruction, + offline_profile, + *inline_cache); + } +} + +HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile( + const HInvoke* invoke_instruction, + const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile, + /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache) + REQUIRES_SHARED(Locks::mutator_lock_) { + const auto it = offline_profile.inline_caches.find(invoke_instruction->GetDexPc()); + if (it == offline_profile.inline_caches.end()) { + return kInlineCacheUninitialized; + } + + const ProfileCompilationInfo::DexPcData& dex_pc_data = it->second; + + if (dex_pc_data.is_missing_types) { + return kInlineCacheMissingTypes; + } + if (dex_pc_data.is_megamorphic) { + return kInlineCacheMegamorphic; + } + + DCHECK_LE(dex_pc_data.classes.size(), InlineCache::kIndividualCacheSize); + Thread* self = Thread::Current(); + // We need to resolve the class relative to the containing dex file. + // So first, build a mapping from the index of dex file in the profile to + // its dex cache. This will avoid repeating the lookup when walking over + // the inline cache types. + std::vector<ObjPtr<mirror::DexCache>> dex_profile_index_to_dex_cache( + offline_profile.dex_references.size()); + for (size_t i = 0; i < offline_profile.dex_references.size(); i++) { + bool found = false; + for (const DexFile* dex_file : compiler_driver_->GetDexFilesForOatFile()) { + if (offline_profile.dex_references[i].MatchesDex(dex_file)) { + dex_profile_index_to_dex_cache[i] = + caller_compilation_unit_.GetClassLinker()->FindDexCache(self, *dex_file); + found = true; + } + } + if (!found) { + VLOG(compiler) << "Could not find profiled dex file: " + << offline_profile.dex_references[i].dex_location; + return kInlineCacheMissingTypes; } } - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(method_index) - << " could not be statically determined"; - return false; + // Walk over the classes and resolve them. If we cannot find a type we return + // kInlineCacheMissingTypes. + int ic_index = 0; + for (const ProfileCompilationInfo::ClassReference& class_ref : dex_pc_data.classes) { + ObjPtr<mirror::DexCache> dex_cache = + dex_profile_index_to_dex_cache[class_ref.dex_profile_index]; + DCHECK(dex_cache != nullptr); + ObjPtr<mirror::Class> clazz = ClassLinker::LookupResolvedType( + class_ref.type_index, + dex_cache, + caller_compilation_unit_.GetClassLoader().Get()); + if (clazz != nullptr) { + inline_cache->Set(ic_index++, clazz); + } else { + VLOG(compiler) << "Could not resolve class from inline cache in AOT mode " + << caller_compilation_unit_.GetDexFile()->PrettyMethod( + invoke_instruction->GetDexMethodIndex()) << " : " + << caller_compilation_unit_ + .GetDexFile()->StringByTypeIdx(class_ref.type_index); + return kInlineCacheMissingTypes; + } + } + return GetInlineCacheType(inline_cache); } HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker, @@ -556,6 +691,13 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, // Insert before setting the kind, as setting the kind affects the inputs. 
bb_cursor->InsertInstructionAfter(load_class, receiver_class); load_class->SetLoadKind(kind); + // In AOT mode, we will most likely load the class from BSS, which will involve a call + // to the runtime. In this case, the load instruction will need an environment so copy + // it from the invoke instruction. + if (load_class->NeedsEnvironment()) { + DCHECK(Runtime::Current()->IsAotCompiler()); + load_class->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); + } HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class); bb_cursor->InsertInstructionAfter(compare, load_class); @@ -746,7 +888,10 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( ArtMethod* resolved_method, Handle<mirror::ObjectArray<mirror::Class>> classes) { // This optimization only works under JIT for now. - DCHECK(Runtime::Current()->UseJitCompilation()); + if (!Runtime::Current()->UseJitCompilation()) { + return false; + } + if (graph_->GetInstructionSet() == kMips64) { // TODO: Support HClassTableGet for mips64. return false; @@ -1064,9 +1209,8 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, // TODO: Needs null check. return false; } - Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache())); HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg); - HInstanceFieldGet* iget = CreateInstanceFieldGet(dex_cache, data.field_idx, obj); + HInstanceFieldGet* iget = CreateInstanceFieldGet(data.field_idx, resolved_method, obj); DCHECK_EQ(iget->GetFieldOffset().Uint32Value(), data.field_offset); DCHECK_EQ(iget->IsVolatile() ? 1u : 0u, data.is_volatile); invoke_instruction->GetBlock()->InsertInstructionBefore(iget, invoke_instruction); @@ -1079,10 +1223,9 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, // TODO: Needs null check. return false; } - Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache())); HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg); HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, data.src_arg); - HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, data.field_idx, obj, value); + HInstanceFieldSet* iput = CreateInstanceFieldSet(data.field_idx, resolved_method, obj, value); DCHECK_EQ(iput->GetFieldOffset().Uint32Value(), data.field_offset); DCHECK_EQ(iput->IsVolatile() ? 1u : 0u, data.is_volatile); invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction); @@ -1116,24 +1259,19 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, [](uint16_t index) { return index != DexFile::kDexNoIndex16; })); // Create HInstanceFieldSet for each IPUT that stores non-zero data. 
- Handle<mirror::DexCache> dex_cache; HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, /* this */ 0u); bool needs_constructor_barrier = false; for (size_t i = 0; i != number_of_iputs; ++i) { HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]); if (!value->IsConstant() || !value->AsConstant()->IsZeroBitPattern()) { - if (dex_cache.GetReference() == nullptr) { - dex_cache = handles_->NewHandle(resolved_method->GetDexCache()); - } uint16_t field_index = iput_field_indexes[i]; - HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, field_index, obj, value); + bool is_final; + HInstanceFieldSet* iput = + CreateInstanceFieldSet(field_index, resolved_method, obj, value, &is_final); invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction); // Check whether the field is final. If it is, we need to add a barrier. - PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); - ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size); - DCHECK(resolved_field != nullptr); - if (resolved_field->IsFinal()) { + if (is_final) { needs_constructor_barrier = true; } } @@ -1152,12 +1290,13 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, return true; } -HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache, - uint32_t field_index, +HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, + ArtMethod* referrer, HInstruction* obj) REQUIRES_SHARED(Locks::mutator_lock_) { - PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); - ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + ArtField* resolved_field = + class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false); DCHECK(resolved_field != nullptr); HInstanceFieldGet* iget = new (graph_->GetArena()) HInstanceFieldGet( obj, @@ -1167,12 +1306,13 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex resolved_field->IsVolatile(), field_index, resolved_field->GetDeclaringClass()->GetDexClassDefIndex(), - *dex_cache->GetDexFile(), + *referrer->GetDexFile(), // Read barrier generates a runtime call in slow path and we need a valid // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537. /* dex_pc */ 0); if (iget->GetType() == Primitive::kPrimNot) { // Use the same dex_cache that we used for field lookup as the hint_dex_cache. 
+ Handle<mirror::DexCache> dex_cache = handles_->NewHandle(referrer->GetDexCache()); ReferenceTypePropagation rtp(graph_, outer_compilation_unit_.GetClassLoader(), dex_cache, @@ -1183,14 +1323,21 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex return iget; } -HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache, - uint32_t field_index, +HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index, + ArtMethod* referrer, HInstruction* obj, - HInstruction* value) + HInstruction* value, + bool* is_final) REQUIRES_SHARED(Locks::mutator_lock_) { - PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); - ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + ArtField* resolved_field = + class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false); DCHECK(resolved_field != nullptr); + if (is_final != nullptr) { + // This information is needed only for constructors. + DCHECK(referrer->IsConstructor()); + *is_final = resolved_field->IsFinal(); + } HInstanceFieldSet* iput = new (graph_->GetArena()) HInstanceFieldSet( obj, value, @@ -1200,7 +1347,7 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex resolved_field->IsVolatile(), field_index, resolved_field->GetDeclaringClass()->GetDexClassDefIndex(), - *dex_cache->GetDexFile(), + *referrer->GetDexFile(), // Read barrier generates a runtime call in slow path and we need a valid // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537. /* dex_pc */ 0); diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 75d025ae41..a032042c78 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -20,6 +20,7 @@ #include "dex_file_types.h" #include "invoke_type.h" #include "optimization.h" +#include "jit/profile_compilation_info.h" namespace art { @@ -59,6 +60,15 @@ class HInliner : public HOptimization { static constexpr const char* kInlinerPassName = "inliner"; private: + enum InlineCacheType { + kInlineCacheNoData = 0, + kInlineCacheUninitialized = 1, + kInlineCacheMonomorphic = 2, + kInlineCachePolymorphic = 3, + kInlineCacheMegamorphic = 4, + kInlineCacheMissingTypes = 5 + }; + bool TryInline(HInvoke* invoke_instruction); // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether @@ -97,14 +107,54 @@ class HInliner : public HOptimization { REQUIRES_SHARED(Locks::mutator_lock_); // Create a new HInstanceFieldGet. - HInstanceFieldGet* CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache, - uint32_t field_index, + HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index, + ArtMethod* referrer, HInstruction* obj); // Create a new HInstanceFieldSet. - HInstanceFieldSet* CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache, - uint32_t field_index, + HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index, + ArtMethod* referrer, HInstruction* obj, - HInstruction* value); + HInstruction* value, + bool* is_final = nullptr); + + // Try inlining the invoke instruction using inline caches. + bool TryInlineFromInlineCache( + const DexFile& caller_dex_file, + HInvoke* invoke_instruction, + ArtMethod* resolved_method) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Try getting the inline cache from JIT code cache. 
+ // Return true if the inline cache was successfully allocated and the + // invoke info was found in the profile info. + InlineCacheType GetInlineCacheJIT( + HInvoke* invoke_instruction, + StackHandleScope<1>* hs, + /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Try getting the inline cache from AOT offline profile. + // Return true if the inline cache was successfully allocated and the + // invoke info was found in the profile info. + InlineCacheType GetInlineCacheAOT(const DexFile& caller_dex_file, + HInvoke* invoke_instruction, + StackHandleScope<1>* hs, + /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Extract the mirror classes from the offline profile and add them to the `inline_cache`. + // Note that even if we have profile data for the invoke the inline_cache might contain + // only null entries if the types cannot be resolved. + InlineCacheType ExtractClassesFromOfflineProfile( + const HInvoke* invoke_instruction, + const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile, + /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Compute the inline cache type. + InlineCacheType GetInlineCacheType( + const Handle<mirror::ObjectArray<mirror::Class>>& classes) + REQUIRES_SHARED(Locks::mutator_lock_); // Try to inline the target of a monomorphic call. If successful, the code // in the graph will look like: diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 934ba1b9fb..807d6cf54f 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1560,7 +1560,10 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { // Load `count` field of the argument string and check if it matches the const string. // Also compares the compression style, if differs return false. __ Ldr(temp, MemOperand(arg.X(), count_offset)); + // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate. + scratch_scope.Release(temp1); __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed))); + temp1 = scratch_scope.AcquireW(); __ B(&return_false, ne); } else { // Load `count` fields of this and argument strings. 
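An aside on the StringEquals change just above: the flagged `count` constant may not be encodable as a CMP immediate, in which case the VIXL macro assembler has to materialize it in a scratch register of its own. Because `temp1` was handed out from that same scratch pool, the change releases it across the `Cmp` and re-acquires it afterwards so the pool is never exhausted. Below is a minimal, self-contained toy model of that release/re-acquire pattern; the `ScratchPool` type, its methods, and the 4096 immediate limit are invented for illustration and are not the VIXL `UseScratchRegisterScope` API.

  #include <cassert>
  #include <utility>
  #include <vector>

  // Toy scratch-register pool (illustration only; not the VIXL API).
  class ScratchPool {
   public:
    explicit ScratchPool(std::vector<int> regs) : free_(std::move(regs)) {}
    int Acquire() {                       // hand out a free register
      assert(!free_.empty());
      int r = free_.back();
      free_.pop_back();
      return r;
    }
    void Release(int r) { free_.push_back(r); }  // give it back to the pool
   private:
    std::vector<int> free_;
  };

  // Comparing against a constant may itself need a scratch register when the
  // constant does not fit the (assumed) immediate encoding.
  void CompareWithConstant(ScratchPool* pool, int lhs_reg, long constant) {
    const bool fits_immediate = constant >= 0 && constant < 4096;  // assumed limit
    if (!fits_immediate) {
      int tmp = pool->Acquire();  // the "macro assembler" borrows a register here
      // ... materialize `constant` in `tmp`, compare lhs_reg with tmp ...
      pool->Release(tmp);
    }
    // ... otherwise compare lhs_reg against the immediate directly ...
    (void)lhs_reg;
  }

  int main() {
    ScratchPool pool({/* ip0 */ 16, /* ip1 */ 17});
    int temp1 = pool.Acquire();
    // Release temp1 across the compare, exactly because CompareWithConstant
    // may need to acquire a scratch register internally.
    pool.Release(temp1);
    CompareWithConstant(&pool, /* lhs_reg */ 0, /* constant */ 1L << 20);
    temp1 = pool.Acquire();  // re-acquire once the compare has been emitted
    pool.Release(temp1);
    return 0;
  }

Without the release, a compare against an out-of-range constant would try to acquire from an empty pool, which in the real assembler trips an assertion.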
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index 490e50cb77..0e02311672 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -24,17 +24,17 @@ #include "optimizing/code_generator.h" #include "optimizing/optimizing_unit_test.h" #include "utils/assembler.h" -#ifdef ART_USE_OLD_ARM_BACKEND -#include "utils/arm/assembler_thumb2.h" -#else +#ifdef ART_USE_VIXL_ARM_BACKEND #include "utils/arm/assembler_arm_vixl.h" +#else +#include "utils/arm/assembler_thumb2.h" #endif #include "utils/mips/assembler_mips.h" #include "utils/mips64/assembler_mips64.h" #include "optimizing/optimizing_cfi_test_expected.inc" -#ifndef ART_USE_OLD_ARM_BACKEND +#ifdef ART_USE_VIXL_ARM_BACKEND namespace vixl32 = vixl::aarch32; using vixl32::r0; @@ -196,15 +196,7 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) { expected_cfi_kThumb2_adjust, expected_cfi_kThumb2_adjust + arraysize(expected_cfi_kThumb2_adjust)); SetUpFrame(kThumb2); -#ifdef ART_USE_OLD_ARM_BACKEND -#define __ down_cast<arm::Thumb2Assembler*>(GetCodeGenerator()->GetAssembler())-> - Label target; - __ CompareAndBranchIfZero(arm::R0, &target); - // Push the target out of range of CBZ. - for (size_t i = 0; i != 65; ++i) { - __ ldr(arm::R0, arm::Address(arm::R0)); - } -#else +#ifdef ART_USE_VIXL_ARM_BACKEND #define __ down_cast<arm::ArmVIXLAssembler*>(GetCodeGenerator() \ ->GetAssembler())->GetVIXLAssembler()-> vixl32::Label target; @@ -213,6 +205,14 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) { for (size_t i = 0; i != 65; ++i) { __ Ldr(r0, vixl32::MemOperand(r0)); } +#else +#define __ down_cast<arm::Thumb2Assembler*>(GetCodeGenerator()->GetAssembler())-> + Label target; + __ CompareAndBranchIfZero(arm::R0, &target); + // Push the target out of range of CBZ. + for (size_t i = 0; i != 65; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } #endif __ Bind(&target); #undef __ diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index d84fe6ccff..82670c38fe 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -223,15 +223,15 @@ static constexpr uint8_t expected_cfi_kMips64[] = { // 0x00000040: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kThumb2_adjust[] = { -#ifdef ART_USE_OLD_ARM_BACKEND - 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x28, - 0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, -#else +#ifdef ART_USE_VIXL_ARM_BACKEND // VIXL emits an extra 2 bytes here for a 32-bit beq as there is no // optimistic 16-bit emit and subsequent fixup for out of reach targets - // as with the old assembler. + // as with the current assembler. 
0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x28, 0x00, 0xF0, 0x41, 0x80, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, +#else + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x28, + 0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, #endif 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, @@ -247,10 +247,10 @@ static constexpr uint8_t expected_asm_kThumb2_adjust[] = { }; static constexpr uint8_t expected_cfi_kThumb2_adjust[] = { 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, -#ifdef ART_USE_OLD_ARM_BACKEND - 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x86, 0x0A, -#else +#ifdef ART_USE_VIXL_ARM_BACKEND 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A, +#else + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x86, 0x0A, #endif 0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E, 0x40, diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 59523a93a0..8a9c1ccaff 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -306,7 +306,7 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { : Location::StackSlot(interval->GetParent()->GetSpillSlot())); } UsePosition* use = current->GetFirstUse(); - UsePosition* env_use = current->GetFirstEnvironmentUse(); + EnvUsePosition* env_use = current->GetFirstEnvironmentUse(); // Walk over all siblings, updating locations of use positions, and // connecting them when they are adjacent. @@ -323,7 +323,6 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { use = use->GetNext(); } while (use != nullptr && use->GetPosition() <= range->GetEnd()) { - DCHECK(!use->GetIsEnvironment()); DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); if (!use->IsSynthesized()) { LocationSummary* locations = use->GetUser()->GetLocations(); diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 2227872f76..667afb1ec3 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -912,9 +912,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { // Create an interval with lifetime holes. static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}}; LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one); - first->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, first->first_use_); - first->first_use_ = new(&allocator) UsePosition(user, 0, false, 7, first->first_use_); - first->first_use_ = new(&allocator) UsePosition(user, 0, false, 6, first->first_use_); + first->first_use_ = new(&allocator) UsePosition(user, false, 8, first->first_use_); + first->first_use_ = new(&allocator) UsePosition(user, false, 7, first->first_use_); + first->first_use_ = new(&allocator) UsePosition(user, false, 6, first->first_use_); locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); @@ -934,9 +934,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { // before lifetime position 6 yet. 
static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}}; LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three); - third->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, third->first_use_); - third->first_use_ = new(&allocator) UsePosition(user, 0, false, 4, third->first_use_); - third->first_use_ = new(&allocator) UsePosition(user, 0, false, 3, third->first_use_); + third->first_use_ = new(&allocator) UsePosition(user, false, 8, third->first_use_); + third->first_use_ = new(&allocator) UsePosition(user, false, 4, third->first_use_); + third->first_use_ = new(&allocator) UsePosition(user, false, 3, third->first_use_); locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); third = third->SplitAt(3); diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index a239bd50c2..340d0ccefe 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_ #define ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_ -#include "nodes.h" #include <iostream> +#include "nodes.h" + namespace art { class CodeGenerator; @@ -103,21 +104,20 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocSsaLiveness> { */ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> { public: - UsePosition(HInstruction* user, - HEnvironment* environment, - size_t input_index, - size_t position, - UsePosition* next) + UsePosition(HInstruction* user, size_t input_index, size_t position, UsePosition* next) : user_(user), - environment_(environment), input_index_(input_index), position_(position), next_(next) { - DCHECK(environment == nullptr || user == nullptr); DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition()); } - static constexpr size_t kNoInput = -1; + explicit UsePosition(size_t position) + : user_(nullptr), + input_index_(kNoInput), + position_(dchecked_integral_cast<uint32_t>(position)), + next_(nullptr) { + } size_t GetPosition() const { return position_; } @@ -125,9 +125,7 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> { void SetNext(UsePosition* next) { next_ = next; } HInstruction* GetUser() const { return user_; } - HEnvironment* GetEnvironment() const { return environment_; } - bool GetIsEnvironment() const { return environment_ != nullptr; } bool IsSynthesized() const { return user_ == nullptr; } size_t GetInputIndex() const { return input_index_; } @@ -142,20 +140,20 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> { UsePosition* Dup(ArenaAllocator* allocator) const { return new (allocator) UsePosition( - user_, environment_, input_index_, position_, + user_, input_index_, position_, next_ == nullptr ? 
nullptr : next_->Dup(allocator)); } bool RequiresRegister() const { - if (GetIsEnvironment()) return false; if (IsSynthesized()) return false; Location location = GetUser()->GetLocations()->InAt(GetInputIndex()); return location.IsUnallocated() && location.RequiresRegisterKind(); } private: + static constexpr uint32_t kNoInput = static_cast<uint32_t>(-1); + HInstruction* const user_; - HEnvironment* const environment_; const size_t input_index_; const size_t position_; UsePosition* next_; @@ -163,6 +161,50 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> { DISALLOW_COPY_AND_ASSIGN(UsePosition); }; +/** + * An environment use position represents a live interval for environment use at a given position. + */ +class EnvUsePosition : public ArenaObject<kArenaAllocSsaLiveness> { + public: + EnvUsePosition(HEnvironment* environment, + size_t input_index, + size_t position, + EnvUsePosition* next) + : environment_(environment), + input_index_(input_index), + position_(position), + next_(next) { + DCHECK(environment != nullptr); + DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition()); + } + + size_t GetPosition() const { return position_; } + + EnvUsePosition* GetNext() const { return next_; } + void SetNext(EnvUsePosition* next) { next_ = next; } + + HEnvironment* GetEnvironment() const { return environment_; } + size_t GetInputIndex() const { return input_index_; } + + void Dump(std::ostream& stream) const { + stream << position_; + } + + EnvUsePosition* Dup(ArenaAllocator* allocator) const { + return new (allocator) EnvUsePosition( + environment_, input_index_, position_, + next_ == nullptr ? nullptr : next_->Dup(allocator)); + } + + private: + HEnvironment* const environment_; + const size_t input_index_; + const size_t position_; + EnvUsePosition* next_; + + DISALLOW_COPY_AND_ASSIGN(EnvUsePosition); +}; + class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> { public: explicit SafepointPosition(HInstruction* instruction) @@ -227,7 +269,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { DCHECK(first_env_use_ == nullptr) << "A temporary cannot have environment user"; size_t position = instruction->GetLifetimePosition(); first_use_ = new (allocator_) UsePosition( - instruction, /* environment */ nullptr, temp_index, position, first_use_); + instruction, temp_index, position, first_use_); AddRange(position, position + 1); } @@ -276,7 +318,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { } DCHECK(first_use_->GetPosition() + 1 == position); UsePosition* new_use = new (allocator_) UsePosition( - instruction, nullptr /* environment */, input_index, position, cursor->GetNext()); + instruction, input_index, position, cursor->GetNext()); cursor->SetNext(new_use); if (first_range_->GetEnd() == first_use_->GetPosition()) { first_range_->end_ = position; @@ -285,11 +327,11 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { } if (is_environment) { - first_env_use_ = new (allocator_) UsePosition( - nullptr /* instruction */, environment, input_index, position, first_env_use_); + first_env_use_ = new (allocator_) EnvUsePosition( + environment, input_index, position, first_env_use_); } else { first_use_ = new (allocator_) UsePosition( - instruction, nullptr /* environment */, input_index, position, first_use_); + instruction, input_index, position, first_use_); } if (is_environment && !keep_alive) { @@ -328,7 +370,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { AddBackEdgeUses(*block); } 
first_use_ = new (allocator_) UsePosition( - instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_); + instruction, input_index, block->GetLifetimeEnd(), first_use_); } ALWAYS_INLINE void AddRange(size_t start, size_t end) { @@ -538,7 +580,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { return first_use_; } - UsePosition* GetFirstEnvironmentUse() const { + EnvUsePosition* GetFirstEnvironmentUse() const { return first_env_use_; } @@ -676,7 +718,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { current = current->GetNext(); } stream << "}, uses: { "; - UsePosition* use = first_use_; + const UsePosition* use = first_use_; if (use != nullptr) { do { use->Dump(stream); @@ -684,12 +726,12 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { } while ((use = use->GetNext()) != nullptr); } stream << "}, { "; - use = first_env_use_; - if (use != nullptr) { + const EnvUsePosition* env_use = first_env_use_; + if (env_use != nullptr) { do { - use->Dump(stream); + env_use->Dump(stream); stream << " "; - } while ((use = use->GetNext()) != nullptr); + } while ((env_use = env_use->GetNext()) != nullptr); } stream << "}"; stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit(); @@ -1015,12 +1057,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { DCHECK(last_in_new_list == nullptr || back_edge_use_position > last_in_new_list->GetPosition()); - UsePosition* new_use = new (allocator_) UsePosition( - /* user */ nullptr, - /* environment */ nullptr, - UsePosition::kNoInput, - back_edge_use_position, - /* next */ nullptr); + UsePosition* new_use = new (allocator_) UsePosition(back_edge_use_position); if (last_in_new_list != nullptr) { // Going outward. The latest created use needs to point to the new use. @@ -1056,7 +1093,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { // Uses of this interval. Note that this linked list is shared amongst siblings. UsePosition* first_use_; - UsePosition* first_env_use_; + EnvUsePosition* first_env_use_; // The instruction type this interval corresponds to. const Primitive::Type type_; @@ -1210,8 +1247,7 @@ class SsaLivenessAnalysis : public ValueObject { // Returns whether `instruction` in an HEnvironment held by `env_holder` // should be kept live by the HEnvironment. - static bool ShouldBeLiveForEnvironment(HInstruction* env_holder, - HInstruction* instruction) { + static bool ShouldBeLiveForEnvironment(HInstruction* env_holder, HInstruction* instruction) { if (instruction == nullptr) return false; // A value that's not live in compiled code may still be needed in interpreter, // due to code motion, etc. diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc new file mode 100644 index 0000000000..1916c73ca4 --- /dev/null +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -0,0 +1,232 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "base/arena_allocator.h" +#include "base/arena_containers.h" +#include "driver/compiler_options.h" +#include "code_generator.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "ssa_liveness_analysis.h" + +namespace art { + +class SsaLivenessAnalysisTest : public testing::Test { + public: + SsaLivenessAnalysisTest() + : pool_(), + allocator_(&pool_), + graph_(CreateGraph(&allocator_)), + compiler_options_(), + instruction_set_(kRuntimeISA) { + std::string error_msg; + instruction_set_features_ = + InstructionSetFeatures::FromVariant(instruction_set_, "default", &error_msg); + codegen_ = CodeGenerator::Create(graph_, + instruction_set_, + *instruction_set_features_, + compiler_options_); + CHECK(codegen_ != nullptr) << instruction_set_ << " is not a supported target architecture."; + // Create entry block. + entry_ = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry_); + graph_->SetEntryBlock(entry_); + } + + protected: + HBasicBlock* CreateSuccessor(HBasicBlock* block) { + HGraph* graph = block->GetGraph(); + HBasicBlock* successor = new (&allocator_) HBasicBlock(graph); + graph->AddBlock(successor); + block->AddSuccessor(successor); + return successor; + } + + ArenaPool pool_; + ArenaAllocator allocator_; + HGraph* graph_; + CompilerOptions compiler_options_; + InstructionSet instruction_set_; + std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; + std::unique_ptr<CodeGenerator> codegen_; + HBasicBlock* entry_; +}; + +TEST_F(SsaLivenessAnalysisTest, TestReturnArg) { + HInstruction* arg = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); + entry_->AddInstruction(arg); + + HBasicBlock* block = CreateSuccessor(entry_); + HInstruction* ret = new (&allocator_) HReturn(arg); + block->AddInstruction(ret); + block->AddInstruction(new (&allocator_) HExit()); + + graph_->BuildDominatorTree(); + SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get()); + ssa_analysis.Analyze(); + + std::ostringstream arg_dump; + arg->GetLiveInterval()->Dump(arg_dump); + EXPECT_STREQ("ranges: { [2,6) }, uses: { 6 }, { } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + arg_dump.str().c_str()); +} + +TEST_F(SsaLivenessAnalysisTest, TestAput) { + HInstruction* array = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* index = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt); + HInstruction* value = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(2), 2, Primitive::kPrimInt); + HInstruction* extra_arg1 = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(3), 3, Primitive::kPrimInt); + HInstruction* extra_arg2 = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(4), 4, Primitive::kPrimNot); + ArenaVector<HInstruction*> args({ array, index, value, extra_arg1, extra_arg2 }, + allocator_.Adapter()); + for (HInstruction* insn : args) { + entry_->AddInstruction(insn); + } + + HBasicBlock* block = CreateSuccessor(entry_); + HInstruction* null_check = new (&allocator_) HNullCheck(array, 0); + block->AddInstruction(null_check); + HEnvironment* null_check_env = new (&allocator_) HEnvironment(&allocator_, + /* number_of_vregs */ 5, + /* 
method */ nullptr, + /* dex_pc */ 0u, + null_check); + null_check_env->CopyFrom(args); + null_check->SetRawEnvironment(null_check_env); + HInstruction* length = new (&allocator_) HArrayLength(array, 0); + block->AddInstruction(length); + HInstruction* bounds_check = new (&allocator_) HBoundsCheck(index, length, /* dex_pc */ 0u); + block->AddInstruction(bounds_check); + HEnvironment* bounds_check_env = new (&allocator_) HEnvironment(&allocator_, + /* number_of_vregs */ 5, + /* method */ nullptr, + /* dex_pc */ 0u, + bounds_check); + bounds_check_env->CopyFrom(args); + bounds_check->SetRawEnvironment(bounds_check_env); + HInstruction* array_set = + new (&allocator_) HArraySet(array, index, value, Primitive::kPrimInt, /* dex_pc */ 0); + block->AddInstruction(array_set); + + graph_->BuildDominatorTree(); + SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get()); + ssa_analysis.Analyze(); + + EXPECT_FALSE(graph_->IsDebuggable()); + EXPECT_EQ(18u, bounds_check->GetLifetimePosition()); + static const char* const expected[] = { + "ranges: { [2,21) }, uses: { 15 17 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " + "is_high: 0", + "ranges: { [4,21) }, uses: { 19 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " + "is_high: 0", + "ranges: { [6,21) }, uses: { 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " + "is_high: 0", + // Environment uses do not keep the non-reference argument alive. + "ranges: { [8,10) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + // Environment uses keep the reference argument alive. + "ranges: { [10,19) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + }; + ASSERT_EQ(arraysize(expected), args.size()); + size_t arg_index = 0u; + for (HInstruction* arg : args) { + std::ostringstream arg_dump; + arg->GetLiveInterval()->Dump(arg_dump); + EXPECT_STREQ(expected[arg_index], arg_dump.str().c_str()) << arg_index; + ++arg_index; + } +} + +TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { + HInstruction* array = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* index = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt); + HInstruction* value = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(2), 2, Primitive::kPrimInt); + HInstruction* extra_arg1 = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(3), 3, Primitive::kPrimInt); + HInstruction* extra_arg2 = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(4), 4, Primitive::kPrimNot); + ArenaVector<HInstruction*> args({ array, index, value, extra_arg1, extra_arg2 }, + allocator_.Adapter()); + for (HInstruction* insn : args) { + entry_->AddInstruction(insn); + } + + HBasicBlock* block = CreateSuccessor(entry_); + HInstruction* null_check = new (&allocator_) HNullCheck(array, 0); + block->AddInstruction(null_check); + HEnvironment* null_check_env = new (&allocator_) HEnvironment(&allocator_, + /* number_of_vregs */ 5, + /* method */ nullptr, + /* dex_pc */ 0u, + null_check); + null_check_env->CopyFrom(args); + null_check->SetRawEnvironment(null_check_env); + HInstruction* length = new (&allocator_) HArrayLength(array, 0); + block->AddInstruction(length); + // Use HAboveOrEqual+HDeoptimize as the bounds check. 
+ HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length); + block->AddInstruction(ae); + HInstruction* deoptimize = new(&allocator_) HDeoptimize(ae, /* dex_pc */ 0u); + block->AddInstruction(deoptimize); + HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_, + /* number_of_vregs */ 5, + /* method */ nullptr, + /* dex_pc */ 0u, + deoptimize); + deoptimize_env->CopyFrom(args); + deoptimize->SetRawEnvironment(deoptimize_env); + HInstruction* array_set = + new (&allocator_) HArraySet(array, index, value, Primitive::kPrimInt, /* dex_pc */ 0); + block->AddInstruction(array_set); + + graph_->BuildDominatorTree(); + SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get()); + ssa_analysis.Analyze(); + + EXPECT_FALSE(graph_->IsDebuggable()); + EXPECT_EQ(20u, deoptimize->GetLifetimePosition()); + static const char* const expected[] = { + "ranges: { [2,23) }, uses: { 15 17 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 " + "is_high: 0", + "ranges: { [4,23) }, uses: { 19 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 " + "is_high: 0", + "ranges: { [6,23) }, uses: { 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + // Environment use in HDeoptimize keeps even the non-reference argument alive. + "ranges: { [8,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + // Environment uses keep the reference argument alive. + "ranges: { [10,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + }; + ASSERT_EQ(arraysize(expected), args.size()); + size_t arg_index = 0u; + for (HInstruction* arg : args) { + std::ostringstream arg_dump; + arg->GetLiveInterval()->Dump(arg_dump); + EXPECT_STREQ(expected[arg_index], arg_dump.str().c_str()) << arg_index; + ++arg_index; + } +} + +} // namespace art diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 5c4875951b..d265a44092 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -42,7 +42,10 @@ enum class RegisterView { // private kUseQuaternaryName, }; -template<typename Ass, typename Reg, typename FPReg, typename Imm> +// For use in the template as the default type to get a nonvector registers version. 
+struct NoVectorRegs {}; + +template<typename Ass, typename Reg, typename FPReg, typename Imm, typename VecReg = NoVectorRegs> class AssemblerTest : public testing::Test { public: Ass* GetAssembler() { @@ -146,7 +149,8 @@ class AssemblerTest : public testing::Test { std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt, - int bias = 0) { + int bias = 0, + int multiplier = 1) { std::string str; std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); @@ -154,7 +158,7 @@ class AssemblerTest : public testing::Test { for (auto reg2 : reg2_registers) { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); - (assembler_.get()->*f)(*reg1, *reg2, new_imm + bias); + (assembler_.get()->*f)(*reg1, *reg2, new_imm * multiplier + bias); std::string base = fmt; std::string reg1_string = (this->*GetName1)(*reg1); @@ -172,7 +176,7 @@ class AssemblerTest : public testing::Test { size_t imm_index = base.find(IMM_TOKEN); if (imm_index != std::string::npos) { std::ostringstream sreg; - sreg << imm + bias; + sreg << imm * multiplier + bias; std::string imm_string = sreg.str(); base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); } @@ -538,6 +542,69 @@ class AssemblerTest : public testing::Test { return str; } + std::string RepeatVV(void (Ass::*f)(VecReg, VecReg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, VecReg>(f, + GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + fmt); + } + + std::string RepeatVVV(void (Ass::*f)(VecReg, VecReg, VecReg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, VecReg, VecReg>(f, + GetVectorRegisters(), + GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + fmt); + } + + std::string RepeatVR(void (Ass::*f)(VecReg, Reg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, Reg>( + f, + GetVectorRegisters(), + GetRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt); + } + + template <typename ImmType> + std::string RepeatVRIb(void (Ass::*f)(VecReg, Reg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0, + int multiplier = 1) { + return RepeatTemplatedRegistersImmBits<VecReg, Reg, ImmType>( + f, + imm_bits, + GetVectorRegisters(), + GetRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias, + multiplier); + } + + template <typename ImmType> + std::string RepeatVVIb(void (Ass::*f)(VecReg, VecReg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0) { + return RepeatTemplatedRegistersImmBits<VecReg, VecReg, ImmType>(f, + imm_bits, + GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + fmt, + bias); + } + // This is intended to be run as a test. bool CheckTools() { return test_helper_->CheckTools(); @@ -552,6 +619,11 @@ class AssemblerTest : public testing::Test { UNREACHABLE(); } + virtual std::vector<VecReg*> GetVectorRegisters() { + UNIMPLEMENTED(FATAL) << "Architecture does not support vector registers"; + UNREACHABLE(); + } + // Secondary register names are the secondary view on registers, e.g., 32b on 64b systems. 
virtual std::string GetSecondaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { UNIMPLEMENTED(FATAL) << "Architecture does not support secondary registers"; @@ -971,6 +1043,12 @@ class AssemblerTest : public testing::Test { return sreg.str(); } + std::string GetVecRegName(const VecReg& reg) { + std::ostringstream sreg; + sreg << reg; + return sreg.str(); + } + // If the assembly file needs a header, return it in a sub-class. virtual const char* GetAssemblyHeader() { return nullptr; diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 39eb5893d8..c14315a91e 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -184,6 +184,106 @@ void Mips64Assembler::EmitFI(int opcode, int fmt, FpuRegister ft, uint16_t imm) Emit(encoding); } +void Mips64Assembler::EmitMsa3R(int operation, + int df, + VectorRegister wt, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(wt, kNoVectorRegister); + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df << kDfShift | + static_cast<uint32_t>(wt) << kWtShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaBIT(int operation, + int df_m, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df_m << kDfMShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaELM(int operation, + int df_n, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaELMOperationShift | + df_n << kDfNShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaMI10(int s10, + GpuRegister rs, + VectorRegister wd, + int minor_opcode, + int df) { + CHECK_NE(rs, kNoGpuRegister); + CHECK_NE(wd, kNoVectorRegister); + CHECK(IsUint<10>(s10)) << s10; + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + s10 << kS10Shift | + static_cast<uint32_t>(rs) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode << kS10MinorShift | + df; + Emit(encoding); +} + +void Mips64Assembler::EmitMsa2R(int operation, + int df, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsa2ROperationShift | + df << kDf2RShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsa2RF(int operation, + int df, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsa2RFOperationShift | + df << kDf2RShift | + 
static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x21); } @@ -1080,6 +1180,378 @@ void Mips64Assembler::Not(GpuRegister rd, GpuRegister rs) { Nor(rd, rs, ZERO); } +// TODO: Check for MSA presence in Mips64InstructionSetFeatures for each MSA instruction. + +void Mips64Assembler::AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe); +} + +void Mips64Assembler::MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uD(VectorRegister wd, 
VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::Ffint_sW(VectorRegister wd, VectorRegister ws) { + EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e); +} + +void Mips64Assembler::Ffint_sD(VectorRegister wd, VectorRegister ws) { + EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e); +} + +void Mips64Assembler::Ftint_sW(VectorRegister wd, VectorRegister ws) { + EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e); +} + +void Mips64Assembler::Ftint_sD(VectorRegister wd, VectorRegister ws) { + EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e); +} + +void Mips64Assembler::SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraW(VectorRegister wd, 
VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SlliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x0, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x0, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x0, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x0, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x1, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x1, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x1, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x1, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x2, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x2, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x2, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x2, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::MoveV(VectorRegister wd, VectorRegister ws) { + EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiB(VectorRegister wd, VectorRegister ws, int n4) { + CHECK(IsUint<4>(n4)) << n4; + EmitMsaELM(0x1, n4 | kMsaDfNByteMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiH(VectorRegister wd, VectorRegister ws, int n3) { + CHECK(IsUint<3>(n3)) << n3; + EmitMsaELM(0x1, n3 | kMsaDfNHalfwordMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiW(VectorRegister wd, VectorRegister ws, int n2) { + CHECK(IsUint<2>(n2)) << n2; + EmitMsaELM(0x1, n2 | 
kMsaDfNWordMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiD(VectorRegister wd, VectorRegister ws, int n1) { + CHECK(IsUint<1>(n1)) << n1; + EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19); +} + +void Mips64Assembler::FillB(VectorRegister wd, GpuRegister rs) { + EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::FillH(VectorRegister wd, GpuRegister rs) { + EmitMsa2R(0xc0, 0x1, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::FillW(VectorRegister wd, GpuRegister rs) { + EmitMsa2R(0xc0, 0x2, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::FillD(VectorRegister wd, GpuRegister rs) { + EmitMsa2R(0xc0, 0x3, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::LdB(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<10>(offset)) << offset; + EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x8, 0x0); +} + +void Mips64Assembler::LdH(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<11>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64HalfwordSize); + EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x8, 0x1); +} + +void Mips64Assembler::LdW(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<12>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64WordSize); + EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x8, 0x2); +} + +void Mips64Assembler::LdD(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<13>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64DoublewordSize); + EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x8, 0x3); +} + +void Mips64Assembler::StB(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<10>(offset)) << offset; + EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0); +} + +void Mips64Assembler::StH(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<11>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64HalfwordSize); + EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x9, 0x1); +} + +void Mips64Assembler::StW(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<12>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64WordSize); + EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x9, 0x2); +} + +void Mips64Assembler::StD(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<13>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64DoublewordSize); + EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3); +} + void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) { TemplateLoadConst32(this, rd, value); } diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 8bbe862d19..471fdf2e9e 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -266,6 +266,7 @@ void TemplateLoadConst64(Asm* a, Rtype rd, Vtype value) { } } +static constexpr size_t kMips64HalfwordSize = 2; static constexpr size_t kMips64WordSize = 4; static constexpr size_t kMips64DoublewordSize = 8; @@ -644,6 +645,101 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Clear(GpuRegister rd); void Not(GpuRegister rd, GpuRegister rs); + // MSA instructions. 
+ void AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void Ffint_sW(VectorRegister wd, VectorRegister ws); + void Ffint_sD(VectorRegister wd, VectorRegister ws); + void Ftint_sW(VectorRegister wd, VectorRegister ws); + void Ftint_sD(VectorRegister wd, VectorRegister ws); + + void SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraW(VectorRegister wd, VectorRegister ws, 
VectorRegister wt); + void SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + // Immediate shift instructions, where shamtN denotes shift amount (must be between 0 and 2^N-1). + void SlliB(VectorRegister wd, VectorRegister ws, int shamt3); + void SlliH(VectorRegister wd, VectorRegister ws, int shamt4); + void SlliW(VectorRegister wd, VectorRegister ws, int shamt5); + void SlliD(VectorRegister wd, VectorRegister ws, int shamt6); + void SraiB(VectorRegister wd, VectorRegister ws, int shamt3); + void SraiH(VectorRegister wd, VectorRegister ws, int shamt4); + void SraiW(VectorRegister wd, VectorRegister ws, int shamt5); + void SraiD(VectorRegister wd, VectorRegister ws, int shamt6); + void SrliB(VectorRegister wd, VectorRegister ws, int shamt3); + void SrliH(VectorRegister wd, VectorRegister ws, int shamt4); + void SrliW(VectorRegister wd, VectorRegister ws, int shamt5); + void SrliD(VectorRegister wd, VectorRegister ws, int shamt6); + + void MoveV(VectorRegister wd, VectorRegister ws); + void SplatiB(VectorRegister wd, VectorRegister ws, int n4); + void SplatiH(VectorRegister wd, VectorRegister ws, int n3); + void SplatiW(VectorRegister wd, VectorRegister ws, int n2); + void SplatiD(VectorRegister wd, VectorRegister ws, int n1); + void FillB(VectorRegister wd, GpuRegister rs); + void FillH(VectorRegister wd, GpuRegister rs); + void FillW(VectorRegister wd, GpuRegister rs); + void FillD(VectorRegister wd, GpuRegister rs); + + void LdB(VectorRegister wd, GpuRegister rs, int offset); + void LdH(VectorRegister wd, GpuRegister rs, int offset); + void LdW(VectorRegister wd, GpuRegister rs, int offset); + void LdD(VectorRegister wd, GpuRegister rs, int offset); + void StB(VectorRegister wd, GpuRegister rs, int offset); + void StH(VectorRegister wd, GpuRegister rs, int offset); + void StW(VectorRegister wd, GpuRegister rs, int offset); + void StD(VectorRegister wd, GpuRegister rs, int offset); + // Higher level composite instructions. int InstrCountForLoadReplicatedConst32(int64_t); void LoadConst32(GpuRegister rd, int32_t value); @@ -782,6 +878,86 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer public: template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreConstToOffset(StoreOperandType type, + int64_t value, + GpuRegister base, + int32_t offset, + GpuRegister temp, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + // We permit `base` and `temp` to coincide (however, we check that neither is AT), + // in which case the `base` register may be overwritten in the process. + CHECK_NE(temp, AT); // Must not use AT as temp, so as not to overwrite the adjusted base. + if (!IsInt<16>(offset) || + (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && + !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { + LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); + Daddu(AT, AT, base); + base = AT; + offset &= (kMips64DoublewordSize - 1); + } + GpuRegister reg; + // If the adjustment left `base` unchanged and equal to `temp`, we can't use `temp` + // to load and hold the value but we can use AT instead as AT hasn't been used yet. + // Otherwise, `temp` can be used for the value. 
And if `temp` is the same as the + // original `base` (that is, `base` prior to the adjustment), the original `base` + // register will be overwritten. + if (base == temp) { + temp = AT; + } + + if (type == kStoreDoubleword && IsAligned<kMips64DoublewordSize>(offset)) { + if (value == 0) { + reg = ZERO; + } else { + reg = temp; + LoadConst64(reg, value); + } + Sd(reg, base, offset); + null_checker(); + } else { + uint32_t low = Low32Bits(value); + uint32_t high = High32Bits(value); + if (low == 0) { + reg = ZERO; + } else { + reg = temp; + LoadConst32(reg, low); + } + switch (type) { + case kStoreByte: + Sb(reg, base, offset); + break; + case kStoreHalfword: + Sh(reg, base, offset); + break; + case kStoreWord: + Sw(reg, base, offset); + break; + case kStoreDoubleword: + // not aligned to kMips64DoublewordSize + CHECK_ALIGNED(offset, kMips64WordSize); + Sw(reg, base, offset); + null_checker(); + if (high == 0) { + reg = ZERO; + } else { + reg = temp; + if (high != low) { + LoadConst32(reg, high); + } + } + Sw(reg, base, offset + kMips64WordSize); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + if (type != kStoreDoubleword) { + null_checker(); + } + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, @@ -1300,6 +1476,17 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); + void EmitMsa3R(int operation, + int df, + VectorRegister wt, + VectorRegister ws, + VectorRegister wd, + int minor_opcode); + void EmitMsaBIT(int operation, int df_m, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsaELM(int operation, int df_n, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsaMI10(int s10, GpuRegister rs, VectorRegister wd, int minor_opcode, int df); + void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); void Buncond(Mips64Label* label); void Bcond(Mips64Label* label, diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 96a02c46d7..87a1823d20 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -37,12 +37,14 @@ struct MIPS64CpuRegisterCompare { class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, mips64::GpuRegister, mips64::FpuRegister, - uint32_t> { + uint32_t, + mips64::VectorRegister> { public: typedef AssemblerTest<mips64::Mips64Assembler, mips64::GpuRegister, mips64::FpuRegister, - uint32_t> Base; + uint32_t, + mips64::VectorRegister> Base; protected: // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... @@ -60,7 +62,7 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative // branches in the .text section and so they require a relocation pass (there's a relocation // section, .rela.text, that has the needed info to fix up the branches). 
- return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; + return " -march=mips64r6 -mmsa -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; } void Pad(std::vector<uint8_t>& data) OVERRIDE { @@ -176,6 +178,39 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, fp_registers_.push_back(new mips64::FpuRegister(mips64::F29)); fp_registers_.push_back(new mips64::FpuRegister(mips64::F30)); fp_registers_.push_back(new mips64::FpuRegister(mips64::F31)); + + vec_registers_.push_back(new mips64::VectorRegister(mips64::W0)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W1)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W2)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W3)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W4)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W5)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W6)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W7)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W8)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W9)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W10)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W11)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W12)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W13)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W14)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W15)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W16)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W17)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W18)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W19)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W20)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W21)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W22)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W23)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W24)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W25)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W26)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W27)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W28)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W29)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W30)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W31)); } } @@ -193,6 +228,10 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return fp_registers_; } + std::vector<mips64::VectorRegister*> GetVectorRegisters() OVERRIDE { + return vec_registers_; + } + uint32_t CreateImmediate(int64_t imm_value) OVERRIDE { return imm_value; } @@ -272,6 +311,7 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_; std::vector<mips64::FpuRegister*> fp_registers_; + std::vector<mips64::VectorRegister*> vec_registers_; }; @@ -2178,6 +2218,82 @@ TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) { DriverStr(expected, "StoreFpuToOffset"); } +TEST_F(AssemblerMIPS64Test, StoreConstToOffset) { + __ StoreConstToOffset(mips64::kStoreByte, 0xFF, mips64::A1, +0, mips64::T8); + __ 
StoreConstToOffset(mips64::kStoreHalfword, 0xFFFF, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreDoubleword, 0x123456789ABCDEF0, mips64::A1, +0, mips64::T8); + + __ StoreConstToOffset(mips64::kStoreByte, 0, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreHalfword, 0, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreDoubleword, 0, mips64::A1, +0, mips64::T8); + + __ StoreConstToOffset(mips64::kStoreDoubleword, 0x1234567812345678, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreDoubleword, 0x1234567800000000, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreDoubleword, 0x0000000012345678, mips64::A1, +0, mips64::T8); + + __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::T8, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::T8, +0, mips64::T8); + + __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::A1, -0xFFF0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::A1, +0xFFF0, mips64::T8); + + __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::T8, -0xFFF0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::T8, +0xFFF0, mips64::T8); + + const char* expected = + "ori $t8, $zero, 0xFF\n" + "sb $t8, 0($a1)\n" + "ori $t8, $zero, 0xFFFF\n" + "sh $t8, 0($a1)\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8,0x5678\n" + "sw $t8, 0($a1)\n" + "lui $t8, 0x9abc\n" + "ori $t8, $t8,0xdef0\n" + "dahi $t8, $t8, 0x5679\n" + "dati $t8, $t8, 0x1234\n" + "sd $t8, 0($a1)\n" + "sb $zero, 0($a1)\n" + "sh $zero, 0($a1)\n" + "sw $zero, 0($a1)\n" + "sd $zero, 0($a1)\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8,0x5678\n" + "dins $t8, $t8, 0x20, 0x20\n" + "sd $t8, 0($a1)\n" + "lui $t8, 0x246\n" + "ori $t8, $t8, 0x8acf\n" + "dsll32 $t8, $t8, 0x3\n" + "sd $t8, 0($a1)\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8, 0x5678\n" + "sd $t8, 0($a1)\n" + "sw $zero, 0($t8)\n" + "lui $at,0x1234\n" + "ori $at, $at, 0x5678\n" + "sw $at, 0($t8)\n" + "lui $at, 0xffff\n" + "ori $at, $at, 0x10\n" + "daddu $at, $at, $a1\n" + "sw $zero, 0($at)\n" + "li $at, 0xfff0\n" + "daddu $at, $at, $a1\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8, 0x5678\n" + "sw $t8, 0($at)\n" + "lui $at, 0xffff\n" + "ori $at, $at, 0x10\n" + "daddu $at, $at, $t8\n" + "sw $zero, 0($at)\n" + "li $at, 0xfff0\n" + "daddu $at, $at, $t8\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8, 0x5678\n" + "sw $t8, 0($at)\n"; + DriverStr(expected, "StoreConstToOffset"); +} ////////////////////////////// // Loading/adding Constants // ////////////////////////////// @@ -2374,6 +2490,370 @@ TEST_F(AssemblerMIPS64Test, LoadConst64) { EXPECT_EQ(tester.GetPathsCovered(), art::mips64::kLoadConst64PathAllPaths); } +// MSA instructions. 
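The MSA tests that follow exercise the new vector emitters through the assembler test's Repeat helpers (RepeatVVV for three-register forms, RepeatVV/RepeatVR for two-operand forms, RepeatVVIb/RepeatVRIb for immediate forms). For the immediate shifts, the element width and the shift amount share a single 7-bit df/m field; a minimal sketch of that packing, assuming the emitters simply OR the shift amount into the kMsaDfM* masks this change adds to constants_mips64.h (the helper name below is illustrative, not part of the patch):

    // Sketch only: df/m packing for BIT-format immediates such as slli.df.
    // The mask values mirror kMsaDfMByteMask ... kMsaDfMDoublewordMask from this change.
    int PackMsaDfM(int element_size_in_bytes, int shamt) {
      switch (element_size_in_bytes) {
        case 1: return 0x70 | shamt;  // byte, shamt in [0, 7]
        case 2: return 0x60 | shamt;  // halfword, shamt in [0, 15]
        case 4: return 0x40 | shamt;  // word, shamt in [0, 31]
        case 8: return 0x00 | shamt;  // doubleword, shamt in [0, 63]
        default: return -1;           // unsupported element size
      }
    }

The ELM-format element operations (splati.df) appear to use the analogous kMsaDfN* masks with an element index in place of the shift amount, which is why the test immediates shrink from 4 bits for splati.b down to 1 bit for splati.d.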
+ +TEST_F(AssemblerMIPS64Test, AndV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AndV, "and.v ${reg1}, ${reg2}, ${reg3}"), "and.v"); +} + +TEST_F(AssemblerMIPS64Test, OrV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::OrV, "or.v ${reg1}, ${reg2}, ${reg3}"), "or.v"); +} + +TEST_F(AssemblerMIPS64Test, NorV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::NorV, "nor.v ${reg1}, ${reg2}, ${reg3}"), "nor.v"); +} + +TEST_F(AssemblerMIPS64Test, XorV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::XorV, "xor.v ${reg1}, ${reg2}, ${reg3}"), "xor.v"); +} + +TEST_F(AssemblerMIPS64Test, AddvB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvB, "addv.b ${reg1}, ${reg2}, ${reg3}"), + "addv.b"); +} + +TEST_F(AssemblerMIPS64Test, AddvH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvH, "addv.h ${reg1}, ${reg2}, ${reg3}"), + "addv.h"); +} + +TEST_F(AssemblerMIPS64Test, AddvW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvW, "addv.w ${reg1}, ${reg2}, ${reg3}"), + "addv.w"); +} + +TEST_F(AssemblerMIPS64Test, AddvD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvD, "addv.d ${reg1}, ${reg2}, ${reg3}"), + "addv.d"); +} + +TEST_F(AssemblerMIPS64Test, SubvB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvB, "subv.b ${reg1}, ${reg2}, ${reg3}"), + "subv.b"); +} + +TEST_F(AssemblerMIPS64Test, SubvH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvH, "subv.h ${reg1}, ${reg2}, ${reg3}"), + "subv.h"); +} + +TEST_F(AssemblerMIPS64Test, SubvW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvW, "subv.w ${reg1}, ${reg2}, ${reg3}"), + "subv.w"); +} + +TEST_F(AssemblerMIPS64Test, SubvD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvD, "subv.d ${reg1}, ${reg2}, ${reg3}"), + "subv.d"); +} + +TEST_F(AssemblerMIPS64Test, MulvB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvB, "mulv.b ${reg1}, ${reg2}, ${reg3}"), + "mulv.b"); +} + +TEST_F(AssemblerMIPS64Test, MulvH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvH, "mulv.h ${reg1}, ${reg2}, ${reg3}"), + "mulv.h"); +} + +TEST_F(AssemblerMIPS64Test, MulvW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvW, "mulv.w ${reg1}, ${reg2}, ${reg3}"), + "mulv.w"); +} + +TEST_F(AssemblerMIPS64Test, MulvD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvD, "mulv.d ${reg1}, ${reg2}, ${reg3}"), + "mulv.d"); +} + +TEST_F(AssemblerMIPS64Test, Div_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sB, "div_s.b ${reg1}, ${reg2}, ${reg3}"), + "div_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Div_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sH, "div_s.h ${reg1}, ${reg2}, ${reg3}"), + "div_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Div_sW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sW, "div_s.w ${reg1}, ${reg2}, ${reg3}"), + "div_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Div_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sD, "div_s.d ${reg1}, ${reg2}, ${reg3}"), + "div_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Div_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uB, "div_u.b ${reg1}, ${reg2}, ${reg3}"), + "div_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Div_uH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uH, "div_u.h ${reg1}, ${reg2}, ${reg3}"), + "div_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Div_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uW, "div_u.w ${reg1}, ${reg2}, ${reg3}"), + "div_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Div_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uD, "div_u.d ${reg1}, ${reg2}, ${reg3}"), + "div_u.d"); 
+} + +TEST_F(AssemblerMIPS64Test, Mod_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sB, "mod_s.b ${reg1}, ${reg2}, ${reg3}"), + "mod_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Mod_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sH, "mod_s.h ${reg1}, ${reg2}, ${reg3}"), + "mod_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Mod_sW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sW, "mod_s.w ${reg1}, ${reg2}, ${reg3}"), + "mod_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Mod_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sD, "mod_s.d ${reg1}, ${reg2}, ${reg3}"), + "mod_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uB, "mod_u.b ${reg1}, ${reg2}, ${reg3}"), + "mod_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uH, "mod_u.h ${reg1}, ${reg2}, ${reg3}"), + "mod_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uW, "mod_u.w ${reg1}, ${reg2}, ${reg3}"), + "mod_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uD, "mod_u.d ${reg1}, ${reg2}, ${reg3}"), + "mod_u.d"); +} + +TEST_F(AssemblerMIPS64Test, FaddW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"), + "fadd.w"); +} + +TEST_F(AssemblerMIPS64Test, FaddD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddD, "fadd.d ${reg1}, ${reg2}, ${reg3}"), + "fadd.d"); +} + +TEST_F(AssemblerMIPS64Test, FsubW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FsubW, "fsub.w ${reg1}, ${reg2}, ${reg3}"), + "fsub.w"); +} + +TEST_F(AssemblerMIPS64Test, FsubD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FsubD, "fsub.d ${reg1}, ${reg2}, ${reg3}"), + "fsub.d"); +} + +TEST_F(AssemblerMIPS64Test, FmulW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmulW, "fmul.w ${reg1}, ${reg2}, ${reg3}"), + "fmul.w"); +} + +TEST_F(AssemblerMIPS64Test, FmulD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmulD, "fmul.d ${reg1}, ${reg2}, ${reg3}"), + "fmul.d"); +} + +TEST_F(AssemblerMIPS64Test, FdivW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FdivW, "fdiv.w ${reg1}, ${reg2}, ${reg3}"), + "fdiv.w"); +} + +TEST_F(AssemblerMIPS64Test, FdivD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FdivD, "fdiv.d ${reg1}, ${reg2}, ${reg3}"), + "fdiv.d"); +} + +TEST_F(AssemblerMIPS64Test, Ffint_sW) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sW, "ffint_s.w ${reg1}, ${reg2}"), + "ffint_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Ffint_sD) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sD, "ffint_s.d ${reg1}, ${reg2}"), + "ffint_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Ftint_sW) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ftint_sW, "ftint_s.w ${reg1}, ${reg2}"), + "ftint_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Ftint_sD) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ftint_sD, "ftint_s.d ${reg1}, ${reg2}"), + "ftint_s.d"); +} + +TEST_F(AssemblerMIPS64Test, SllB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllB, "sll.b ${reg1}, ${reg2}, ${reg3}"), "sll.b"); +} + +TEST_F(AssemblerMIPS64Test, SllH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllH, "sll.h ${reg1}, ${reg2}, ${reg3}"), "sll.h"); +} + +TEST_F(AssemblerMIPS64Test, SllW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllW, "sll.w ${reg1}, ${reg2}, ${reg3}"), "sll.w"); +} + +TEST_F(AssemblerMIPS64Test, SllD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllD, "sll.d ${reg1}, ${reg2}, 
${reg3}"), "sll.d"); +} + +TEST_F(AssemblerMIPS64Test, SraB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraB, "sra.b ${reg1}, ${reg2}, ${reg3}"), "sra.b"); +} + +TEST_F(AssemblerMIPS64Test, SraH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraH, "sra.h ${reg1}, ${reg2}, ${reg3}"), "sra.h"); +} + +TEST_F(AssemblerMIPS64Test, SraW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraW, "sra.w ${reg1}, ${reg2}, ${reg3}"), "sra.w"); +} + +TEST_F(AssemblerMIPS64Test, SraD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraD, "sra.d ${reg1}, ${reg2}, ${reg3}"), "sra.d"); +} + +TEST_F(AssemblerMIPS64Test, SrlB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlB, "srl.b ${reg1}, ${reg2}, ${reg3}"), "srl.b"); +} + +TEST_F(AssemblerMIPS64Test, SrlH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlH, "srl.h ${reg1}, ${reg2}, ${reg3}"), "srl.h"); +} + +TEST_F(AssemblerMIPS64Test, SrlW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlW, "srl.w ${reg1}, ${reg2}, ${reg3}"), "srl.w"); +} + +TEST_F(AssemblerMIPS64Test, SrlD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlD, "srl.d ${reg1}, ${reg2}, ${reg3}"), "srl.d"); +} + +TEST_F(AssemblerMIPS64Test, SlliB) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliB, 3, "slli.b ${reg1}, ${reg2}, {imm}"), + "slli.b"); +} + +TEST_F(AssemblerMIPS64Test, SlliH) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliH, 4, "slli.h ${reg1}, ${reg2}, {imm}"), + "slli.h"); +} + +TEST_F(AssemblerMIPS64Test, SlliW) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliW, 5, "slli.w ${reg1}, ${reg2}, {imm}"), + "slli.w"); +} + +TEST_F(AssemblerMIPS64Test, SlliD) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliD, 6, "slli.d ${reg1}, ${reg2}, {imm}"), + "slli.d"); +} + +TEST_F(AssemblerMIPS64Test, MoveV) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::MoveV, "move.v ${reg1}, ${reg2}"), "move.v"); +} + +TEST_F(AssemblerMIPS64Test, SplatiB) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiB, 4, "splati.b ${reg1}, ${reg2}[{imm}]"), + "splati.b"); +} + +TEST_F(AssemblerMIPS64Test, SplatiH) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiH, 3, "splati.h ${reg1}, ${reg2}[{imm}]"), + "splati.h"); +} + +TEST_F(AssemblerMIPS64Test, SplatiW) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiW, 2, "splati.w ${reg1}, ${reg2}[{imm}]"), + "splati.w"); +} + +TEST_F(AssemblerMIPS64Test, SplatiD) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiD, 1, "splati.d ${reg1}, ${reg2}[{imm}]"), + "splati.d"); +} + +TEST_F(AssemblerMIPS64Test, FillB) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillB, "fill.b ${reg1}, ${reg2}"), "fill.b"); +} + +TEST_F(AssemblerMIPS64Test, FillH) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillH, "fill.h ${reg1}, ${reg2}"), "fill.h"); +} + +TEST_F(AssemblerMIPS64Test, FillW) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w"); +} + +TEST_F(AssemblerMIPS64Test, FillD) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillD, "fill.d ${reg1}, ${reg2}"), "fill.d"); +} + +TEST_F(AssemblerMIPS64Test, LdB) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdB, -10, "ld.b ${reg1}, {imm}(${reg2})"), "ld.b"); +} + +TEST_F(AssemblerMIPS64Test, LdH) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdH, -10, "ld.h ${reg1}, {imm}(${reg2})", 0, 2), + "ld.h"); +} + +TEST_F(AssemblerMIPS64Test, LdW) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdW, -10, "ld.w ${reg1}, {imm}(${reg2})", 0, 4), + "ld.w"); +} + +TEST_F(AssemblerMIPS64Test, LdD) { + 
DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdD, -10, "ld.d ${reg1}, {imm}(${reg2})", 0, 8), + "ld.d"); +} + +TEST_F(AssemblerMIPS64Test, StB) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StB, -10, "st.b ${reg1}, {imm}(${reg2})"), "st.b"); +} + +TEST_F(AssemblerMIPS64Test, StH) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StH, -10, "st.h ${reg1}, {imm}(${reg2})", 0, 2), + "st.h"); +} + +TEST_F(AssemblerMIPS64Test, StW) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StW, -10, "st.w ${reg1}, {imm}(${reg2})", 0, 4), + "st.w"); +} + +TEST_F(AssemblerMIPS64Test, StD) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StD, -10, "st.d ${reg1}, {imm}(${reg2})", 0, 8), + "st.d"); +} + #undef __ } // namespace art diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h index f57498d34f..5ae9c73589 100644 --- a/compiler/utils/mips64/constants_mips64.h +++ b/compiler/utils/mips64/constants_mips64.h @@ -51,8 +51,35 @@ enum InstructionFields { kFdShift = 6, kFdBits = 5, + kMsaOperationShift = 23, + kMsaELMOperationShift = 22, + kMsa2ROperationShift = 18, + kMsa2RFOperationShift = 17, + kDfShift = 21, + kDfMShift = 16, + kDf2RShift = 16, + kDfNShift = 16, + kWtShift = 16, + kWtBits = 5, + kWsShift = 11, + kWsBits = 5, + kWdShift = 6, + kWdBits = 5, + kS10Shift = 16, + kS10MinorShift = 2, + kBranchOffsetMask = 0x0000ffff, kJumpOffsetMask = 0x03ffffff, + kMsaMajorOpcode = 0x1e, + kMsaDfMByteMask = 0x70, + kMsaDfMHalfwordMask = 0x60, + kMsaDfMWordMask = 0x40, + kMsaDfMDoublewordMask = 0x00, + kMsaDfNByteMask = 0x00, + kMsaDfNHalfwordMask = 0x20, + kMsaDfNWordMask = 0x30, + kMsaDfNDoublewordMask = 0x38, + kMsaS10Mask = 0x3ff, }; enum ScaleFactor { diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 6a57f45e42..0a6ceefe69 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1169,6 +1169,32 @@ void X86Assembler::pand(XmmRegister dst, XmmRegister src) { } +void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x55); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::andnps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x55); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pandn(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDF); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::orpd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index e3c123ccaf..9d7ca77a56 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -487,6 +487,10 @@ class X86Assembler FINAL : public Assembler { void andps(XmmRegister dst, const Address& src); void pand(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void andnps(XmmRegister dst, XmmRegister src); + void pandn(XmmRegister dst, XmmRegister src); + void orpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) void orps(XmmRegister dst, XmmRegister src); void por(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86/assembler_x86_test.cc 
b/compiler/utils/x86/assembler_x86_test.cc index 110d0dcd05..52c7507bc4 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -581,6 +581,18 @@ TEST_F(AssemblerX86Test, PAnd) { DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand"); } +TEST_F(AssemblerX86Test, AndnPD) { + DriverStr(RepeatFF(&x86::X86Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd"); +} + +TEST_F(AssemblerX86Test, AndnPS) { + DriverStr(RepeatFF(&x86::X86Assembler::andnps, "andnps %{reg2}, %{reg1}"), "andnps"); +} + +TEST_F(AssemblerX86Test, PAndn) { + DriverStr(RepeatFF(&x86::X86Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn"); +} + TEST_F(AssemblerX86Test, OrPD) { DriverStr(RepeatFF(&x86::X86Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd"); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 688fdcc37d..d7fed5b7a3 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1375,6 +1375,32 @@ void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst.LowBits(), src); } +void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x55); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x55); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDF); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 480e7116eb..93c24b8265 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -515,6 +515,10 @@ class X86_64Assembler FINAL : public Assembler { void andps(XmmRegister dst, XmmRegister src); // no addr variant (for now) void pand(XmmRegister dst, XmmRegister src); + void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void andnps(XmmRegister dst, XmmRegister src); + void pandn(XmmRegister dst, XmmRegister src); + void orpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) void orps(XmmRegister dst, XmmRegister src); void por(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index ba011c968e..9d62fd1f6f 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1269,6 +1269,18 @@ TEST_F(AssemblerX86_64Test, Pand) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::pand, "pand %{reg2}, %{reg1}"), "pand"); } +TEST_F(AssemblerX86_64Test, andnpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd"); +} + +TEST_F(AssemblerX86_64Test, andnps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnps, "andnps %{reg2}, %{reg1}"), "andnps"); +} + +TEST_F(AssemblerX86_64Test, Pandn) { + 
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn"); +} + TEST_F(AssemblerX86_64Test, Orps) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::orps, "orps %{reg2}, %{reg1}"), "orps"); } diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc index 01c33591e5..1a1d163304 100644 --- a/compiler/verifier_deps_test.cc +++ b/compiler/verifier_deps_test.cc @@ -714,12 +714,12 @@ TEST_F(VerifierDepsTest, MoveException_Unresolved) { TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInReferenced) { ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInReferenced")); - ASSERT_TRUE(HasClass("Ljava/lang/System;", true, "public final")); + ASSERT_TRUE(HasClass("Ljava/lang/System;", true, "public")); ASSERT_TRUE(HasField("Ljava/lang/System;", "out", "Ljava/io/PrintStream;", true, - "public final static", + "public static", "Ljava/lang/System;")); } @@ -727,13 +727,13 @@ TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInSuperclass1) { ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInSuperclass1")); ASSERT_TRUE(HasClass("Ljava/util/SimpleTimeZone;", true, "public")); ASSERT_TRUE(HasField( - "Ljava/util/SimpleTimeZone;", "LONG", "I", true, "public final static", "Ljava/util/TimeZone;")); + "Ljava/util/SimpleTimeZone;", "LONG", "I", true, "public static", "Ljava/util/TimeZone;")); } TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInSuperclass2) { ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInSuperclass2")); ASSERT_TRUE(HasField( - "LMySimpleTimeZone;", "SHORT", "I", true, "public final static", "Ljava/util/TimeZone;")); + "LMySimpleTimeZone;", "SHORT", "I", true, "public static", "Ljava/util/TimeZone;")); } TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInInterface1) { @@ -743,7 +743,7 @@ TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInInterface1) { "PI_ENABLE_OUTPUT_ESCAPING", "Ljava/lang/String;", true, - "public final static", + "public static", "Ljavax/xml/transform/Result;")); } @@ -753,7 +753,7 @@ TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInInterface2) { "PI_ENABLE_OUTPUT_ESCAPING", "Ljava/lang/String;", true, - "public final static", + "public static", "Ljavax/xml/transform/Result;")); } @@ -763,7 +763,7 @@ TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInInterface3) { "PI_ENABLE_OUTPUT_ESCAPING", "Ljava/lang/String;", true, - "public final static", + "public static", "Ljavax/xml/transform/Result;")); } @@ -773,13 +773,13 @@ TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInInterface4) { "ELEMENT_NODE", "S", true, - "public final static", + "public static", "Lorg/w3c/dom/Node;")); } TEST_F(VerifierDepsTest, StaticField_Unresolved_ReferrerInBoot) { ASSERT_TRUE(VerifyMethod("StaticField_Unresolved_ReferrerInBoot")); - ASSERT_TRUE(HasClass("Ljava/util/TimeZone;", true, "public abstract")); + ASSERT_TRUE(HasClass("Ljava/util/TimeZone;", true, "public")); ASSERT_TRUE(HasField("Ljava/util/TimeZone;", "x", "I", false)); } @@ -851,7 +851,7 @@ TEST_F(VerifierDepsTest, InvokeStatic_Resolved_DeclaredInReferenced) { TEST_F(VerifierDepsTest, InvokeStatic_Resolved_DeclaredInSuperclass1) { ASSERT_TRUE(VerifyMethod("InvokeStatic_Resolved_DeclaredInSuperclass1")); - ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract")); + ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public")); ASSERT_TRUE(HasMethod("direct", "Ljavax/net/ssl/SSLSocket;", "setSocketImplFactory", @@ -874,7 +874,7 @@ TEST_F(VerifierDepsTest, InvokeStatic_Resolved_DeclaredInSuperclass2) { 
TEST_F(VerifierDepsTest, InvokeStatic_DeclaredInInterface1) { ASSERT_TRUE(VerifyMethod("InvokeStatic_DeclaredInInterface1")); - ASSERT_TRUE(HasClass("Ljava/util/Map$Entry;", true, "public abstract interface")); + ASSERT_TRUE(HasClass("Ljava/util/Map$Entry;", true, "public interface")); ASSERT_TRUE(HasMethod("direct", "Ljava/util/Map$Entry;", "comparingByKey", @@ -896,7 +896,7 @@ TEST_F(VerifierDepsTest, InvokeStatic_DeclaredInInterface2) { TEST_F(VerifierDepsTest, InvokeStatic_Unresolved1) { ASSERT_FALSE(VerifyMethod("InvokeStatic_Unresolved1")); - ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract")); + ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public")); ASSERT_TRUE(HasMethod("direct", "Ljavax/net/ssl/SSLSocket;", "x", "()V", false)); } @@ -914,7 +914,7 @@ TEST_F(VerifierDepsTest, InvokeDirect_Resolved_DeclaredInReferenced) { TEST_F(VerifierDepsTest, InvokeDirect_Resolved_DeclaredInSuperclass1) { ASSERT_FALSE(VerifyMethod("InvokeDirect_Resolved_DeclaredInSuperclass1")); - ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract")); + ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public")); ASSERT_TRUE(HasMethod("direct", "Ljavax/net/ssl/SSLSocket;", "checkOldImpl", @@ -932,7 +932,7 @@ TEST_F(VerifierDepsTest, InvokeDirect_Resolved_DeclaredInSuperclass2) { TEST_F(VerifierDepsTest, InvokeDirect_Unresolved1) { ASSERT_FALSE(VerifyMethod("InvokeDirect_Unresolved1")); - ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract")); + ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public")); ASSERT_TRUE(HasMethod("direct", "Ljavax/net/ssl/SSLSocket;", "x", "()V", false)); } @@ -987,7 +987,7 @@ TEST_F(VerifierDepsTest, InvokeVirtual_Resolved_DeclaredInSuperinterface) { "size", "()I", true, - "public abstract", + "public", "Ljava/util/Set;")); } @@ -1016,13 +1016,13 @@ TEST_F(VerifierDepsTest, InvokeVirtual_ActuallyDirect) { TEST_F(VerifierDepsTest, InvokeInterface_Resolved_DeclaredInReferenced) { ASSERT_TRUE(VerifyMethod("InvokeInterface_Resolved_DeclaredInReferenced")); - ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public abstract interface")); + ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface")); ASSERT_TRUE(HasMethod("interface", "Ljava/lang/Runnable;", "run", "()V", true, - "public abstract", + "public", "Ljava/lang/Runnable;")); } @@ -1038,7 +1038,7 @@ TEST_F(VerifierDepsTest, InvokeInterface_Resolved_DeclaredInSuperinterface1) { "run", "()V", true, - "public abstract", + "public", "Ljava/lang/Runnable;")); } @@ -1049,13 +1049,13 @@ TEST_F(VerifierDepsTest, InvokeInterface_Resolved_DeclaredInSuperinterface2) { "isEmpty", "()Z", true, - "public abstract", + "public", "Ljava/util/Set;")); } TEST_F(VerifierDepsTest, InvokeInterface_Unresolved1) { ASSERT_FALSE(VerifyMethod("InvokeInterface_Unresolved1")); - ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public abstract interface")); + ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface")); ASSERT_TRUE(HasMethod("interface", "Ljava/lang/Runnable;", "x", "()V", false)); } @@ -1066,20 +1066,20 @@ TEST_F(VerifierDepsTest, InvokeInterface_Unresolved2) { TEST_F(VerifierDepsTest, InvokeSuper_ThisAssignable) { ASSERT_TRUE(VerifyMethod("InvokeSuper_ThisAssignable")); - ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public abstract interface")); + ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface")); ASSERT_TRUE(HasAssignable("Ljava/lang/Runnable;", "Ljava/lang/Thread;", true)); 
ASSERT_TRUE(HasMethod("interface", "Ljava/lang/Runnable;", "run", "()V", true, - "public abstract", + "public", "Ljava/lang/Runnable;")); } TEST_F(VerifierDepsTest, InvokeSuper_ThisNotAssignable) { ASSERT_FALSE(VerifyMethod("InvokeSuper_ThisNotAssignable")); - ASSERT_TRUE(HasClass("Ljava/lang/Integer;", true, "public final")); + ASSERT_TRUE(HasClass("Ljava/lang/Integer;", true, "public")); ASSERT_TRUE(HasAssignable("Ljava/lang/Integer;", "Ljava/lang/Thread;", false)); ASSERT_TRUE(HasMethod( "virtual", "Ljava/lang/Integer;", "intValue", "()I", true, "public", "Ljava/lang/Integer;")); @@ -1087,12 +1087,12 @@ TEST_F(VerifierDepsTest, InvokeSuper_ThisNotAssignable) { TEST_F(VerifierDepsTest, ArgumentType_ResolvedReferenceArray) { ASSERT_TRUE(VerifyMethod("ArgumentType_ResolvedReferenceArray")); - ASSERT_TRUE(HasClass("[Ljava/lang/Thread;", true, "public final abstract")); + ASSERT_TRUE(HasClass("[Ljava/lang/Thread;", true, "public")); } TEST_F(VerifierDepsTest, NewArray_Resolved) { ASSERT_TRUE(VerifyMethod("NewArray_Resolved")); - ASSERT_TRUE(HasClass("[Ljava/lang/IllegalStateException;", true, "public final abstract")); + ASSERT_TRUE(HasClass("[Ljava/lang/IllegalStateException;", true, "public")); } TEST_F(VerifierDepsTest, EncodeDecode) { |
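For context on the new x86 and x86-64 SSE emitters added above: andnps, andnpd and pandn encode the packed "and-not" forms, computing dst = (NOT dst) AND src, which makes them useful for masking without a separate NOT instruction. A minimal usage sketch against the new x86-64 API, assuming an all-ones operand is already available (the helper name and register choice are illustrative, not part of this change):

    // Sketch only: bitwise NOT of every lane of `dst` via pandn, assuming
    // `all_ones` already holds 0xFF..FF in all 128 bits.
    void EmitVectorNot(x86_64::X86_64Assembler* assembler,
                       x86_64::XmmRegister dst,
                       x86_64::XmmRegister all_ones) {
      assembler->pandn(dst, all_ones);  // dst = (~dst) & all_ones = ~dst
    }

In the AT&T-syntax expected strings used by the tests ("pandn %{reg2}, %{reg1}"), the destination is the second operand, which corresponds to the dst-first argument order of the assembler methods.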