Diffstat (limited to 'compiler')
48 files changed, 1559 insertions, 426 deletions
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index d1acada6dd..74ef35e740 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -23,20 +23,12 @@ CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instr const ArrayRef<const uint8_t>& quick_code, bool owns_code_array) : compiler_driver_(compiler_driver), instruction_set_(instruction_set), owns_code_array_(owns_code_array), quick_code_(nullptr) { - SetCode(&quick_code); -} - -void CompiledCode::SetCode(const ArrayRef<const uint8_t>* quick_code) { - if (quick_code != nullptr) { - CHECK(!quick_code->empty()); - if (owns_code_array_) { - // If we are supposed to own the code, don't deduplicate it. - CHECK(quick_code_ == nullptr); - quick_code_ = new SwapVector<uint8_t>(quick_code->begin(), quick_code->end(), - compiler_driver_->GetSwapSpaceAllocator()); - } else { - quick_code_ = compiler_driver_->DeduplicateCode(*quick_code); - } + if (owns_code_array_) { + // If we are supposed to own the code, don't deduplicate it. + quick_code_ = new SwapVector<uint8_t>(quick_code.begin(), quick_code.end(), + compiler_driver_->GetSwapSpaceAllocator()); + } else { + quick_code_ = compiler_driver_->DeduplicateCode(quick_code); } } diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index 45a62bc6c7..a4d2387030 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -47,8 +47,6 @@ class CompiledCode { return quick_code_; } - void SetCode(const ArrayRef<const uint8_t>* quick_code); - bool operator==(const CompiledCode& rhs) const; // To align an offset from a page-aligned value to make it suitable diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index bd590467e3..4b56b69a11 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -18,6 +18,7 @@ #include "art_method-inl.h" #include "base/logging.h" #include "base/mutex.h" +#include "compiled_method.h" #include "dex_file-inl.h" #include "dex_instruction-inl.h" #include "driver/compiler_driver.h" @@ -34,6 +35,13 @@ const bool kEnableQuickening = true; // Control check-cast elision. const bool kEnableCheckCastEllision = true; +struct QuickenedInfo { + QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {} + + uint32_t dex_pc; + uint16_t dex_member_index; +}; + class DexCompiler { public: DexCompiler(art::CompilerDriver& compiler, @@ -47,6 +55,10 @@ class DexCompiler { void Compile(); + const std::vector<QuickenedInfo>& GetQuickenedInfo() const { + return quickened_info_; + } + private: const DexFile& GetDexFile() const { return *unit_.GetDexFile(); @@ -87,6 +99,11 @@ class DexCompiler { const DexCompilationUnit& unit_; const DexToDexCompilationLevel dex_to_dex_compilation_level_; + // Filled by the compiler when quickening, in order to encode that information + // in the .oat file. The runtime will use that information to get to the original + // opcodes. + std::vector<QuickenedInfo> quickened_info_; + DISALLOW_COPY_AND_ASSIGN(DexCompiler); }; @@ -248,6 +265,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, inst->SetOpcode(new_opcode); // Replace field index by field offset. 
inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value())); + quickened_info_.push_back(QuickenedInfo(dex_pc, field_idx)); } } @@ -287,24 +305,60 @@ void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, } else { inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx)); } + quickened_info_.push_back(QuickenedInfo(dex_pc, method_idx)); } } } -} // namespace optimizer -} // namespace art - -extern "C" void ArtCompileDEX(art::CompilerDriver& driver, const art::DexFile::CodeItem* code_item, - uint32_t access_flags, art::InvokeType invoke_type, - uint16_t class_def_idx, uint32_t method_idx, jobject class_loader, - const art::DexFile& dex_file, - art::DexToDexCompilationLevel dex_to_dex_compilation_level) { - UNUSED(invoke_type); +extern "C" CompiledMethod* ArtCompileDEX( + art::CompilerDriver& driver, + const art::DexFile::CodeItem* code_item, + uint32_t access_flags, + art::InvokeType invoke_type ATTRIBUTE_UNUSED, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const art::DexFile& dex_file, + art::DexToDexCompilationLevel dex_to_dex_compilation_level) { if (dex_to_dex_compilation_level != art::kDontDexToDexCompile) { art::DexCompilationUnit unit(nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item, class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx)); art::optimizer::DexCompiler dex_compiler(driver, unit, dex_to_dex_compilation_level); dex_compiler.Compile(); + if (dex_compiler.GetQuickenedInfo().empty()) { + // No need to create a CompiledMethod if there are no quickened opcodes. + return nullptr; + } + + // Create a `CompiledMethod`, with the quickened information in the vmap table. + Leb128EncodingVector builder; + for (QuickenedInfo info : dex_compiler.GetQuickenedInfo()) { + builder.PushBackUnsigned(info.dex_pc); + builder.PushBackUnsigned(info.dex_member_index); + } + InstructionSet instruction_set = driver.GetInstructionSet(); + if (instruction_set == kThumb2) { + // Don't use the thumb2 instruction set to avoid the one off code delta. 
+ instruction_set = kArm; + } + return CompiledMethod::SwapAllocCompiledMethod( + &driver, + instruction_set, + ArrayRef<const uint8_t>(), // no code + 0, + 0, + 0, + nullptr, // src_mapping_table + ArrayRef<const uint8_t>(), // mapping_table + ArrayRef<const uint8_t>(builder.GetData()), // vmap_table + ArrayRef<const uint8_t>(), // gc_map + ArrayRef<const uint8_t>(), // cfi data + ArrayRef<const LinkerPatch>()); } + return nullptr; } + +} // namespace optimizer + +} // namespace art diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc index b1f5d870d4..044989e2a9 100644 --- a/compiler/dex/gvn_dead_code_elimination.cc +++ b/compiler/dex/gvn_dead_code_elimination.cc @@ -1192,7 +1192,6 @@ bool GvnDeadCodeElimination::RecordMIR(MIR* mir) { case Instruction::CONST_WIDE_32: case Instruction::CONST_WIDE: case Instruction::CONST_WIDE_HIGH16: - case Instruction::ARRAY_LENGTH: case Instruction::CMPL_FLOAT: case Instruction::CMPG_FLOAT: case Instruction::CMPL_DOUBLE: @@ -1316,6 +1315,13 @@ bool GvnDeadCodeElimination::RecordMIR(MIR* mir) { } break; + case Instruction::ARRAY_LENGTH: + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) { + must_keep = true; + uses_all_vregs = true; + } + break; + case Instruction::AGET_OBJECT: case Instruction::AGET: case Instruction::AGET_WIDE: diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc index 461c844a60..6ba91b64b6 100644 --- a/compiler/dex/gvn_dead_code_elimination_test.cc +++ b/compiler/dex/gvn_dead_code_elimination_test.cc @@ -2066,4 +2066,31 @@ TEST_F(GvnDeadCodeEliminationTestSimple, UnusedRegs2) { } } +TEST_F(GvnDeadCodeEliminationTestSimple, ArrayLengthThrows) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 0), // null + DEF_UNOP(3, Instruction::ARRAY_LENGTH, 1u, 0u), // null.length + DEF_CONST(3, Instruction::CONST, 2u, 1000u), // Overwrite the array-length dest. 
+ }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 1 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2 }; + ExpectValueNamesNE(diff_indexes); + + static const bool eliminated[] = { + false, false, false, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + } // namespace art diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index df4a9f2048..5f911db382 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -1298,7 +1298,7 @@ void ArmMir2Lir::AssembleLIR() { */ delta &= ~0x3; } - DCHECK_EQ((delta & 0x3), 0); + DCHECK_ALIGNED(delta, 4); // First, a sanity check for cases we shouldn't see now if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) || ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) { diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index 2ef92f851b..062f7aff66 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -880,7 +880,7 @@ LIR* ArmMir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStor LIR* ArmMir2Lir::LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStorage r_base, int displacement, RegStorage r_src_dest, RegStorage r_work) { - DCHECK_EQ(displacement & 3, 0); + DCHECK_ALIGNED(displacement, 4); constexpr int kOffsetMask = 0xff << 2; int encoded_disp = (displacement & kOffsetMask) >> 2; // Within range of the instruction. RegStorage r_ptr = r_base; @@ -942,7 +942,7 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag already_generated = true; break; } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); scale = 2; if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) && (displacement >= 0)) { @@ -959,14 +959,14 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag } break; case kUnsignedHalf: - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); scale = 1; short_form = all_low && (displacement >> (5 + scale)) == 0; opcode16 = kThumbLdrhRRI5; opcode32 = kThumb2LdrhRRI12; break; case kSignedHalf: - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); scale = 1; DCHECK_EQ(opcode16, kThumbBkpt); // Not available. 
opcode32 = kThumb2LdrshRRI12; @@ -1096,7 +1096,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora already_generated = true; break; } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); scale = 2; if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) { short_form = true; @@ -1109,7 +1109,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora break; case kUnsignedHalf: case kSignedHalf: - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); scale = 1; short_form = all_low && (displacement >> (5 + scale)) == 0; opcode16 = kThumbStrhRRI5; diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index b78fb80aa0..25c69d19e5 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -909,7 +909,7 @@ void Arm64Mir2Lir::AssembleLIR() { CodeOffset target = target_lir->offset + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); int32_t delta = target - pc; - DCHECK_EQ(delta & 0x3, 0); + DCHECK_ALIGNED(delta, 4); if (!IS_SIGNED_IMM26(delta >> 2)) { LOG(FATAL) << "Invalid jump range in kFixupT1Branch"; } @@ -933,7 +933,7 @@ void Arm64Mir2Lir::AssembleLIR() { CodeOffset target = target_lir->offset + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); int32_t delta = target - pc; - DCHECK_EQ(delta & 0x3, 0); + DCHECK_ALIGNED(delta, 4); if (!IS_SIGNED_IMM19(delta >> 2)) { LOG(FATAL) << "Invalid jump range in kFixupLoad"; } @@ -965,7 +965,7 @@ void Arm64Mir2Lir::AssembleLIR() { CodeOffset target = target_lir->offset + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); int32_t delta = target - pc; - DCHECK_EQ(delta & 0x3, 0); + DCHECK_ALIGNED(delta, 4); // Check if branch offset can be encoded in tbz/tbnz. 
if (!IS_SIGNED_IMM14(delta >> 2)) { DexOffset dalvik_offset = lir->dalvik_offset; diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index 37e5804f18..ec2475a7f7 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -714,7 +714,7 @@ LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStora } else { opcode = kMipsFldc1; } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; } is64bit = true; @@ -736,15 +736,15 @@ LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStora DCHECK(r_dest.IsDouble()); } } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; case kUnsignedHalf: opcode = kMipsLhu; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kSignedHalf: opcode = kMipsLh; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kUnsignedByte: opcode = kMipsLbu; @@ -891,7 +891,7 @@ LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStor } else { opcode = kMipsFsdc1; } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; } is64bit = true; @@ -913,12 +913,12 @@ LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStor DCHECK(r_src.IsDouble()); } } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; case kUnsignedHalf: case kSignedHalf: opcode = kMipsSh; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kUnsignedByte: case kSignedByte: diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index 61a1becac1..b16ae982f2 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -659,7 +659,7 @@ LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int opcode = is_array ? kX86Mov32RA : kX86Mov32RM; } // TODO: double store is to unaligned address - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; case kWord: if (cu_->target64) { @@ -677,15 +677,15 @@ LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int opcode = is_array ? kX86MovssRA : kX86MovssRM; DCHECK(r_dest.IsFloat()); } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; case kUnsignedHalf: opcode = is_array ? kX86Movzx16RA : kX86Movzx16RM; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kSignedHalf: opcode = is_array ? kX86Movsx16RA : kX86Movsx16RM; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kUnsignedByte: opcode = is_array ? kX86Movzx8RA : kX86Movzx8RM; @@ -812,7 +812,7 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int opcode = is_array ? kX86Mov32AR : kX86Mov32MR; } // TODO: double store is to unaligned address - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; case kWord: if (cu_->target64) { @@ -831,13 +831,13 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int opcode = is_array ? kX86MovssAR : kX86MovssMR; DCHECK(r_src.IsSingle()); } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); consider_non_temporal = true; break; case kUnsignedHalf: case kSignedHalf: opcode = is_array ? 
kX86Mov16AR : kX86Mov16MR; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kUnsignedByte: case kSignedByte: diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 7890108f41..a52bfaeb5b 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -2291,10 +2291,16 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i // TODO: add a command-line option to disable DEX-to-DEX compilation ? // Do not optimize if a VerifiedMethod is missing. SafeCast elision, for example, relies on // it. - (*dex_to_dex_compiler_)(*this, code_item, access_flags, - invoke_type, class_def_idx, - method_idx, class_loader, dex_file, - has_verified_method ? dex_to_dex_compilation_level : kRequired); + compiled_method = (*dex_to_dex_compiler_)( + *this, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + has_verified_method ? dex_to_dex_compilation_level : kRequired); } } if (kTimeCompileMethod) { diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 2d7ceaeea1..5cf4044fd4 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -675,12 +675,13 @@ class CompilerDriver { typedef void (*CompilerCallbackFn)(CompilerDriver& driver); typedef MutexLock* (*CompilerMutexLockFn)(CompilerDriver& driver); - typedef void (*DexToDexCompilerFn)(CompilerDriver& driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, InvokeType invoke_type, - uint32_t class_dex_idx, uint32_t method_idx, - jobject class_loader, const DexFile& dex_file, - DexToDexCompilationLevel dex_to_dex_compilation_level); + typedef CompiledMethod* (*DexToDexCompilerFn)( + CompilerDriver& driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, InvokeType invoke_type, + uint32_t class_dex_idx, uint32_t method_idx, + jobject class_loader, const DexFile& dex_file, + DexToDexCompilationLevel dex_to_dex_compilation_level); DexToDexCompilerFn dex_to_dex_compiler_; void* compiler_context_; diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc index c68bbc0655..c10ffebbbc 100644 --- a/compiler/elf_writer_debug.cc +++ b/compiler/elf_writer_debug.cc @@ -249,16 +249,16 @@ void WriteDebugSections(const CompilerDriver* compiler, // Find all addresses (low_pc) which contain deduped methods. // The first instance of method is not marked deduped_, but the rest is. std::unordered_set<uint32_t> deduped_addresses; - for (auto it = method_infos.begin(); it != method_infos.end(); ++it) { - if (it->deduped_) { - deduped_addresses.insert(it->low_pc_); + for (const OatWriter::DebugInfo& mi : method_infos) { + if (mi.deduped_) { + deduped_addresses.insert(mi.low_pc_); } } // Group the methods into compilation units based on source file. std::vector<std::vector<const OatWriter::DebugInfo*>> compilation_units; const char* last_source_file = nullptr; - for (const auto& mi : method_infos) { + for (const OatWriter::DebugInfo& mi : method_infos) { // Attribute given instruction range only to single method. // Otherwise the debugger might get really confused. 
if (!mi.deduped_) { diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index fdfeb485fd..2b65aa9337 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -715,8 +715,10 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) { DCHECK_EQ(obj, obj->AsString()->Intern()); return; } - mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrong( - obj->AsString()->Intern()); + // InternImageString allows us to intern while holding the heap bitmap lock. This is safe since + // we are guaranteed to not have GC during image writing. + mirror::String* const interned = Runtime::Current()->GetInternTable()->InternImageString( + obj->AsString()); if (obj != interned) { if (!IsImageBinSlotAssigned(interned)) { // interned obj is after us, allocate its location early diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 754fe844f8..1523383657 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -199,7 +199,7 @@ class ImageWriter FINAL { const uint8_t* GetOatAddress(uint32_t offset) const { // With Quick, code is within the OatFile, as there are all in one // .o ELF object. - DCHECK_LT(offset, oat_file_->Size()); + DCHECK_LE(offset, oat_file_->Size()); DCHECK(oat_data_begin_ != nullptr); return offset == 0u ? nullptr : oat_data_begin_ + offset; } diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index a057a4cf16..13f67e6fd4 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -50,7 +50,7 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { // We want to put the method3 at a very precise offset. const uint32_t method3_offset = method1_offset + distance_without_thunks; - CHECK(IsAligned<kArmAlignment>(method3_offset - sizeof(OatQuickMethodHeader))); + CHECK_ALIGNED(method3_offset - sizeof(OatQuickMethodHeader), kArmAlignment); // Calculate size of method2 so that we put method3 at the correct place. const uint32_t method2_offset = @@ -242,8 +242,10 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) { }; constexpr uint32_t max_positive_disp = 16 * MB - 2u + 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches, - kNopCode, ArrayRef<const LinkerPatch>(), + bool thunk_in_gap = Create2MethodsWithGap(method1_code, + ArrayRef<const LinkerPatch>(method1_patches), + kNopCode, + ArrayRef<const LinkerPatch>(), bl_offset_in_method1 + max_positive_disp); ASSERT_FALSE(thunk_in_gap); // There should be no thunk. @@ -262,8 +264,10 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) { }; constexpr uint32_t just_over_max_negative_disp = 16 * MB - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), - method3_code, method3_patches, + bool thunk_in_gap = Create2MethodsWithGap(kNopCode, + ArrayRef<const LinkerPatch>(), + method3_code, + ArrayRef<const LinkerPatch>(method3_patches), just_over_max_negative_disp - bl_offset_in_method3); ASSERT_FALSE(thunk_in_gap); // There should be no thunk. 
@@ -282,8 +286,10 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { }; constexpr uint32_t just_over_max_positive_disp = 16 * MB + 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches, - kNopCode, ArrayRef<const LinkerPatch>(), + bool thunk_in_gap = Create2MethodsWithGap(method1_code, + ArrayRef<const LinkerPatch>(method1_patches), + kNopCode, + ArrayRef<const LinkerPatch>(), bl_offset_in_method1 + just_over_max_positive_disp); ASSERT_TRUE(thunk_in_gap); @@ -311,8 +317,10 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { }; constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), - method3_code, method3_patches, + bool thunk_in_gap = Create2MethodsWithGap(kNopCode, + ArrayRef<const LinkerPatch>(), + method3_code, + ArrayRef<const LinkerPatch>(method3_patches), just_over_max_negative_disp - bl_offset_in_method3); ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2. diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 29355d6968..6b9c530d7a 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -108,7 +108,7 @@ uint32_t Arm64RelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { if (!current_method_thunks_.empty()) { uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kArm64); if (kIsDebugBuild) { - CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size())); + CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize); size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize; CHECK_LE(num_thunks, processed_adrp_thunks_); for (size_t i = 0u; i != num_thunks; ++i) { @@ -203,7 +203,7 @@ void Arm64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, if ((adrp & 0x9f000000u) != 0x90000000u) { CHECK(fix_cortex_a53_843419_); CHECK_EQ(adrp & 0xfc000000u, 0x14000000u); // B <thunk> - CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size())); + CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize); size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize; CHECK_LE(num_thunks, processed_adrp_thunks_); uint32_t b_offset = patch_offset - literal_offset + pc_insn_offset; diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc index 21f93672ad..b3af4c6a05 100644 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -66,7 +66,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { // We want to put the method3 at a very precise offset. const uint32_t last_method_offset = method1_offset + distance_without_thunks; const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader); - CHECK(IsAligned<kArm64Alignment>(gap_end)); + CHECK_ALIGNED(gap_end, kArm64Alignment); // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB). 
// (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB @@ -396,8 +396,10 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) { }; constexpr uint32_t max_positive_disp = 128 * MB - 4u; - uint32_t last_method_idx = Create2MethodsWithGap(method1_code, method1_patches, - kNopCode, ArrayRef<const LinkerPatch>(), + uint32_t last_method_idx = Create2MethodsWithGap(method1_code, + ArrayRef<const LinkerPatch>(method1_patches), + kNopCode, + ArrayRef<const LinkerPatch>(), bl_offset_in_method1 + max_positive_disp); ASSERT_EQ(expected_last_method_idx, last_method_idx); @@ -420,8 +422,10 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarBefore) { }; constexpr uint32_t max_negative_disp = 128 * MB; - uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), - last_method_code, last_method_patches, + uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, + ArrayRef<const LinkerPatch>(), + last_method_code, + ArrayRef<const LinkerPatch>(last_method_patches), max_negative_disp - bl_offset_in_last_method); uint32_t method1_offset = GetMethodOffset(1u); uint32_t last_method_offset = GetMethodOffset(last_method_idx); @@ -445,7 +449,10 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) { constexpr uint32_t just_over_max_positive_disp = 128 * MB; uint32_t last_method_idx = Create2MethodsWithGap( - method1_code, method1_patches, kNopCode, ArrayRef<const LinkerPatch>(), + method1_code, + ArrayRef<const LinkerPatch>(method1_patches), + kNopCode, + ArrayRef<const LinkerPatch>(), bl_offset_in_method1 + just_over_max_positive_disp); ASSERT_EQ(expected_last_method_idx, last_method_idx); @@ -474,7 +481,8 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) { constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4; uint32_t last_method_idx = Create2MethodsWithGap( - kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, last_method_patches, + kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, + ArrayRef<const LinkerPatch>(last_method_patches), just_over_max_negative_disp - bl_offset_in_last_method); uint32_t method1_offset = GetMethodOffset(1u); uint32_t last_method_offset = GetMethodOffset(last_method_idx); diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index a98a3046e5..4318ea5b6c 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -374,9 +374,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { uint32_t quick_code_offset = 0; const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode(); - CHECK(quick_code != nullptr); uint32_t code_size = quick_code->size() * sizeof(uint8_t); - CHECK_NE(code_size, 0U); uint32_t thumb_offset = compiled_method->CodeDelta(); // Deduplicate code arrays if we are not producing debuggable code. @@ -394,16 +392,18 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { } } - MethodReference method_ref(dex_file_, it.GetMemberIndex()); - auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref); - if (method_lb != writer_->method_offset_map_.map.end() && - !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) { - // TODO: Should this be a hard failure? - LOG(WARNING) << "Multiple definitions of " - << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file) - << ((method_lb->second != quick_code_offset) ? 
"; OFFSET MISMATCH" : ""); - } else { - writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset); + if (code_size != 0) { + MethodReference method_ref(dex_file_, it.GetMemberIndex()); + auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref); + if (method_lb != writer_->method_offset_map_.map.end() && + !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) { + // TODO: Should this be a hard failure? + LOG(WARNING) << "Multiple definitions of " + << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file) + << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : ""); + } else { + writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset); + } } // Update quick method header. @@ -411,21 +411,24 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_]; uint32_t mapping_table_offset = method_header->mapping_table_offset_; uint32_t vmap_table_offset = method_header->vmap_table_offset_; + // If we don't have quick code, then we must have a vmap, as that is how the dex2dex + // compiler records its transformations. + DCHECK(quick_code != nullptr || vmap_table_offset != 0); uint32_t gc_map_offset = method_header->gc_map_offset_; // The code offset was 0 when the mapping/vmap table offset was set, so it's set // to 0-offset and we need to adjust it by code_offset. uint32_t code_offset = quick_code_offset - thumb_offset; - if (mapping_table_offset != 0u) { + if (mapping_table_offset != 0u && code_offset != 0u) { mapping_table_offset += code_offset; - DCHECK_LT(mapping_table_offset, code_offset); + DCHECK_LT(mapping_table_offset, code_offset) << "Overflow in oat offsets"; } - if (vmap_table_offset != 0u) { + if (vmap_table_offset != 0u && code_offset != 0u) { vmap_table_offset += code_offset; - DCHECK_LT(vmap_table_offset, code_offset); + DCHECK_LT(vmap_table_offset, code_offset) << "Overflow in oat offsets"; } - if (gc_map_offset != 0u) { + if (gc_map_offset != 0u && code_offset != 0u) { gc_map_offset += code_offset; - DCHECK_LT(gc_map_offset, code_offset); + DCHECK_LT(gc_map_offset, code_offset) << "Overflow in oat offsets"; } uint32_t frame_size_in_bytes = compiled_method->GetFrameSizeInBytes(); uint32_t core_spill_mask = compiled_method->GetCoreSpillMask(); @@ -534,7 +537,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { const ClassDataItemIterator& it, uint32_t thumb_offset) { offset_ = writer_->relative_patcher_->ReserveSpace( - offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex())); + offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex())); offset_ = compiled_method->AlignCode(offset_); DCHECK_ALIGNED_PARAM(offset_, GetInstructionSetAlignment(compiled_method->GetInstructionSet())); @@ -619,15 +622,19 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { *dex_file_, it.GetMemberIndex(), dex_cache, NullHandle<mirror::ClassLoader>(), nullptr, invoke_type); if (method == nullptr) { - LOG(ERROR) << "Unexpected failure to resolve a method: " - << PrettyMethod(it.GetMemberIndex(), *dex_file_, true); + LOG(INTERNAL_FATAL) << "Unexpected failure to resolve a method: " + << PrettyMethod(it.GetMemberIndex(), *dex_file_, true); soa.Self()->AssertPendingException(); mirror::Throwable* exc = soa.Self()->GetException(); std::string dump = exc->Dump(); LOG(FATAL) << dump; + UNREACHABLE(); + } + + if (compiled_method 
!= nullptr && compiled_method->GetQuickCode()->size() != 0) { + method->SetEntryPointFromQuickCompiledCodePtrSize( + reinterpret_cast<void*>(offsets.code_offset_), pointer_size_); } - method->SetEntryPointFromQuickCompiledCodePtrSize(reinterpret_cast<void*>(offsets.code_offset_), - pointer_size_); return true; } @@ -689,85 +696,82 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { OutputStream* out = out_; const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode(); - if (quick_code != nullptr) { - // Need a wrapper if we create a copy for patching. - ArrayRef<const uint8_t> wrapped(*quick_code); - uint32_t code_size = quick_code->size() * sizeof(uint8_t); - CHECK_NE(code_size, 0U); - - // Deduplicate code arrays. - const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_]; - if (method_offsets.code_offset_ >= offset_) { - offset_ = writer_->relative_patcher_->WriteThunks(out, offset_); - if (offset_ == 0u) { - ReportWriteFailure("relative call thunk", it); - return false; - } - uint32_t aligned_offset = compiled_method->AlignCode(offset_); - uint32_t aligned_code_delta = aligned_offset - offset_; - if (aligned_code_delta != 0) { - if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { - ReportWriteFailure("code alignment padding", it); - return false; - } - offset_ += aligned_code_delta; - DCHECK_OFFSET_(); - } - DCHECK_ALIGNED_PARAM(offset_, - GetInstructionSetAlignment(compiled_method->GetInstructionSet())); - DCHECK_EQ(method_offsets.code_offset_, - offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) - << PrettyMethod(it.GetMemberIndex(), *dex_file_); - const OatQuickMethodHeader& method_header = - oat_class->method_headers_[method_offsets_index_]; - writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header)); - if (!out->WriteFully(&method_header, sizeof(method_header))) { - ReportWriteFailure("method header", it); + // Need a wrapper if we create a copy for patching. + ArrayRef<const uint8_t> wrapped(*quick_code); + uint32_t code_size = quick_code->size() * sizeof(uint8_t); + + // Deduplicate code arrays. 
+ const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_]; + if (method_offsets.code_offset_ > offset_) { + offset_ = writer_->relative_patcher_->WriteThunks(out, offset_); + if (offset_ == 0u) { + ReportWriteFailure("relative call thunk", it); + return false; + } + uint32_t aligned_offset = compiled_method->AlignCode(offset_); + uint32_t aligned_code_delta = aligned_offset - offset_; + if (aligned_code_delta != 0) { + if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { + ReportWriteFailure("code alignment padding", it); return false; } - writer_->size_method_header_ += sizeof(method_header); - offset_ += sizeof(method_header); + offset_ += aligned_code_delta; DCHECK_OFFSET_(); + } + DCHECK_ALIGNED_PARAM(offset_, + GetInstructionSetAlignment(compiled_method->GetInstructionSet())); + DCHECK_EQ(method_offsets.code_offset_, + offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) + << PrettyMethod(it.GetMemberIndex(), *dex_file_); + const OatQuickMethodHeader& method_header = + oat_class->method_headers_[method_offsets_index_]; + writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header)); + if (!out->WriteFully(&method_header, sizeof(method_header))) { + ReportWriteFailure("method header", it); + return false; + } + writer_->size_method_header_ += sizeof(method_header); + offset_ += sizeof(method_header); + DCHECK_OFFSET_(); - if (!compiled_method->GetPatches().empty()) { - patched_code_.assign(quick_code->begin(), quick_code->end()); - wrapped = ArrayRef<const uint8_t>(patched_code_); - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.Type() == kLinkerPatchCallRelative) { - // NOTE: Relative calls across oat files are not supported. - uint32_t target_offset = GetTargetOffset(patch); - uint32_t literal_offset = patch.LiteralOffset(); - writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset, - offset_ + literal_offset, target_offset); - } else if (patch.Type() == kLinkerPatchDexCacheArray) { - uint32_t target_offset = GetDexCacheOffset(patch); - uint32_t literal_offset = patch.LiteralOffset(); - writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch, - offset_ + literal_offset, - target_offset); - } else if (patch.Type() == kLinkerPatchCall) { - uint32_t target_offset = GetTargetOffset(patch); - PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset); - } else if (patch.Type() == kLinkerPatchMethod) { - ArtMethod* method = GetTargetMethod(patch); - PatchMethodAddress(&patched_code_, patch.LiteralOffset(), method); - } else if (patch.Type() == kLinkerPatchType) { - mirror::Class* type = GetTargetType(patch); - PatchObjectAddress(&patched_code_, patch.LiteralOffset(), type); - } + if (!compiled_method->GetPatches().empty()) { + patched_code_.assign(quick_code->begin(), quick_code->end()); + wrapped = ArrayRef<const uint8_t>(patched_code_); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchCallRelative) { + // NOTE: Relative calls across oat files are not supported. 
+ uint32_t target_offset = GetTargetOffset(patch); + uint32_t literal_offset = patch.LiteralOffset(); + writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset, + offset_ + literal_offset, target_offset); + } else if (patch.Type() == kLinkerPatchDexCacheArray) { + uint32_t target_offset = GetDexCacheOffset(patch); + uint32_t literal_offset = patch.LiteralOffset(); + writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch, + offset_ + literal_offset, + target_offset); + } else if (patch.Type() == kLinkerPatchCall) { + uint32_t target_offset = GetTargetOffset(patch); + PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset); + } else if (patch.Type() == kLinkerPatchMethod) { + ArtMethod* method = GetTargetMethod(patch); + PatchMethodAddress(&patched_code_, patch.LiteralOffset(), method); + } else if (patch.Type() == kLinkerPatchType) { + mirror::Class* type = GetTargetType(patch); + PatchObjectAddress(&patched_code_, patch.LiteralOffset(), type); } } + } - writer_->oat_header_->UpdateChecksum(wrapped.data(), code_size); - if (!out->WriteFully(wrapped.data(), code_size)) { - ReportWriteFailure("method code", it); - return false; - } - writer_->size_code_ += code_size; - offset_ += code_size; + writer_->oat_header_->UpdateChecksum(wrapped.data(), code_size); + if (!out->WriteFully(wrapped.data(), code_size)) { + ReportWriteFailure("method code", it); + return false; } - DCHECK_OFFSET_(); + writer_->size_code_ += code_size; + offset_ += code_size; } + DCHECK_OFFSET_(); ++method_offsets_index_; } diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index 329112a377..84201c39a7 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -154,11 +154,6 @@ void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { // entry block. Any following blocks would have had the join block // as a dominator, and `MergeWith` handles changing that to the // entry block. - - // Remove the original condition if it is now unused. 
- if (!if_condition->HasUses()) { - if_condition->GetBlock()->RemoveInstructionOrPhi(if_condition); - } } void HBooleanSimplifier::Run() { diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 1319f2c62a..52a3a1534a 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -804,7 +804,9 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, invoke_type = kDirect; break; case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_VIRTUAL_QUICK: case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: invoke_type = kVirtual; break; case Instruction::INVOKE_INTERFACE: @@ -1051,7 +1053,15 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, bool is_put) { uint32_t source_or_dest_reg = instruction.VRegA_22c(); uint32_t obj_reg = instruction.VRegB_22c(); - uint16_t field_index = instruction.VRegC_22c(); + uint16_t field_index; + if (instruction.IsQuickened()) { + if (!CanDecodeQuickenedInfo()) { + return false; + } + field_index = LookupQuickenedInfo(dex_pc); + } else { + field_index = instruction.VRegC_22c(); + } ScopedObjectAccess soa(Thread::Current()); ArtField* resolved_field = @@ -1560,6 +1570,17 @@ void HGraphBuilder::PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex } } +bool HGraphBuilder::CanDecodeQuickenedInfo() const { + return interpreter_metadata_ != nullptr; +} + +uint16_t HGraphBuilder::LookupQuickenedInfo(uint32_t dex_pc) { + DCHECK(interpreter_metadata_ != nullptr); + uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_); + DCHECK_EQ(dex_pc, dex_pc_in_map); + return DecodeUnsignedLeb128(&interpreter_metadata_); +} + bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc) { if (current_block_ == nullptr) { return true; // Dead code @@ -1657,6 +1678,7 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::RETURN_VOID_NO_BARRIER: case Instruction::RETURN_VOID: { BuildReturn(instruction, Primitive::kPrimVoid); break; @@ -1705,8 +1727,17 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::INVOKE_INTERFACE: case Instruction::INVOKE_STATIC: case Instruction::INVOKE_SUPER: - case Instruction::INVOKE_VIRTUAL: { - uint32_t method_idx = instruction.VRegB_35c(); + case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_VIRTUAL_QUICK: { + uint16_t method_idx; + if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) { + if (!CanDecodeQuickenedInfo()) { + return false; + } + method_idx = LookupQuickenedInfo(dex_pc); + } else { + method_idx = instruction.VRegB_35c(); + } uint32_t number_of_vreg_arguments = instruction.VRegA_35c(); uint32_t args[5]; instruction.GetVarArgs(args); @@ -1721,8 +1752,17 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::INVOKE_INTERFACE_RANGE: case Instruction::INVOKE_STATIC_RANGE: case Instruction::INVOKE_SUPER_RANGE: - case Instruction::INVOKE_VIRTUAL_RANGE: { - uint32_t method_idx = instruction.VRegB_3rc(); + case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: { + uint16_t method_idx; + if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) { + if (!CanDecodeQuickenedInfo()) { + return false; + } + method_idx = LookupQuickenedInfo(dex_pc); + } else { + method_idx = instruction.VRegB_3rc(); + } uint32_t number_of_vreg_arguments = instruction.VRegA_3rc(); 
uint32_t register_index = instruction.VRegC(); if (!BuildInvoke(instruction, dex_pc, method_idx, @@ -2375,12 +2415,19 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; case Instruction::IGET: + case Instruction::IGET_QUICK: case Instruction::IGET_WIDE: + case Instruction::IGET_WIDE_QUICK: case Instruction::IGET_OBJECT: + case Instruction::IGET_OBJECT_QUICK: case Instruction::IGET_BOOLEAN: + case Instruction::IGET_BOOLEAN_QUICK: case Instruction::IGET_BYTE: + case Instruction::IGET_BYTE_QUICK: case Instruction::IGET_CHAR: - case Instruction::IGET_SHORT: { + case Instruction::IGET_CHAR_QUICK: + case Instruction::IGET_SHORT: + case Instruction::IGET_SHORT_QUICK: { if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) { return false; } @@ -2388,12 +2435,19 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 } case Instruction::IPUT: + case Instruction::IPUT_QUICK: case Instruction::IPUT_WIDE: + case Instruction::IPUT_WIDE_QUICK: case Instruction::IPUT_OBJECT: + case Instruction::IPUT_OBJECT_QUICK: case Instruction::IPUT_BOOLEAN: + case Instruction::IPUT_BOOLEAN_QUICK: case Instruction::IPUT_BYTE: + case Instruction::IPUT_BYTE_QUICK: case Instruction::IPUT_CHAR: - case Instruction::IPUT_SHORT: { + case Instruction::IPUT_CHAR_QUICK: + case Instruction::IPUT_SHORT: + case Instruction::IPUT_SHORT_QUICK: { if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) { return false; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 76610f5be2..ad5d92345b 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -39,7 +39,8 @@ class HGraphBuilder : public ValueObject { const DexCompilationUnit* const outer_compilation_unit, const DexFile* dex_file, CompilerDriver* driver, - OptimizingCompilerStats* compiler_stats) + OptimizingCompilerStats* compiler_stats, + const uint8_t* interpreter_metadata) : arena_(graph->GetArena()), branch_targets_(graph->GetArena(), 0), locals_(graph->GetArena(), 0), @@ -55,7 +56,8 @@ class HGraphBuilder : public ValueObject { code_start_(nullptr), latest_result_(nullptr), can_use_baseline_for_string_init_(true), - compilation_stats_(compiler_stats) {} + compilation_stats_(compiler_stats), + interpreter_metadata_(interpreter_metadata) {} // Only for unit testing. 
HGraphBuilder(HGraph* graph, Primitive::Type return_type = Primitive::kPrimInt) @@ -120,6 +122,9 @@ class HGraphBuilder : public ValueObject { const DexFile::CodeItem& code_item, const DexFile::TryItem& try_item); + bool CanDecodeQuickenedInfo() const; + uint16_t LookupQuickenedInfo(uint32_t dex_pc); + void InitializeLocals(uint16_t count); HLocal* GetLocalAt(int register_index) const; void UpdateLocal(int register_index, HInstruction* instruction) const; @@ -307,6 +312,8 @@ class HGraphBuilder : public ValueObject { OptimizingCompilerStats* compilation_stats_; + const uint8_t* interpreter_metadata_; + DISALLOW_COPY_AND_ASSIGN(HGraphBuilder); }; diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 5de629d605..6269d1628e 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -128,7 +128,7 @@ void HDeadCodeElimination::RemoveDeadInstructions() { for (i.Advance(); !i.Done(); i.Advance()) { HInstruction* inst = i.Current(); DCHECK(!inst->IsControlFlow()); - if (!inst->HasSideEffects() + if (!inst->DoesAnyWrite() && !inst->CanThrow() && !inst->IsSuspendCheck() // If we added an explicit barrier then we should keep it. diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 9679d0ab70..cfebb77dd7 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -136,6 +136,33 @@ void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) { VisitInstruction(check); } +void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) { + // Ensure that all exception handlers are catch blocks and that handlers + // are not listed multiple times. + // Note that a normal-flow successor may be a catch block before CFG + // simplification. We only test normal-flow successors in SsaChecker. + for (HExceptionHandlerIterator it(*try_boundary); !it.Done(); it.Advance()) { + HBasicBlock* handler = it.Current(); + if (!handler->IsCatchBlock()) { + AddError(StringPrintf("Block %d with %s:%d has exceptional successor %d which " + "is not a catch block.", + current_block_->GetBlockId(), + try_boundary->DebugName(), + try_boundary->GetId(), + handler->GetBlockId())); + } + if (current_block_->GetSuccessors().Contains( + handler, /* start_from */ it.CurrentSuccessorIndex() + 1)) { + AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.", + handler->GetBlockId(), + try_boundary->DebugName(), + try_boundary->GetId())); + } + } + + VisitInstruction(try_boundary); +} + void GraphChecker::VisitInstruction(HInstruction* instruction) { if (seen_ids_.IsBitSet(instruction->GetId())) { AddError(StringPrintf("Instruction id %d is duplicate in graph.", @@ -301,11 +328,32 @@ void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { void SSAChecker::VisitBasicBlock(HBasicBlock* block) { super_type::VisitBasicBlock(block); + // Ensure that catch blocks are not normal successors, and normal blocks are + // never exceptional successors. 
+ const size_t num_normal_successors = block->NumberOfNormalSuccessors(); + for (size_t j = 0; j < num_normal_successors; ++j) { + HBasicBlock* successor = block->GetSuccessors().Get(j); + if (successor->IsCatchBlock()) { + AddError(StringPrintf("Catch block %d is a normal successor of block %d.", + successor->GetBlockId(), + block->GetBlockId())); + } + } + for (size_t j = num_normal_successors, e = block->GetSuccessors().Size(); j < e; ++j) { + HBasicBlock* successor = block->GetSuccessors().Get(j); + if (!successor->IsCatchBlock()) { + AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.", + successor->GetBlockId(), + block->GetBlockId())); + } + } + // Ensure there is no critical edge (i.e., an edge connecting a // block with multiple successors to a block with multiple - // predecessors). - if (block->GetSuccessors().Size() > 1) { - for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { + // predecessors). Exceptional edges are synthesized and hence + // not accounted for. + if (block->NumberOfNormalSuccessors() > 1) { + for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) { HBasicBlock* successor = block->GetSuccessors().Get(j); if (successor->GetPredecessors().Size() > 1) { AddError(StringPrintf("Critical edge between blocks %d and %d.", @@ -326,6 +374,54 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) { } } + // Ensure try membership information is consistent. + HTryBoundary* try_entry = block->GetTryEntry(); + if (block->IsCatchBlock()) { + if (try_entry != nullptr) { + AddError(StringPrintf("Catch blocks should not be try blocks but catch block %d " + "has try entry %s:%d.", + block->GetBlockId(), + try_entry->DebugName(), + try_entry->GetId())); + } + + if (block->IsLoopHeader()) { + AddError(StringPrintf("Catch blocks should not be loop headers but catch block %d is.", + block->GetBlockId())); + } + } else { + for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) { + HBasicBlock* predecessor = block->GetPredecessors().Get(i); + HTryBoundary* incoming_try_entry = predecessor->ComputeTryEntryOfSuccessors(); + if (try_entry == nullptr) { + if (incoming_try_entry != nullptr) { + AddError(StringPrintf("Block %d has no try entry but try entry %s:%d follows " + "from predecessor %d.", + block->GetBlockId(), + incoming_try_entry->DebugName(), + incoming_try_entry->GetId(), + predecessor->GetBlockId())); + } + } else if (incoming_try_entry == nullptr) { + AddError(StringPrintf("Block %d has try entry %s:%d but no try entry follows " + "from predecessor %d.", + block->GetBlockId(), + try_entry->DebugName(), + try_entry->GetId(), + predecessor->GetBlockId())); + } else if (!incoming_try_entry->HasSameExceptionHandlersAs(*try_entry)) { + AddError(StringPrintf("Block %d has try entry %s:%d which is not consistent " + "with %s:%d that follows from predecessor %d.", + block->GetBlockId(), + try_entry->DebugName(), + try_entry->GetId(), + incoming_try_entry->DebugName(), + incoming_try_entry->GetId(), + predecessor->GetBlockId())); + } + } + } + if (block->IsLoopHeader()) { CheckLoop(block); } @@ -472,32 +568,6 @@ void SSAChecker::VisitPhi(HPhi* phi) { phi->GetBlock()->GetBlockId())); } - // Ensure the number of inputs of a phi is the same as the number of - // its predecessors. 
- const GrowableArray<HBasicBlock*>& predecessors = - phi->GetBlock()->GetPredecessors(); - if (phi->InputCount() != predecessors.Size()) { - AddError(StringPrintf( - "Phi %d in block %d has %zu inputs, " - "but block %d has %zu predecessors.", - phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(), - phi->GetBlock()->GetBlockId(), predecessors.Size())); - } else { - // Ensure phi input at index I either comes from the Ith - // predecessor or from a block that dominates this predecessor. - for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { - HInstruction* input = phi->InputAt(i); - HBasicBlock* predecessor = predecessors.Get(i); - if (!(input->GetBlock() == predecessor - || input->GetBlock()->Dominates(predecessor))) { - AddError(StringPrintf( - "Input %d at index %zu of phi %d from block %d is not defined in " - "predecessor number %zu nor in a block dominating it.", - input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(), - i)); - } - } - } // Ensure that the inputs have the same primitive kind as the phi. for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { HInstruction* input = phi->InputAt(i); @@ -516,6 +586,38 @@ void SSAChecker::VisitPhi(HPhi* phi) { phi->GetBlock()->GetBlockId(), Primitive::PrettyDescriptor(phi->GetType()))); } + + if (phi->IsCatchPhi()) { + // The number of inputs of a catch phi corresponds to the total number of + // throwing instructions caught by this catch block. + } else { + // Ensure the number of inputs of a non-catch phi is the same as the number + // of its predecessors. + const GrowableArray<HBasicBlock*>& predecessors = + phi->GetBlock()->GetPredecessors(); + if (phi->InputCount() != predecessors.Size()) { + AddError(StringPrintf( + "Phi %d in block %d has %zu inputs, " + "but block %d has %zu predecessors.", + phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(), + phi->GetBlock()->GetBlockId(), predecessors.Size())); + } else { + // Ensure phi input at index I either comes from the Ith + // predecessor or from a block that dominates this predecessor. + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + HBasicBlock* predecessor = predecessors.Get(i); + if (!(input->GetBlock() == predecessor + || input->GetBlock()->Dominates(predecessor))) { + AddError(StringPrintf( + "Input %d at index %zu of phi %d from block %d is not defined in " + "predecessor number %zu nor in a block dominating it.", + input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(), + i)); + } + } + } + } } void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_index) { diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 7c72e23e2d..0e270dbe18 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -48,6 +48,9 @@ class GraphChecker : public HGraphDelegateVisitor { // Check that the HasBoundsChecks() flag is set for bounds checks. void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; + // Check successors of blocks ending in TryBoundary. + void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE; + // Check that HCheckCast and HInstanceOf have HLoadClass as second input. 
void VisitCheckCast(HCheckCast* check) OVERRIDE; void VisitInstanceOf(HInstanceOf* check) OVERRIDE; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index aaf7a6d8f5..694b68ce94 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -158,12 +158,14 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { std::ostream& output, const char* pass_name, bool is_after_pass, + bool graph_in_bad_state, const CodeGenerator& codegen, const DisassemblyInformation* disasm_info = nullptr) : HGraphDelegateVisitor(graph), output_(output), pass_name_(pass_name), is_after_pass_(is_after_pass), + graph_in_bad_state_(graph_in_bad_state), codegen_(codegen), disasm_info_(disasm_info), disassembler_(disasm_info_ != nullptr @@ -251,11 +253,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void PrintSuccessors(HBasicBlock* block) { AddIndent(); output_ << "successors"; - for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) { - if (!block->IsExceptionalSuccessor(i)) { - HBasicBlock* successor = block->GetSuccessors().Get(i); - output_ << " \"B" << successor->GetBlockId() << "\" "; - } + for (size_t i = 0; i < block->NumberOfNormalSuccessors(); ++i) { + HBasicBlock* successor = block->GetSuccessors().Get(i); + output_ << " \"B" << successor->GetBlockId() << "\" "; } output_<< std::endl; } @@ -263,11 +263,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void PrintExceptionHandlers(HBasicBlock* block) { AddIndent(); output_ << "xhandlers"; - for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) { - if (block->IsExceptionalSuccessor(i)) { - HBasicBlock* handler = block->GetSuccessors().Get(i); - output_ << " \"B" << handler->GetBlockId() << "\" "; - } + for (size_t i = block->NumberOfNormalSuccessors(); i < block->GetSuccessors().Size(); ++i) { + HBasicBlock* handler = block->GetSuccessors().Get(i); + output_ << " \"B" << handler->GetBlockId() << "\" "; } if (block->IsExitBlock() && (disasm_info_ != nullptr) && @@ -351,6 +349,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void VisitPhi(HPhi* phi) OVERRIDE { StartAttributeStream("reg") << phi->GetRegNumber(); + StartAttributeStream("is_catch_phi") << std::boolalpha << phi->IsCatchPhi() << std::noboolalpha; } void VisitMemoryBarrier(HMemoryBarrier* barrier) OVERRIDE { @@ -586,7 +585,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void Run() { StartTag("cfg"); - std::string pass_desc = std::string(pass_name_) + (is_after_pass_ ? " (after)" : " (before)"); + std::string pass_desc = std::string(pass_name_) + + " (" + + (is_after_pass_ ? "after" : "before") + + (graph_in_bad_state_ ? 
", bad_state" : "") + + ")"; PrintProperty("name", pass_desc.c_str()); if (disasm_info_ != nullptr) { DumpDisassemblyBlockForFrameEntry(); @@ -655,6 +658,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { std::ostream& output_; const char* pass_name_; const bool is_after_pass_; + const bool graph_in_bad_state_; const CodeGenerator& codegen_; const DisassemblyInformation* disasm_info_; std::unique_ptr<HGraphVisualizerDisassembler> disassembler_; @@ -670,7 +674,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, void HGraphVisualizer::PrintHeader(const char* method_name) const { DCHECK(output_ != nullptr); - HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_); + HGraphVisualizerPrinter printer(graph_, *output_, "", true, false, codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", method_name); printer.PrintProperty("method", method_name); @@ -678,10 +682,17 @@ void HGraphVisualizer::PrintHeader(const char* method_name) const { printer.EndTag("compilation"); } -void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) const { +void HGraphVisualizer::DumpGraph(const char* pass_name, + bool is_after_pass, + bool graph_in_bad_state) const { DCHECK(output_ != nullptr); if (!graph_->GetBlocks().IsEmpty()) { - HGraphVisualizerPrinter printer(graph_, *output_, pass_name, is_after_pass, codegen_); + HGraphVisualizerPrinter printer(graph_, + *output_, + pass_name, + is_after_pass, + graph_in_bad_state, + codegen_); printer.Run(); } } @@ -689,8 +700,13 @@ void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) cons void HGraphVisualizer::DumpGraphWithDisassembly() const { DCHECK(output_ != nullptr); if (!graph_->GetBlocks().IsEmpty()) { - HGraphVisualizerPrinter printer( - graph_, *output_, "disassembly", true, codegen_, codegen_.GetDisassemblyInformation()); + HGraphVisualizerPrinter printer(graph_, + *output_, + "disassembly", + /* is_after_pass */ true, + /* graph_in_bad_state */ false, + codegen_, + codegen_.GetDisassemblyInformation()); printer.Run(); } } diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index b6b66df601..66588f6e36 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -104,7 +104,7 @@ class HGraphVisualizer : public ValueObject { const CodeGenerator& codegen); void PrintHeader(const char* method_name) const; - void DumpGraph(const char* pass_name, bool is_after_pass = true) const; + void DumpGraph(const char* pass_name, bool is_after_pass, bool graph_in_bad_state) const; void DumpGraphWithDisassembly() const; private: diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 708733e28c..39006465d5 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -120,7 +120,7 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { // Removes all instructions in the set affected by the given side effects. void Kill(SideEffects side_effects) { DeleteAllImpureWhich([side_effects](Node* node) { - return node->GetInstruction()->GetSideEffects().DependsOn(side_effects); + return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects); }); } @@ -264,7 +264,7 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { // odd buckets to speed up deletion. 
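Aside, not part of the patch: the visualizer change above relies on the new successor-list layout in which exception handlers are stored after the normal successors. A minimal stand-alone sketch of that partition; the block layout and field names are invented for the example.

#include <cstdio>
#include <vector>

struct Block {
  std::vector<int> successors;   // Block ids; exception handlers come last.
  size_t num_normal_successors;  // Mirrors NumberOfNormalSuccessors().
};

static void PrintEdges(const Block& b) {
  std::printf("successors");
  for (size_t i = 0; i < b.num_normal_successors; ++i) {
    std::printf(" \"B%d\"", b.successors[i]);
  }
  std::printf("\nxhandlers");
  for (size_t i = b.num_normal_successors; i < b.successors.size(); ++i) {
    std::printf(" \"B%d\"", b.successors[i]);
  }
  std::printf("\n");
}

int main() {
  // A block ending in a try boundary: one normal successor (B4), two handlers (B7, B9).
  Block try_block{{4, 7, 9}, 1};
  PrintEdges(try_block);
  return 0;
}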
size_t HashCode(HInstruction* instruction) const { size_t hash_code = instruction->ComputeHashCode(); - if (instruction->GetSideEffects().HasDependencies()) { + if (instruction->GetSideEffects().DoesAnyRead()) { return (hash_code << 1) | 0; } else { return (hash_code << 1) | 1; diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index d8a09ffc38..5c6239b3f9 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -206,7 +206,7 @@ TEST(GVNTest, LoopFieldElimination) { // and the body to be GVN'ed. loop_body->AddInstruction(new (&allocator) HInstanceFieldSet(parameter, parameter, - Primitive::kPrimNot, + Primitive::kPrimBoolean, MemberOffset(42), false, kUnknownFieldIndex, @@ -323,9 +323,10 @@ TEST(GVNTest, LoopSideEffects) { SideEffectsAnalysis side_effects(graph); side_effects.Run(); - ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects()); - ASSERT_FALSE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects()); - ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects()); + ASSERT_TRUE(side_effects.GetBlockEffects(entry).DoesAnyWrite()); + ASSERT_FALSE(side_effects.GetBlockEffects(outer_loop_body).DoesAnyWrite()); + ASSERT_FALSE(side_effects.GetLoopEffects(outer_loop_header).DoesAnyWrite()); + ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).DoesAnyWrite()); } // Check that the side effects of the outer loop does not affect the inner loop. @@ -343,10 +344,10 @@ TEST(GVNTest, LoopSideEffects) { SideEffectsAnalysis side_effects(graph); side_effects.Run(); - ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects()); - ASSERT_TRUE(side_effects.GetBlockEffects(outer_loop_body).HasSideEffects()); - ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects()); - ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects()); + ASSERT_TRUE(side_effects.GetBlockEffects(entry).DoesAnyWrite()); + ASSERT_TRUE(side_effects.GetBlockEffects(outer_loop_body).DoesAnyWrite()); + ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).DoesAnyWrite()); + ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).DoesAnyWrite()); } // Check that the side effects of the inner loop affects the outer loop. 
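Aside, not part of the patch: the HashCode() change above keeps the bucket-parity trick of the GVN value set, namely that instructions which read get an even tag so that Kill() can skip the odd (pure) buckets entirely. A minimal sketch under invented names; Entry and the sample data are not ART types.

#include <cstdint>
#include <cstdio>
#include <vector>

struct Entry {
  const char* name;
  bool reads;          // Would be GetSideEffects().DoesAnyRead() in the real set.
  uint32_t raw_hash;
};

// Reading entries get even tags, pure entries odd ones.
static uint32_t TaggedHash(const Entry& e) {
  return (e.raw_hash << 1) | (e.reads ? 0u : 1u);
}

int main() {
  std::vector<Entry> set = {
      {"ArrayGet", true, 11}, {"Add", false, 12}, {"InstanceFieldGet", true, 13}};
  // Kill: only entries that may read (even tag) can be invalidated by a write,
  // so only those buckets need to be visited.
  for (const Entry& e : set) {
    uint32_t h = TaggedHash(e);
    if ((h & 1u) == 0u) {
      std::printf("inspect & maybe kill: %s (hash %u)\n", e.name, h);
    } else {
      std::printf("pure, keep untouched: %s (hash %u)\n", e.name, h);
    }
  }
  return 0;
}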
@@ -365,10 +366,10 @@ TEST(GVNTest, LoopSideEffects) { SideEffectsAnalysis side_effects(graph); side_effects.Run(); - ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects()); - ASSERT_FALSE(side_effects.GetBlockEffects(outer_loop_body).HasSideEffects()); - ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects()); - ASSERT_TRUE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects()); + ASSERT_TRUE(side_effects.GetBlockEffects(entry).DoesAnyWrite()); + ASSERT_FALSE(side_effects.GetBlockEffects(outer_loop_body).DoesAnyWrite()); + ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).DoesAnyWrite()); + ASSERT_TRUE(side_effects.GetLoopEffects(inner_loop_header).DoesAnyWrite()); } } } // namespace art diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 3efe7c77fa..cea7dd9b8d 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -326,7 +326,8 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, &outer_compilation_unit_, resolved_method->GetDexFile(), compiler_driver_, - &inline_stats); + &inline_stats, + resolved_method->GetQuickenedInfo()); if (!builder.BuildGraph(*code_item)) { VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index 2535ea274a..5b89b4ec74 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -115,7 +115,7 @@ void LICM::Run() { HInstruction* instruction = inst_it.Current(); if (instruction->CanBeMoved() && (!instruction->CanThrow() || !found_first_non_hoisted_throwing_instruction_in_loop) - && !instruction->GetSideEffects().DependsOn(loop_effects) + && !instruction->GetSideEffects().MayDependOn(loop_effects) && InputsAreDefinedBeforeLoop(instruction)) { // We need to update the environment if the instruction has a loop header // phi in it. diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc new file mode 100644 index 0000000000..6e6e0b5803 --- /dev/null +++ b/compiler/optimizing/licm_test.cc @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "base/arena_allocator.h" +#include "builder.h" +#include "gtest/gtest.h" +#include "licm.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "side_effects_analysis.h" + +namespace art { + +/** + * Fixture class for the LICM tests. + */ +class LICMTest : public testing::Test { + public: + LICMTest() : pool_(), allocator_(&pool_) { + graph_ = CreateGraph(&allocator_); + } + + ~LICMTest() { } + + // Builds a singly-nested loop structure in CFG. Tests can further populate + // the basic blocks with instructions to set up interesting scenarios. 
+ void BuildLoop() { + entry_ = new (&allocator_) HBasicBlock(graph_); + loop_preheader_ = new (&allocator_) HBasicBlock(graph_); + loop_header_ = new (&allocator_) HBasicBlock(graph_); + loop_body_ = new (&allocator_) HBasicBlock(graph_); + exit_ = new (&allocator_) HBasicBlock(graph_); + + graph_->AddBlock(entry_); + graph_->AddBlock(loop_preheader_); + graph_->AddBlock(loop_header_); + graph_->AddBlock(loop_body_); + graph_->AddBlock(exit_); + + graph_->SetEntryBlock(entry_); + graph_->SetExitBlock(exit_); + + // Set up loop flow in CFG. + entry_->AddSuccessor(loop_preheader_); + loop_preheader_->AddSuccessor(loop_header_); + loop_header_->AddSuccessor(loop_body_); + loop_header_->AddSuccessor(exit_); + loop_body_->AddSuccessor(loop_header_); + + // Provide boiler-plate instructions. + parameter_ = new (&allocator_) HParameterValue(0, Primitive::kPrimNot); + entry_->AddInstruction(parameter_); + constant_ = new (&allocator_) HConstant(Primitive::kPrimInt); + loop_preheader_->AddInstruction(constant_); + loop_header_->AddInstruction(new (&allocator_) HIf(parameter_)); + loop_body_->AddInstruction(new (&allocator_) HGoto()); + exit_->AddInstruction(new (&allocator_) HExit()); + } + + // Performs LICM optimizations (after proper set up). + void PerformLICM() { + ASSERT_TRUE(graph_->TryBuildingSsa()); + SideEffectsAnalysis side_effects(graph_); + side_effects.Run(); + LICM licm(graph_, side_effects); + licm.Run(); + } + + // General building fields. + ArenaPool pool_; + ArenaAllocator allocator_; + HGraph* graph_; + + // Specific basic blocks. + HBasicBlock* entry_; + HBasicBlock* loop_preheader_; + HBasicBlock* loop_header_; + HBasicBlock* loop_body_; + HBasicBlock* exit_; + + HInstruction* parameter_; // "this" + HInstruction* constant_; +}; + +// +// The actual LICM tests. +// + +TEST_F(LICMTest, ConstantHoisting) { + BuildLoop(); + + // Populate the loop with instructions: set array to constant. + HInstruction* constant = new (&allocator_) HConstant(Primitive::kPrimDouble); + loop_body_->InsertInstructionBefore(constant, loop_body_->GetLastInstruction()); + HInstruction* set_array = new (&allocator_) HArraySet( + parameter_, constant_, constant, Primitive::kPrimDouble, 0); + loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); + + CHECK_EQ(constant->GetBlock(), loop_body_); + CHECK_EQ(set_array->GetBlock(), loop_body_); + PerformLICM(); + CHECK_EQ(constant->GetBlock(), loop_preheader_); + CHECK_EQ(set_array->GetBlock(), loop_body_); +} + +TEST_F(LICMTest, FieldHoisting) { + BuildLoop(); + + // Populate the loop with instructions: set/get field with different types. + HInstruction* get_field = new (&allocator_) HInstanceFieldGet( + parameter_, Primitive::kPrimLong, MemberOffset(10), + false, kUnknownFieldIndex, graph_->GetDexFile()); + loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction()); + HInstruction* set_field = new (&allocator_) HInstanceFieldSet( + parameter_, constant_, Primitive::kPrimInt, MemberOffset(20), + false, kUnknownFieldIndex, graph_->GetDexFile()); + loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction()); + + CHECK_EQ(get_field->GetBlock(), loop_body_); + CHECK_EQ(set_field->GetBlock(), loop_body_); + PerformLICM(); + CHECK_EQ(get_field->GetBlock(), loop_preheader_); + CHECK_EQ(set_field->GetBlock(), loop_body_); +} + +TEST_F(LICMTest, NoFieldHoisting) { + BuildLoop(); + + // Populate the loop with instructions: set/get field with same types. 
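Aside, not part of the patch: the FieldHoisting and NoFieldHoisting tests exercise the type-disambiguated dependence check used by LICM, in which a field read only blocks hoisting when the loop may write the same kind of location. A simplified sketch (it ignores the I/F and J/D aliasing the real encoding applies); the enum and sets are invented stand-ins.

#include <cstdio>
#include <set>

enum class Type { Int, Long, Float, Double };

static bool CanHoistFieldRead(Type read_type, const std::set<Type>& loop_field_writes) {
  // Mirrors "the instruction does not MayDependOn the loop's side effects":
  // a field read of type T only depends on field writes of type T.
  return loop_field_writes.count(read_type) == 0;
}

int main() {
  std::set<Type> writes_int = {Type::Int};    // loop body writes an int field
  std::set<Type> writes_long = {Type::Long};  // loop body writes a long field
  std::printf("hoist long get past int set:  %s\n",
              CanHoistFieldRead(Type::Long, writes_int) ? "yes" : "no");
  std::printf("hoist long get past long set: %s\n",
              CanHoistFieldRead(Type::Long, writes_long) ? "yes" : "no");
  return 0;
}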
+ HInstruction* get_field = new (&allocator_) HInstanceFieldGet( + parameter_, Primitive::kPrimLong, MemberOffset(10), + false, kUnknownFieldIndex, graph_->GetDexFile()); + loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction()); + HInstruction* set_field = new (&allocator_) HInstanceFieldSet( + parameter_, get_field, Primitive::kPrimLong, MemberOffset(10), + false, kUnknownFieldIndex, graph_->GetDexFile()); + loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction()); + + CHECK_EQ(get_field->GetBlock(), loop_body_); + CHECK_EQ(set_field->GetBlock(), loop_body_); + PerformLICM(); + CHECK_EQ(get_field->GetBlock(), loop_body_); + CHECK_EQ(set_field->GetBlock(), loop_body_); +} + +TEST_F(LICMTest, ArrayHoisting) { + BuildLoop(); + + // Populate the loop with instructions: set/get array with different types. + HInstruction* get_array = new (&allocator_) HArrayGet( + parameter_, constant_, Primitive::kPrimLong); + loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction()); + HInstruction* set_array = new (&allocator_) HArraySet( + parameter_, constant_, constant_, Primitive::kPrimInt, 0); + loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); + + CHECK_EQ(get_array->GetBlock(), loop_body_); + CHECK_EQ(set_array->GetBlock(), loop_body_); + PerformLICM(); + CHECK_EQ(get_array->GetBlock(), loop_preheader_); + CHECK_EQ(set_array->GetBlock(), loop_body_); +} + +TEST_F(LICMTest, NoArrayHoisting) { + BuildLoop(); + + // Populate the loop with instructions: set/get array with same types. + HInstruction* get_array = new (&allocator_) HArrayGet( + parameter_, constant_, Primitive::kPrimLong); + loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction()); + HInstruction* set_array = new (&allocator_) HArraySet( + parameter_, get_array, constant_, Primitive::kPrimLong, 0); + loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); + + CHECK_EQ(get_array->GetBlock(), loop_body_); + CHECK_EQ(set_array->GetBlock(), loop_body_); + PerformLICM(); + CHECK_EQ(get_array->GetBlock(), loop_body_); + CHECK_EQ(set_array->GetBlock(), loop_body_); +} + +} // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 588ab70001..519fa005a6 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -98,26 +98,31 @@ void HGraph::VisitBlockForBackEdges(HBasicBlock* block, } void HGraph::BuildDominatorTree() { + // (1) Simplify the CFG so that catch blocks have only exceptional incoming + // edges. This invariant simplifies building SSA form because Phis cannot + // collect both normal- and exceptional-flow values at the same time. + SimplifyCatchBlocks(); + ArenaBitVector visited(arena_, blocks_.Size(), false); - // (1) Find the back edges in the graph doing a DFS traversal. + // (2) Find the back edges in the graph doing a DFS traversal. FindBackEdges(&visited); - // (2) Remove instructions and phis from blocks not visited during + // (3) Remove instructions and phis from blocks not visited during // the initial DFS as users from other instructions, so that // users can be safely removed before uses later. RemoveInstructionsAsUsersFromDeadBlocks(visited); - // (3) Remove blocks not visited during the initial DFS. + // (4) Remove blocks not visited during the initial DFS. // Step (4) requires dead blocks to be removed from the // predecessors list of live blocks. 
RemoveDeadBlocks(visited); - // (4) Simplify the CFG now, so that we don't need to recompute + // (5) Simplify the CFG now, so that we don't need to recompute // dominators and the reverse post order. SimplifyCFG(); - // (5) Compute the dominance information and the reverse post order. + // (6) Compute the dominance information and the reverse post order. ComputeDominanceInformation(); } @@ -261,6 +266,83 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { info->SetSuspendCheck(first_instruction->AsSuspendCheck()); } +static bool CheckIfPredecessorAtIsExceptional(const HBasicBlock& block, size_t pred_idx) { + HBasicBlock* predecessor = block.GetPredecessors().Get(pred_idx); + if (!predecessor->EndsWithTryBoundary()) { + // Only edges from HTryBoundary can be exceptional. + return false; + } + HTryBoundary* try_boundary = predecessor->GetLastInstruction()->AsTryBoundary(); + if (try_boundary->GetNormalFlowSuccessor() == &block) { + // This block is the normal-flow successor of `try_boundary`, but it could + // also be one of its exception handlers if catch blocks have not been + // simplified yet. Predecessors are unordered, so we will consider the first + // occurrence to be the normal edge and a possible second occurrence to be + // the exceptional edge. + return !block.IsFirstIndexOfPredecessor(predecessor, pred_idx); + } else { + // This is not the normal-flow successor of `try_boundary`, hence it must be + // one of its exception handlers. + DCHECK(try_boundary->HasExceptionHandler(block)); + return true; + } +} + +void HGraph::SimplifyCatchBlocks() { + for (size_t i = 0; i < blocks_.Size(); ++i) { + HBasicBlock* catch_block = blocks_.Get(i); + if (!catch_block->IsCatchBlock()) { + continue; + } + + bool exceptional_predecessors_only = true; + for (size_t j = 0; j < catch_block->GetPredecessors().Size(); ++j) { + if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) { + exceptional_predecessors_only = false; + break; + } + } + + if (!exceptional_predecessors_only) { + // Catch block has normal-flow predecessors and needs to be simplified. + // Splitting the block before its first instruction moves all its + // instructions into `normal_block` and links the two blocks with a Goto. + // Afterwards, incoming normal-flow edges are re-linked to `normal_block`, + // leaving `catch_block` with the exceptional edges only. + // Note that catch blocks with normal-flow predecessors cannot begin with + // a MOVE_EXCEPTION instruction, as guaranteed by the verifier. + DCHECK(!catch_block->GetFirstInstruction()->IsLoadException()); + HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction()); + for (size_t j = 0; j < catch_block->GetPredecessors().Size(); ++j) { + if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) { + catch_block->GetPredecessors().Get(j)->ReplaceSuccessor(catch_block, normal_block); + --j; + } + } + } + } +} + +void HGraph::ComputeTryBlockInformation() { + // Iterate in reverse post order to propagate try membership information from + // predecessors to their successors. + for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + if (block->IsEntryBlock() || block->IsCatchBlock()) { + // Catch blocks after simplification have only exceptional predecessors + // and hence are never in tries. + continue; + } + + // Infer try membership from the first predecessor. 
Having simplified loops, + // the first predecessor can never be a back edge and therefore it must have + // been visited already and had its try membership set. + HBasicBlock* first_predecessor = block->GetPredecessors().Get(0); + DCHECK(!block->IsLoopHeader() || !block->GetLoopInformation()->IsBackEdge(*first_predecessor)); + block->SetTryEntry(first_predecessor->ComputeTryEntryOfSuccessors()); + } +} + void HGraph::SimplifyCFG() { // Simplify the CFG for future analysis, and code generation: // (1): Split critical edges. @@ -268,9 +350,10 @@ void HGraph::SimplifyCFG() { for (size_t i = 0; i < blocks_.Size(); ++i) { HBasicBlock* block = blocks_.Get(i); if (block == nullptr) continue; - if (block->GetSuccessors().Size() > 1) { + if (block->NumberOfNormalSuccessors() > 1) { for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { HBasicBlock* successor = block->GetSuccessors().Get(j); + DCHECK(!successor->IsCatchBlock()); if (successor->GetPredecessors().Size() > 1) { SplitCriticalEdge(block, successor); --j; @@ -288,6 +371,11 @@ bool HGraph::AnalyzeNaturalLoops() const { for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (block->IsLoopHeader()) { + if (block->IsCatchBlock()) { + // TODO: Dealing with exceptional back edges could be tricky because + // they only approximate the real control flow. Bail out for now. + return false; + } HLoopInformation* info = block->GetLoopInformation(); if (!info->Populate()) { // Abort if the loop is non natural. We currently bailout in such cases. @@ -1086,10 +1174,20 @@ HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) { return new_block; } -bool HBasicBlock::IsExceptionalSuccessor(size_t idx) const { - return !GetInstructions().IsEmpty() - && GetLastInstruction()->IsTryBoundary() - && GetLastInstruction()->AsTryBoundary()->IsExceptionalSuccessor(idx); +HTryBoundary* HBasicBlock::ComputeTryEntryOfSuccessors() const { + if (EndsWithTryBoundary()) { + HTryBoundary* try_boundary = GetLastInstruction()->AsTryBoundary(); + if (try_boundary->IsEntry()) { + DCHECK(try_entry_ == nullptr); + return try_boundary; + } else { + DCHECK(try_entry_ != nullptr); + DCHECK(try_entry_->HasSameExceptionHandlersAs(*try_boundary)); + return nullptr; + } + } else { + return try_entry_; + } } static bool HasOnlyOneInstruction(const HBasicBlock& block) { @@ -1114,10 +1212,29 @@ bool HBasicBlock::EndsWithIf() const { return !GetInstructions().IsEmpty() && GetLastInstruction()->IsIf(); } +bool HBasicBlock::EndsWithTryBoundary() const { + return !GetInstructions().IsEmpty() && GetLastInstruction()->IsTryBoundary(); +} + bool HBasicBlock::HasSinglePhi() const { return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr; } +bool HTryBoundary::HasSameExceptionHandlersAs(const HTryBoundary& other) const { + if (GetBlock()->GetSuccessors().Size() != other.GetBlock()->GetSuccessors().Size()) { + return false; + } + + // Exception handler lists cannot contain duplicates, which makes it + // sufficient to test inclusion only in one direction. 
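Aside, not part of the patch: the set-equality shortcut behind HasSameExceptionHandlersAs is that, when neither handler list contains duplicates, equal length plus inclusion in one direction already implies both lists hold the same handlers. A small sketch with ints standing in for handler blocks.

#include <algorithm>
#include <cstdio>
#include <vector>

static bool SameHandlers(const std::vector<int>& a, const std::vector<int>& b) {
  if (a.size() != b.size()) return false;
  // One-directional inclusion is enough because entries are unique.
  for (int handler : b) {
    if (std::find(a.begin(), a.end(), handler) == a.end()) return false;
  }
  return true;
}

int main() {
  std::printf("%d\n", SameHandlers({3, 5, 8}, {8, 3, 5}));  // 1: same set, different order.
  std::printf("%d\n", SameHandlers({3, 5, 8}, {3, 5, 9}));  // 0: differs in one handler.
  return 0;
}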
+ for (HExceptionHandlerIterator it(other); !it.Done(); it.Advance()) { + if (!HasExceptionHandler(*it.Current())) { + return false; + } + } + return true; +} + size_t HInstructionList::CountSize() const { size_t size = 0; HInstruction* current = first_instruction_; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 57c7829c88..903e02e0ea 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -49,6 +49,7 @@ class HLongConstant; class HNullConstant; class HPhi; class HSuspendCheck; +class HTryBoundary; class LiveInterval; class LocationSummary; class SlowPathCode; @@ -182,6 +183,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // visit for eliminating dead phis: a dead phi can only have loop header phi // users remaining when being visited. if (!AnalyzeNaturalLoops()) return false; + // Precompute per-block try membership before entering the SSA builder, + // which needs the information to build catch block phis from values of + // locals at throwing instructions inside try blocks. + ComputeTryBlockInformation(); TransformToSsa(); in_ssa_form_ = true; return true; @@ -193,12 +198,17 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { void BuildDominatorTree(); void TransformToSsa(); void SimplifyCFG(); + void SimplifyCatchBlocks(); // Analyze all natural loops in this graph. Returns false if one // loop is not natural, that is the header does not dominate the // back edge. bool AnalyzeNaturalLoops() const; + // Iterate over blocks to compute try block membership. Needs reverse post + // order and loop information. + void ComputeTryBlockInformation(); + // Inline this graph in `outer_graph`, replacing the given `invoke` instruction. void InlineInto(HGraph* outer_graph, HInvoke* invoke); @@ -730,8 +740,11 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { return GetPredecessorIndexOf(predecessor) == idx; } - // Returns whether successor at index `idx` is an exception handler. - bool IsExceptionalSuccessor(size_t idx) const; + // Returns the number of non-exceptional successors. SsaChecker ensures that + // these are stored at the beginning of the successor list. + size_t NumberOfNormalSuccessors() const { + return EndsWithTryBoundary() ? 1 : GetSuccessors().Size(); + } // Split the block into two blocks just before `cursor`. Returns the newly // created, latter block. Note that this method will add the block to the @@ -830,6 +843,15 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { bool IsInLoop() const { return loop_information_ != nullptr; } + HTryBoundary* GetTryEntry() const { return try_entry_; } + void SetTryEntry(HTryBoundary* try_entry) { try_entry_ = try_entry; } + bool IsInTry() const { return try_entry_ != nullptr; } + + // Returns the try entry that this block's successors should have. They will + // be in the same try, unless the block ends in a try boundary. In that case, + // the appropriate try entry will be returned. + HTryBoundary* ComputeTryEntryOfSuccessors() const; + // Returns whether this block dominates the blocked passed as parameter. 
bool Dominates(HBasicBlock* block) const; @@ -846,6 +868,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { bool EndsWithControlFlowInstruction() const; bool EndsWithIf() const; + bool EndsWithTryBoundary() const; bool HasSinglePhi() const; private: @@ -864,6 +887,10 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { size_t lifetime_end_; bool is_catch_block_; + // If this block is in a try block, `try_entry_` stores one of, possibly + // several, TryBoundary instructions entering it. + HTryBoundary* try_entry_; + friend class HGraph; friend class HInstruction; @@ -1155,13 +1182,25 @@ class HUserRecord : public ValueObject { HUseListNode<T>* use_node_; }; -// TODO: Add better documentation to this class and maybe refactor with more suggestive names. -// - Has(All)SideEffects suggests that all the side effects are present but only ChangesSomething -// flag is consider. -// - DependsOn suggests that there is a real dependency between side effects but it only -// checks DependendsOnSomething flag. -// -// Represents the side effects an instruction may have. +/** + * Side-effects representation for write/read dependences on fields/arrays. + * + * The dependence analysis uses type disambiguation (e.g. a float field write + * cannot modify the value of an integer field read) and the access type (e.g. + * a reference array write cannot modify the value of a reference field read + * [although it may modify the reference fetch prior to reading the field, + * which is represented by its own write/read dependence]). The analysis + * makes conservative points-to assumptions on reference types (e.g. two same + * typed arrays are assumed to be the same, and any reference read depends + * on any reference read without further regard of its type). + * + * The internal representation uses the following 36-bit flags assignments: + * + * |ARRAY-R |FIELD-R |ARRAY-W |FIELD-W | + * +---------+---------+---------+---------+ + * |543210987|654321098|765432109|876543210| + * |DFJISCBZL|DFJISCBZL|DFJISCBZL|DFJISCBZL| + */ class SideEffects : public ValueObject { public: SideEffects() : flags_(0) {} @@ -1171,57 +1210,125 @@ class SideEffects : public ValueObject { } static SideEffects All() { - return SideEffects(ChangesSomething().flags_ | DependsOnSomething().flags_); + return SideEffects(kAllWrites | kAllReads); } - static SideEffects ChangesSomething() { - return SideEffects((1 << kFlagChangesCount) - 1); + static SideEffects AllWrites() { + return SideEffects(kAllWrites); } - static SideEffects DependsOnSomething() { - int count = kFlagDependsOnCount - kFlagChangesCount; - return SideEffects(((1 << count) - 1) << kFlagChangesCount); + static SideEffects AllReads() { + return SideEffects(kAllReads); } + static SideEffects FieldWriteOfType(Primitive::Type type, bool is_volatile) { + return is_volatile + ? All() + : SideEffects(TypeFlagWithAlias(type, kFieldWriteOffset)); + } + + static SideEffects ArrayWriteOfType(Primitive::Type type) { + return SideEffects(TypeFlagWithAlias(type, kArrayWriteOffset)); + } + + static SideEffects FieldReadOfType(Primitive::Type type, bool is_volatile) { + return is_volatile + ? All() + : SideEffects(TypeFlagWithAlias(type, kFieldReadOffset)); + } + + static SideEffects ArrayReadOfType(Primitive::Type type) { + return SideEffects(TypeFlagWithAlias(type, kArrayReadOffset)); + } + + // Combines the side-effects of this and the other. 
SideEffects Union(SideEffects other) const { return SideEffects(flags_ | other.flags_); } - bool HasSideEffects() const { - size_t all_bits_set = (1 << kFlagChangesCount) - 1; - return (flags_ & all_bits_set) != 0; + // Returns true if something is written. + bool DoesAnyWrite() const { + return (flags_ & kAllWrites); } - bool HasAllSideEffects() const { - size_t all_bits_set = (1 << kFlagChangesCount) - 1; - return all_bits_set == (flags_ & all_bits_set); + // Returns true if something is read. + bool DoesAnyRead() const { + return (flags_ & kAllReads); } - bool DependsOn(SideEffects other) const { - size_t depends_flags = other.ComputeDependsFlags(); - return (flags_ & depends_flags) != 0; + // Returns true if nothing is written or read. + bool DoesNothing() const { + return flags_ == 0; } - bool HasDependencies() const { - int count = kFlagDependsOnCount - kFlagChangesCount; - size_t all_bits_set = (1 << count) - 1; - return ((flags_ >> kFlagChangesCount) & all_bits_set) != 0; + // Returns true if potentially everything is written and read + // (every type and every kind of access). + bool DoesAll() const { + return flags_ == (kAllWrites | kAllReads); } - private: - static constexpr int kFlagChangesSomething = 0; - static constexpr int kFlagChangesCount = kFlagChangesSomething + 1; + // Returns true if this may read something written by other. + bool MayDependOn(SideEffects other) const { + const uint64_t reads = (flags_ & kAllReads) >> kFieldReadOffset; + return (other.flags_ & reads); + } - static constexpr int kFlagDependsOnSomething = kFlagChangesCount; - static constexpr int kFlagDependsOnCount = kFlagDependsOnSomething + 1; + // Returns string representation of flags (for debugging only). + // Format: |DFJISCBZL|DFJISCBZL|DFJISCBZL|DFJISCBZL| + std::string ToString() const { + static const char *kDebug = "LZBCSIJFD"; + std::string flags = "|"; + for (int s = 35; s >= 0; s--) { + const int t = s % kBits; + if ((flags_ >> s) & 1) + flags += kDebug[t]; + if (t == 0) + flags += "|"; + } + return flags; + } - explicit SideEffects(size_t flags) : flags_(flags) {} + private: + static constexpr int kBits = 9; + static constexpr int kFieldWriteOffset = 0 * kBits; + static constexpr int kArrayWriteOffset = 1 * kBits; + static constexpr int kFieldReadOffset = 2 * kBits; + static constexpr int kArrayReadOffset = 3 * kBits; + + static constexpr uint64_t kAllWrites = 0x0003ffff; + static constexpr uint64_t kAllReads = kAllWrites << kFieldReadOffset; + + // Work around the fact that HIR aliases I/F and J/D. + // TODO: remove this interceptor once HIR types are clean + static uint64_t TypeFlagWithAlias(Primitive::Type type, int offset) { + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + return TypeFlag(Primitive::kPrimInt, offset) | + TypeFlag(Primitive::kPrimFloat, offset); + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + return TypeFlag(Primitive::kPrimLong, offset) | + TypeFlag(Primitive::kPrimDouble, offset); + default: + return TypeFlag(type, offset); + } + } - size_t ComputeDependsFlags() const { - return flags_ << kFlagChangesCount; + // Translates type to bit flag. + static uint64_t TypeFlag(Primitive::Type type, int offset) { + CHECK_NE(type, Primitive::kPrimVoid); + const uint64_t one = 1; + const int shift = type; // 0-based consecutive enum + DCHECK_LE(kFieldWriteOffset, shift); + DCHECK_LT(shift, kArrayWriteOffset); + return one << (type + offset); } - size_t flags_; + // Private constructor on direct flags value. 
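Aside, not part of the patch: the 36-bit write/read encoding documented above, restated as a standalone sketch. Four 9-bit groups (field writes, array writes, field reads, array reads) hold one bit per primitive kind; a read may depend on a write when, after shifting the read bits down onto the write positions, the two masks intersect. This is a simplified stand-in for the SideEffects class, and the primitive positions used in main() are illustrative only.

#include <cstdint>
#include <cstdio>

constexpr int kBits = 9;  // primitive kinds per group
constexpr int kFieldWrite = 0 * kBits;
constexpr int kArrayWrite = 1 * kBits;
constexpr int kFieldRead  = 2 * kBits;
constexpr int kArrayRead  = 3 * kBits;
constexpr uint64_t kAllWrites = (uint64_t{1} << (2 * kBits)) - 1;  // 0x3ffff
constexpr uint64_t kAllReads  = kAllWrites << kFieldRead;

constexpr uint64_t TypeFlag(int type, int group_offset) {
  return uint64_t{1} << (type + group_offset);
}

// True when `read` may observe something `write` modified: shift the read bits
// down by two groups so field reads line up with field writes (and array reads
// with array writes), then intersect.
constexpr bool MayDependOn(uint64_t read, uint64_t write) {
  return ((read & kAllReads) >> kFieldRead) & write;
}

int main() {
  const int kInt = 4, kLong = 5;  // positions invented for the example
  uint64_t int_field_write = TypeFlag(kInt, kFieldWrite);
  uint64_t int_field_read  = TypeFlag(kInt, kFieldRead);
  uint64_t long_array_read = TypeFlag(kLong, kArrayRead);
  std::printf("%d\n", MayDependOn(int_field_read, int_field_write));   // 1: same type, same kind
  std::printf("%d\n", MayDependOn(long_array_read, int_field_write));  // 0: different kind of access
  return 0;
}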
+ explicit SideEffects(uint64_t flags) : flags_(flags) {} + + uint64_t flags_; }; // A HEnvironment object contains the values of virtual registers at a given location. @@ -1484,7 +1591,8 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { } virtual bool IsControlFlow() const { return false; } virtual bool CanThrow() const { return false; } - bool HasSideEffects() const { return side_effects_.HasSideEffects(); } + + bool DoesAnyWrite() const { return side_effects_.DoesAnyWrite(); } // Does not apply for all instructions, but having this at top level greatly // simplifies the null check elimination. @@ -1976,29 +2084,24 @@ class HTryBoundary : public HTemplateInstruction<0> { // Returns whether `handler` is among its exception handlers (non-zero index // successors). - bool HasExceptionHandler(HBasicBlock* handler) const { - DCHECK(handler->IsCatchBlock()); - return GetBlock()->GetSuccessors().Contains(handler, /* start_from */ 1); - } - - // Returns whether successor at index `idx` is an exception handler. - bool IsExceptionalSuccessor(size_t idx) const { - DCHECK_LT(idx, GetBlock()->GetSuccessors().Size()); - bool is_handler = (idx != 0); - DCHECK(!is_handler || GetBlock()->GetSuccessors().Get(idx)->IsCatchBlock()); - return is_handler; + bool HasExceptionHandler(const HBasicBlock& handler) const { + DCHECK(handler.IsCatchBlock()); + return GetBlock()->GetSuccessors().Contains( + const_cast<HBasicBlock*>(&handler), /* start_from */ 1); } // If not present already, adds `handler` to its block's list of exception // handlers. void AddExceptionHandler(HBasicBlock* handler) { - if (!HasExceptionHandler(handler)) { + if (!HasExceptionHandler(*handler)) { GetBlock()->AddSuccessor(handler); } } bool IsEntry() const { return kind_ == BoundaryKind::kEntry; } + bool HasSameExceptionHandlersAs(const HTryBoundary& other) const; + DECLARE_INSTRUCTION(TryBoundary); private: @@ -2007,6 +2110,24 @@ class HTryBoundary : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HTryBoundary); }; +// Iterator over exception handlers of a given HTryBoundary, i.e. over +// exceptional successors of its basic block. +class HExceptionHandlerIterator : public ValueObject { + public: + explicit HExceptionHandlerIterator(const HTryBoundary& try_boundary) + : block_(*try_boundary.GetBlock()), index_(block_.NumberOfNormalSuccessors()) {} + + bool Done() const { return index_ == block_.GetSuccessors().Size(); } + HBasicBlock* Current() const { return block_.GetSuccessors().Get(index_); } + size_t CurrentSuccessorIndex() const { return index_; } + void Advance() { ++index_; } + + private: + const HBasicBlock& block_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HExceptionHandlerIterator); +}; // Deoptimize to interpreter, upon checking a condition. 
class HDeoptimize : public HTemplateInstruction<1> { @@ -2693,7 +2814,7 @@ class HInvoke : public HInstruction { uint32_t dex_pc, uint32_t dex_method_index, InvokeType original_invoke_type) - : HInstruction(SideEffects::All()), + : HInstruction(SideEffects::All()), // assume write/read on all fields/arrays number_of_arguments_(number_of_arguments), inputs_(arena, number_of_arguments), return_type_(return_type), @@ -3368,6 +3489,8 @@ class HPhi : public HInstruction { } } + bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); } + size_t InputCount() const OVERRIDE { return inputs_.Size(); } void AddInput(HInstruction* input); @@ -3483,7 +3606,9 @@ class HInstanceFieldGet : public HExpression<1> { bool is_volatile, uint32_t field_idx, const DexFile& dex_file) - : HExpression(field_type, SideEffects::DependsOnSomething()), + : HExpression( + field_type, + SideEffects::FieldReadOfType(field_type, is_volatile)), field_info_(field_offset, field_type, is_volatile, field_idx, dex_file) { SetRawInputAt(0, value); } @@ -3525,7 +3650,8 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { bool is_volatile, uint32_t field_idx, const DexFile& dex_file) - : HTemplateInstruction(SideEffects::ChangesSomething()), + : HTemplateInstruction( + SideEffects::FieldWriteOfType(field_type, is_volatile)), field_info_(field_offset, field_type, is_volatile, field_idx, dex_file), value_can_be_null_(true) { SetRawInputAt(0, object); @@ -3556,7 +3682,7 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { class HArrayGet : public HExpression<2> { public: HArrayGet(HInstruction* array, HInstruction* index, Primitive::Type type) - : HExpression(type, SideEffects::DependsOnSomething()) { + : HExpression(type, SideEffects::ArrayReadOfType(type)) { SetRawInputAt(0, array); SetRawInputAt(1, index); } @@ -3594,7 +3720,7 @@ class HArraySet : public HTemplateInstruction<3> { HInstruction* value, Primitive::Type expected_component_type, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::ChangesSomething()), + : HTemplateInstruction(SideEffects::ArrayWriteOfType(expected_component_type)), dex_pc_(dex_pc), expected_component_type_(expected_component_type), needs_type_check_(value->GetType() == Primitive::kPrimNot), @@ -3893,7 +4019,9 @@ class HLoadString : public HExpression<1> { class HClinitCheck : public HExpression<1> { public: explicit HClinitCheck(HLoadClass* constant, uint32_t dex_pc) - : HExpression(Primitive::kPrimNot, SideEffects::ChangesSomething()), + : HExpression( + Primitive::kPrimNot, + SideEffects::AllWrites()), // assume write on all fields/arrays dex_pc_(dex_pc) { SetRawInputAt(0, constant); } @@ -3929,7 +4057,9 @@ class HStaticFieldGet : public HExpression<1> { bool is_volatile, uint32_t field_idx, const DexFile& dex_file) - : HExpression(field_type, SideEffects::DependsOnSomething()), + : HExpression( + field_type, + SideEffects::FieldReadOfType(field_type, is_volatile)), field_info_(field_offset, field_type, is_volatile, field_idx, dex_file) { SetRawInputAt(0, cls); } @@ -3968,7 +4098,8 @@ class HStaticFieldSet : public HTemplateInstruction<2> { bool is_volatile, uint32_t field_idx, const DexFile& dex_file) - : HTemplateInstruction(SideEffects::ChangesSomething()), + : HTemplateInstruction( + SideEffects::FieldWriteOfType(field_type, is_volatile)), field_info_(field_offset, field_type, is_volatile, field_idx, dex_file), value_can_be_null_(true) { SetRawInputAt(0, cls); @@ -4155,7 +4286,8 @@ class HCheckCast : public HTemplateInstruction<2> { class HMemoryBarrier : public 
HTemplateInstruction<0> { public: explicit HMemoryBarrier(MemBarrierKind barrier_kind) - : HTemplateInstruction(SideEffects::None()), + : HTemplateInstruction( + SideEffects::All()), // assume write/read on all fields/arrays barrier_kind_(barrier_kind) {} MemBarrierKind GetBarrierKind() { return barrier_kind_; } @@ -4176,7 +4308,8 @@ class HMonitorOperation : public HTemplateInstruction<1> { }; HMonitorOperation(HInstruction* object, OperationKind kind, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::ChangesSomething()), kind_(kind), dex_pc_(dex_pc) { + : HTemplateInstruction(SideEffects::All()), // assume write/read on all fields/arrays + kind_(kind), dex_pc_(dex_pc) { SetRawInputAt(0, object); } diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index bc565468b2..f793a65bf3 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -40,7 +40,7 @@ class HOptimization : public ArenaObject<kArenaAllocMisc> { // Return the name of the pass. const char* GetPassName() const { return pass_name_; } - // Peform the analysis itself. + // Perform the analysis itself. virtual void Run() = 0; protected: diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index ae1958afcb..601d668995 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -35,6 +35,7 @@ #include "dex/verified_method.h" #include "dex/verification_results.h" #include "driver/compiler_driver.h" +#include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "elf_writer_quick.h" @@ -132,7 +133,7 @@ class PassObserver : public ValueObject { void StartPass(const char* pass_name) { // Dump graph first, then start timer. if (visualizer_enabled_) { - visualizer_.DumpGraph(pass_name, /* is_after_pass */ false); + visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_); } if (timing_logger_enabled_) { timing_logger_.StartTiming(pass_name); @@ -145,7 +146,7 @@ class PassObserver : public ValueObject { timing_logger_.EndTiming(); } if (visualizer_enabled_) { - visualizer_.DumpGraph(pass_name, /* is_after_pass */ true); + visualizer_.DumpGraph(pass_name, /* is_after_pass */ true, graph_in_bad_state_); } // Validate the HGraph if running in debug mode. @@ -556,8 +557,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite } // Implementation of the space filter: do not compile a code item whose size in - // code units is bigger than 256. - static constexpr size_t kSpaceFilterOptimizingThreshold = 256; + // code units is bigger than 128. 
+ static constexpr size_t kSpaceFilterOptimizingThreshold = 128; const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); if ((compiler_options.GetCompilerFilter() == CompilerOptions::kSpace) && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) { @@ -566,7 +567,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite } DexCompilationUnit dex_compilation_unit( - nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item, + nullptr, class_loader, Runtime::Current()->GetClassLinker(), dex_file, code_item, class_def_idx, method_idx, access_flags, compiler_driver->GetVerifiedMethod(&dex_file, method_idx)); @@ -603,12 +604,29 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite visualizer_output_.get(), compiler_driver); + const uint8_t* interpreter_metadata = nullptr; + { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<4> hs(soa.Self()); + ClassLinker* class_linker = dex_compilation_unit.GetClassLinker(); + Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file))); + Handle<mirror::ClassLoader> loader(hs.NewHandle( + soa.Decode<mirror::ClassLoader*>(class_loader))); + ArtMethod* art_method = compiler_driver->ResolveMethod( + soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type); + // We may not get a method, for example if its class is erroneous. + // TODO: Clean this up, the compiler driver should just pass the ArtMethod to compile. + if (art_method != nullptr) { + interpreter_metadata = art_method->GetQuickenedInfo(); + } + } HGraphBuilder builder(graph, &dex_compilation_unit, &dex_compilation_unit, &dex_file, compiler_driver, - compilation_stats_.get()); + compilation_stats_.get(), + interpreter_metadata); VLOG(compiler) << "Building " << method_name; @@ -629,7 +647,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back // to Quick. bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit(); - if (run_optimizations_ && can_optimize && can_allocate_registers) { + if (run_optimizations_ && can_allocate_registers) { VLOG(compiler) << "Optimizing " << method_name; { @@ -638,16 +656,21 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite // We could not transform the graph to SSA, bailout. 
LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop"; MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); + pass_observer.SetGraphInBadState(); return nullptr; } } - return CompileOptimized(graph, - codegen.get(), - compiler_driver, - dex_compilation_unit, - &pass_observer); - } else if (shouldOptimize && can_allocate_registers) { + if (can_optimize) { + return CompileOptimized(graph, + codegen.get(), + compiler_driver, + dex_compilation_unit, + &pass_observer); + } + } + + if (shouldOptimize && can_allocate_registers) { LOG(FATAL) << "Could not allocate registers in optimizing compiler"; UNREACHABLE(); } else if (can_use_baseline) { diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index ea1ca5a731..9dbf638442 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -24,14 +24,15 @@ void SideEffectsAnalysis::Run() { block_effects_.SetSize(graph_->GetBlocks().Size()); loop_effects_.SetSize(graph_->GetBlocks().Size()); + // In DEBUG mode, ensure side effects are properly initialized to empty. if (kIsDebugBuild) { for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); SideEffects effects = GetBlockEffects(block); - DCHECK(!effects.HasSideEffects() && !effects.HasDependencies()); + DCHECK(effects.DoesNothing()); if (block->IsLoopHeader()) { effects = GetLoopEffects(block); - DCHECK(!effects.HasSideEffects() && !effects.HasDependencies()); + DCHECK(effects.DoesNothing()); } } } @@ -46,7 +47,9 @@ void SideEffectsAnalysis::Run() { inst_it.Advance()) { HInstruction* instruction = inst_it.Current(); effects = effects.Union(instruction->GetSideEffects()); - if (effects.HasAllSideEffects()) { + // If every possible write/read is represented, scanning further + // will not add any more information to side-effects of this block. + if (effects.DoesAll()) { break; } } diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc new file mode 100644 index 0000000000..8db5a8a350 --- /dev/null +++ b/compiler/optimizing/side_effects_test.cc @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not read this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "gtest/gtest.h" +#include "nodes.h" +#include "primitive.h" + +namespace art { + +/** + * Tests for the SideEffects class. + */ + +// +// Helper methods. +// + +void testWriteAndReadSanity(SideEffects write, SideEffects read) { + EXPECT_FALSE(write.DoesNothing()); + EXPECT_FALSE(read.DoesNothing()); + + EXPECT_TRUE(write.DoesAnyWrite()); + EXPECT_FALSE(write.DoesAnyRead()); + EXPECT_FALSE(read.DoesAnyWrite()); + EXPECT_TRUE(read.DoesAnyRead()); + + // All-dependences. 
+ SideEffects all = SideEffects::All(); + EXPECT_TRUE(all.MayDependOn(write)); + EXPECT_FALSE(write.MayDependOn(all)); + EXPECT_FALSE(all.MayDependOn(read)); + EXPECT_TRUE(read.MayDependOn(all)); + + // None-dependences. + SideEffects none = SideEffects::None(); + EXPECT_FALSE(none.MayDependOn(write)); + EXPECT_FALSE(write.MayDependOn(none)); + EXPECT_FALSE(none.MayDependOn(read)); + EXPECT_FALSE(read.MayDependOn(none)); +} + +void testWriteAndReadDependence(SideEffects write, SideEffects read) { + testWriteAndReadSanity(write, read); + + // Dependence only in one direction. + EXPECT_FALSE(write.MayDependOn(read)); + EXPECT_TRUE(read.MayDependOn(write)); +} + +void testNoWriteAndReadDependence(SideEffects write, SideEffects read) { + testWriteAndReadSanity(write, read); + + // No dependence in any direction. + EXPECT_FALSE(write.MayDependOn(read)); + EXPECT_FALSE(read.MayDependOn(write)); +} + +// +// Actual tests. +// + +TEST(SideEffectsTest, All) { + SideEffects all = SideEffects::All(); + EXPECT_TRUE(all.DoesAnyWrite()); + EXPECT_TRUE(all.DoesAnyRead()); + EXPECT_FALSE(all.DoesNothing()); + EXPECT_TRUE(all.DoesAll()); +} + +TEST(SideEffectsTest, None) { + SideEffects none = SideEffects::None(); + EXPECT_FALSE(none.DoesAnyWrite()); + EXPECT_FALSE(none.DoesAnyRead()); + EXPECT_TRUE(none.DoesNothing()); + EXPECT_FALSE(none.DoesAll()); +} + +TEST(SideEffectsTest, DependencesAndNoDependences) { + // Apply test to each individual primitive type. + for (Primitive::Type type = Primitive::kPrimNot; + type < Primitive::kPrimVoid; + type = Primitive::Type(type + 1)) { + // Same primitive type and access type: proper write/read dep. + testWriteAndReadDependence( + SideEffects::FieldWriteOfType(type, false), + SideEffects::FieldReadOfType(type, false)); + testWriteAndReadDependence( + SideEffects::ArrayWriteOfType(type), + SideEffects::ArrayReadOfType(type)); + // Same primitive type but different access type: no write/read dep. + testNoWriteAndReadDependence( + SideEffects::FieldWriteOfType(type, false), + SideEffects::ArrayReadOfType(type)); + testNoWriteAndReadDependence( + SideEffects::ArrayWriteOfType(type), + SideEffects::FieldReadOfType(type, false)); + } +} + +TEST(SideEffectsTest, NoDependences) { + // Different primitive type, same access type: no write/read dep. + testNoWriteAndReadDependence( + SideEffects::FieldWriteOfType(Primitive::kPrimInt, false), + SideEffects::FieldReadOfType(Primitive::kPrimDouble, false)); + testNoWriteAndReadDependence( + SideEffects::ArrayWriteOfType(Primitive::kPrimInt), + SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); + // Everything different: no write/read dep. 
+ testNoWriteAndReadDependence( + SideEffects::FieldWriteOfType(Primitive::kPrimInt, false), + SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); + testNoWriteAndReadDependence( + SideEffects::ArrayWriteOfType(Primitive::kPrimInt), + SideEffects::FieldReadOfType(Primitive::kPrimDouble, false)); +} + +TEST(SideEffectsTest, VolatileDependences) { + SideEffects volatile_write = + SideEffects::FieldWriteOfType(Primitive::kPrimInt, true); + SideEffects any_write = + SideEffects::FieldWriteOfType(Primitive::kPrimInt, false); + SideEffects volatile_read = + SideEffects::FieldReadOfType(Primitive::kPrimByte, true); + SideEffects any_read = + SideEffects::FieldReadOfType(Primitive::kPrimByte, false); + + EXPECT_FALSE(volatile_write.MayDependOn(any_read)); + EXPECT_TRUE(any_read.MayDependOn(volatile_write)); + EXPECT_TRUE(volatile_write.MayDependOn(any_write)); + EXPECT_FALSE(any_write.MayDependOn(volatile_write)); + + EXPECT_FALSE(volatile_read.MayDependOn(any_read)); + EXPECT_TRUE(any_read.MayDependOn(volatile_read)); + EXPECT_TRUE(volatile_read.MayDependOn(any_write)); + EXPECT_FALSE(any_write.MayDependOn(volatile_read)); +} + +TEST(SideEffectsTest, SameWidthTypes) { + // Type I/F. + testWriteAndReadDependence( + SideEffects::FieldWriteOfType(Primitive::kPrimInt, false), + SideEffects::FieldReadOfType(Primitive::kPrimFloat, false)); + testWriteAndReadDependence( + SideEffects::ArrayWriteOfType(Primitive::kPrimInt), + SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); + // Type L/D. + testWriteAndReadDependence( + SideEffects::FieldWriteOfType(Primitive::kPrimLong, false), + SideEffects::FieldReadOfType(Primitive::kPrimDouble, false)); + testWriteAndReadDependence( + SideEffects::ArrayWriteOfType(Primitive::kPrimLong), + SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); +} + +TEST(SideEffectsTest, AllWritesAndReads) { + SideEffects s = SideEffects::None(); + // Keep taking the union of different writes and reads. 
+ for (Primitive::Type type = Primitive::kPrimNot; + type < Primitive::kPrimVoid; + type = Primitive::Type(type + 1)) { + s = s.Union(SideEffects::FieldWriteOfType(type, false)); + s = s.Union(SideEffects::ArrayWriteOfType(type)); + s = s.Union(SideEffects::FieldReadOfType(type, false)); + s = s.Union(SideEffects::ArrayReadOfType(type)); + } + EXPECT_TRUE(s.DoesAll()); +} + +TEST(SideEffectsTest, BitStrings) { + EXPECT_STREQ( + "|||||", + SideEffects::None().ToString().c_str()); + EXPECT_STREQ( + "|DFJISCBZL|DFJISCBZL|DFJISCBZL|DFJISCBZL|", + SideEffects::All().ToString().c_str()); + EXPECT_STREQ( + "|||DFJISCBZL|DFJISCBZL|", + SideEffects::AllWrites().ToString().c_str()); + EXPECT_STREQ( + "|DFJISCBZL|DFJISCBZL|||", + SideEffects::AllReads().ToString().c_str()); + EXPECT_STREQ( + "||||L|", + SideEffects::FieldWriteOfType(Primitive::kPrimNot, false).ToString().c_str()); + EXPECT_STREQ( + "|||Z||", + SideEffects::ArrayWriteOfType(Primitive::kPrimBoolean).ToString().c_str()); + EXPECT_STREQ( + "||B|||", + SideEffects::FieldReadOfType(Primitive::kPrimByte, false).ToString().c_str()); + EXPECT_STREQ( + "|DJ||||", // note: DJ alias + SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str()); + SideEffects s = SideEffects::None(); + s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, false)); + s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, false)); + s = s.Union(SideEffects::ArrayWriteOfType(Primitive::kPrimShort)); + s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, false)); + s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); + s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); + EXPECT_STREQ( + "|DFJI|FI|S|DJC|", // note: DJ/FI alias. + s.ToString().c_str()); +} + +} // namespace art diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index c37b1995fa..ff2e6ad821 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -350,7 +350,9 @@ HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) { void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { current_locals_ = GetLocalsFor(block); - if (block->IsLoopHeader()) { + if (block->IsCatchBlock()) { + // Catch phis were already created and inputs collected from throwing sites. + } else if (block->IsLoopHeader()) { // If the block is a loop header, we know we only have visited the pre header // because we are visiting in reverse post order. We create phis for all initialized // locals from the pre header. Their inputs will be populated at the end of @@ -551,19 +553,32 @@ void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { } void SsaBuilder::VisitInstruction(HInstruction* instruction) { - if (!instruction->NeedsEnvironment()) { - return; + if (instruction->NeedsEnvironment()) { + HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( + GetGraph()->GetArena(), + current_locals_->Size(), + GetGraph()->GetDexFile(), + GetGraph()->GetMethodIdx(), + instruction->GetDexPc(), + GetGraph()->GetInvokeType(), + instruction); + environment->CopyFrom(*current_locals_); + instruction->SetRawEnvironment(environment); + } + + // If in a try block, propagate values of locals into catch blocks. 
+ if (instruction->GetBlock()->IsInTry() && instruction->CanThrow()) { + HTryBoundary* try_block = instruction->GetBlock()->GetTryEntry(); + for (HExceptionHandlerIterator it(*try_block); !it.Done(); it.Advance()) { + GrowableArray<HInstruction*>* handler_locals = GetLocalsFor(it.Current()); + for (size_t i = 0, e = current_locals_->Size(); i < e; ++i) { + HInstruction* local_value = current_locals_->Get(i); + if (local_value != nullptr) { + handler_locals->Get(i)->AsPhi()->AddInput(local_value); + } + } + } } - HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( - GetGraph()->GetArena(), - current_locals_->Size(), - GetGraph()->GetDexFile(), - GetGraph()->GetMethodIdx(), - instruction->GetDexPc(), - GetGraph()->GetInvokeType(), - instruction); - environment->CopyFrom(*current_locals_); - instruction->SetRawEnvironment(environment); } void SsaBuilder::VisitTemporary(HTemporary* temp) { diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 1c83c4ba48..64600db648 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -61,9 +61,22 @@ class SsaBuilder : public HGraphVisitor { GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) { GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId()); if (locals == nullptr) { - locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>( - GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs()); - locals->SetSize(GetGraph()->GetNumberOfVRegs()); + const size_t vregs = GetGraph()->GetNumberOfVRegs(); + ArenaAllocator* arena = GetGraph()->GetArena(); + locals = new (arena) GrowableArray<HInstruction*>(arena, vregs); + locals->SetSize(vregs); + + if (block->IsCatchBlock()) { + // We record incoming inputs of catch phis at throwing instructions and + // must therefore eagerly create the phis. Unused phis will be removed + // in the dead phi analysis. + for (size_t i = 0; i < vregs; ++i) { + HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid); + block->AddPhi(phi); + locals->Put(i, phi); + } + } + locals_for_.Put(block->GetBlockId(), locals); } return locals; diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index 2f2e2d1fab..917341a1e7 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -114,6 +114,12 @@ void SsaRedundantPhiElimination::Run() { continue; } + if (phi->InputCount() == 0) { + DCHECK(phi->IsCatchPhi()); + DCHECK(phi->IsDead()); + continue; + } + // Find if the inputs of the phi are the same instruction. HInstruction* candidate = phi->InputAt(0); // A loop phi cannot have itself as the first phi. Note that this @@ -137,6 +143,11 @@ void SsaRedundantPhiElimination::Run() { continue; } + // The candidate may not dominate a phi in a catch block. + if (phi->IsCatchPhi() && !candidate->StrictlyDominates(phi)) { + continue; + } + if (phi->IsInLoop()) { // Because we're updating the users of this phi, we may have new // phis candidate for elimination if this phi is in a loop. Add phis that diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 65610d54a6..1f1530fa1e 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -248,7 +248,7 @@ void StackMapStream::FillIn(MemoryRegion region) { DCHECK_EQ(code_info.GetStackMapsSize(code_info.ExtractEncoding()), stack_maps_size_); // Set the Dex register location catalog. 
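Aside, not part of the patch: the catch-phi plumbing above can be pictured with a toy model, in which each catch block starts with one (initially empty) phi per virtual register and every throwing site inside the try appends its current local values as extra phi inputs; this is also why a catch phi's input count equals the number of caught throwing sites. Simplifications: the real code skips locals that are nullptr and later removes unused phis in dead phi elimination. Ints stand in for HInstruction pointers.

#include <cstdio>
#include <vector>

struct CatchPhi { std::vector<int> inputs; };

static void RecordThrowingSite(const std::vector<int>& current_locals,
                               std::vector<CatchPhi>* handler_phis) {
  for (size_t vreg = 0; vreg < current_locals.size(); ++vreg) {
    (*handler_phis)[vreg].inputs.push_back(current_locals[vreg]);
  }
}

int main() {
  std::vector<CatchPhi> phis(2);        // catch block phis for vregs 0 and 1
  RecordThrowingSite({10, 20}, &phis);  // first throwing instruction in the try
  RecordThrowingSite({10, 21}, &phis);  // second one, after v1 was redefined
  for (size_t v = 0; v < phis.size(); ++v) {
    std::printf("v%zu phi inputs:", v);
    for (int in : phis[v].inputs) std::printf(" %d", in);
    std::printf("\n");
  }
  return 0;
}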
- code_info.SetNumberOfDexRegisterLocationCatalogEntries(location_catalog_entries_.Size()); + code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.Size()); MemoryRegion dex_register_location_catalog_region = region.Subregion( dex_register_location_catalog_start_, dex_register_location_catalog_size_); DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index b4ac1b4d1a..33207d92d2 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -55,8 +55,7 @@ TEST(StackMapTest, Test1) { ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_location_catalog_entries = - code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(2u, number_of_location_catalog_entries); DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); // The Dex register location catalog contains: @@ -154,8 +153,7 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(2u, encoding.NumberOfBytesForStackMask()); ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_location_catalog_entries = - code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(4u, number_of_location_catalog_entries); DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); // The Dex register location catalog contains: @@ -304,8 +302,7 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_location_catalog_entries = - code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(1u, number_of_location_catalog_entries); DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); // The Dex register location catalog contains: @@ -398,8 +395,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { // The location catalog contains two entries (DexRegisterLocation(kConstant, 0) // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index // has a size of 1 bit. 
- uint32_t number_of_location_catalog_entries = - code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(2u, number_of_location_catalog_entries); ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_location_catalog_entries)); @@ -501,8 +497,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_location_catalog_entries = - code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(0u, number_of_location_catalog_entries); DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); ASSERT_EQ(0u, location_catalog.Size()); diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 09d22703fe..0e3e08c2da 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -252,11 +252,11 @@ uint32_t Address::encodingThumbLdrdStrd() const { if (offset_ < 0) { int32_t off = -offset_; CHECK_LT(off, 1024); - CHECK_EQ((off & 3 /* 0b11 */), 0); // Must be multiple of 4. + CHECK_ALIGNED(off, 4); encoding = (am ^ (1 << kUShift)) | off >> 2; // Flip U to adjust sign. } else { CHECK_LT(offset_, 1024); - CHECK_EQ((offset_ & 3 /* 0b11 */), 0); // Must be multiple of 4. + CHECK_ALIGNED(offset_, 4); encoding = am | offset_ >> 2; } encoding |= static_cast<uint32_t>(rn_) << 16; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 88b2f2cc4d..b499dddb0c 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -25,6 +25,58 @@ namespace art { namespace arm { +void Thumb2Assembler::Fixup::PrepareDependents(Thumb2Assembler* assembler) { + // For each Fixup, it's easy to find the Fixups that it depends on as they are either + // the following or the preceding Fixups until we find the target. However, for fixup + // adjustment we need the reverse lookup, i.e. what Fixups depend on a given Fixup. + // This function creates a compact representation of this relationship, where we have + // all the dependents in a single array and Fixups reference their ranges by start + // index and count. (Instead of having a per-fixup vector.) + + // Count the number of dependents of each Fixup. + const FixupId end_id = assembler->fixups_.size(); + Fixup* fixups = assembler->fixups_.data(); + for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) { + uint32_t target = fixups[fixup_id].target_; + if (target > fixups[fixup_id].location_) { + for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) { + fixups[id].dependents_count_ += 1u; + } + } else { + for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) { + fixups[id - 1u].dependents_count_ += 1u; + } + } + } + // Assign index ranges in fixup_dependents_ to individual fixups. Record the end of the + // range in dependents_start_, we shall later decrement it as we fill in fixup_dependents_. + uint32_t number_of_dependents = 0u; + for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) { + number_of_dependents += fixups[fixup_id].dependents_count_; + fixups[fixup_id].dependents_start_ = number_of_dependents; + } + if (number_of_dependents == 0u) { + return; + } + // Create and fill in the fixup_dependents_. 
+ assembler->fixup_dependents_.reset(new FixupId[number_of_dependents]); + FixupId* dependents = assembler->fixup_dependents_.get(); + for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) { + uint32_t target = fixups[fixup_id].target_; + if (target > fixups[fixup_id].location_) { + for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) { + fixups[id].dependents_start_ -= 1u; + dependents[fixups[id].dependents_start_] = fixup_id; + } + } else { + for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) { + fixups[id - 1u].dependents_start_ -= 1u; + dependents[fixups[id - 1u].dependents_start_] = fixup_id; + } + } + } +} +
 void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) { CHECK(!label->IsBound()); @@ -32,10 +84,6 @@ void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) { FixupId fixup_id = label->Position(); // The id for linked Fixup. Fixup* fixup = GetFixup(fixup_id); // Get the Fixup at this id. fixup->Resolve(bound_pc); // Fixup can be resolved now. - // Add this fixup as a dependency of all later fixups. - for (FixupId id = fixup_id + 1u, end = fixups_.size(); id != end; ++id) { - GetFixup(id)->AddDependent(fixup_id); - } uint32_t fixup_location = fixup->GetLocation(); uint16_t next = buffer_.Load<uint16_t>(fixup_location); // Get next in chain. buffer_.Store<int16_t>(fixup_location, 0); @@ -59,7 +107,7 @@ void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_s uint32_t adjustment = fixup->AdjustSizeIfNeeded(*current_code_size); if (adjustment != 0u) { *current_code_size += adjustment; - for (FixupId dependent_id : fixup->Dependents()) { + for (FixupId dependent_id : fixup->Dependents(*this)) { Fixup* dependent = GetFixup(dependent_id); dependent->IncreaseAdjustment(adjustment); if (buffer_.Load<int16_t>(dependent->GetLocation()) == 0) { @@ -71,6 +119,7 @@ } uint32_t Thumb2Assembler::AdjustFixups() { + Fixup::PrepareDependents(this); uint32_t current_code_size = buffer_.Size(); std::deque<FixupId> fixups_to_recalculate; if (kIsDebugBuild) { @@ -84,14 +133,27 @@ uint32_t Thumb2Assembler::AdjustFixups() { AdjustFixupIfNeeded(&fixup, &current_code_size, &fixups_to_recalculate); } while (!fixups_to_recalculate.empty()) { - // Pop the fixup. - FixupId fixup_id = fixups_to_recalculate.front(); - fixups_to_recalculate.pop_front(); - Fixup* fixup = GetFixup(fixup_id); - DCHECK_NE(buffer_.Load<int16_t>(fixup->GetLocation()), 0); - buffer_.Store<int16_t>(fixup->GetLocation(), 0); - // See if it needs adjustment. - AdjustFixupIfNeeded(fixup, &current_code_size, &fixups_to_recalculate); + do { + // Pop the fixup. + FixupId fixup_id = fixups_to_recalculate.front(); + fixups_to_recalculate.pop_front(); + Fixup* fixup = GetFixup(fixup_id); + DCHECK_NE(buffer_.Load<int16_t>(fixup->GetLocation()), 0); + buffer_.Store<int16_t>(fixup->GetLocation(), 0); + // See if it needs adjustment. + AdjustFixupIfNeeded(fixup, &current_code_size, &fixups_to_recalculate); + } while (!fixups_to_recalculate.empty()); + + if ((current_code_size & 2) != 0 && !literals_.empty()) { + // If we need to add padding before literals, this may just push some out of range, + // so recalculate all load literals. This makes up for the fact that we don't mark + // load literal as a dependency of all previous Fixups even though it actually is. 
+ for (Fixup& fixup : fixups_) { + if (fixup.IsLoadLiteral()) { + AdjustFixupIfNeeded(&fixup, &current_code_size, &fixups_to_recalculate); + } + } + } } if (kIsDebugBuild) { // Check that no fixup is marked as being in fixups_to_recalculate anymore. @@ -101,7 +163,7 @@ uint32_t Thumb2Assembler::AdjustFixups() { } // Adjust literal pool labels for padding. - DCHECK_EQ(current_code_size & 1u, 0u); + DCHECK_ALIGNED(current_code_size, 2); uint32_t literals_adjustment = current_code_size + (current_code_size & 2) - buffer_.Size(); if (literals_adjustment != 0u) { for (Literal& literal : literals_) { @@ -152,7 +214,7 @@ void Thumb2Assembler::EmitLiterals() { // Load literal instructions (LDR, LDRD, VLDR) require 4-byte alignment. // We don't support byte and half-word literals. uint32_t code_size = buffer_.Size(); - DCHECK_EQ(code_size & 1u, 0u); + DCHECK_ALIGNED(code_size, 2); if ((code_size & 2u) != 0u) { Emit16(0); } @@ -168,7 +230,7 @@ } inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) { - DCHECK_EQ(offset & 1, 0); + DCHECK_ALIGNED(offset, 2); int16_t encoding = B15 | B14; if (cond != AL) { DCHECK(IsInt<9>(offset)); @@ -181,7 +243,7 @@ inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) { } inline int32_t Thumb2Assembler::BEncoding32(int32_t offset, Condition cond) { - DCHECK_EQ(offset & 1, 0); + DCHECK_ALIGNED(offset, 2); int32_t s = (offset >> 31) & 1; // Sign bit. int32_t encoding = B31 | B30 | B29 | B28 | B15 | (s << 26) | // Sign bit goes to bit 26. @@ -205,7 +267,7 @@ inline int32_t Thumb2Assembler::BEncoding32(int32_t offset, Condition cond) { inline int16_t Thumb2Assembler::CbxzEncoding16(Register rn, int32_t offset, Condition cond) { DCHECK(!IsHighRegister(rn)); - DCHECK_EQ(offset & 1, 0); + DCHECK_ALIGNED(offset, 2); DCHECK(IsUint<7>(offset)); DCHECK(cond == EQ || cond == NE); return B15 | B13 | B12 | B8 | (cond == NE ? 
B11 : 0) | static_cast<int32_t>(rn) | @@ -250,7 +312,7 @@ inline int32_t Thumb2Assembler::MovModImmEncoding32(Register rd, int32_t value) inline int16_t Thumb2Assembler::LdrLitEncoding16(Register rt, int32_t offset) { DCHECK(!IsHighRegister(rt)); - DCHECK_EQ(offset & 3, 0); + DCHECK_ALIGNED(offset, 4); DCHECK(IsUint<10>(offset)); return B14 | B11 | (static_cast<int32_t>(rt) << 8) | (offset >> 2); } @@ -261,7 +323,7 @@ inline int32_t Thumb2Assembler::LdrLitEncoding32(Register rt, int32_t offset) { } inline int32_t Thumb2Assembler::LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset) { - DCHECK_EQ(offset & 3, 0); + DCHECK_ALIGNED(offset, 4); CHECK(IsUint<10>(offset)); return B31 | B30 | B29 | B27 | B24 /* P = 1 */ | B23 /* U = 1 */ | B22 | 0 /* W = 0 */ | B20 | @@ -270,7 +332,7 @@ inline int32_t Thumb2Assembler::LdrdEncoding32(Register rt, Register rt2, Regist } inline int32_t Thumb2Assembler::VldrsEncoding32(SRegister sd, Register rn, int32_t offset) { - DCHECK_EQ(offset & 3, 0); + DCHECK_ALIGNED(offset, 4); CHECK(IsUint<10>(offset)); return B31 | B30 | B29 | B27 | B26 | B24 | B23 /* U = 1 */ | B20 | B11 | B9 | @@ -281,7 +343,7 @@ inline int32_t Thumb2Assembler::VldrsEncoding32(SRegister sd, Register rn, int32 } inline int32_t Thumb2Assembler::VldrdEncoding32(DRegister dd, Register rn, int32_t offset) { - DCHECK_EQ(offset & 3, 0); + DCHECK_ALIGNED(offset, 4); CHECK(IsUint<10>(offset)); return B31 | B30 | B29 | B27 | B26 | B24 | B23 /* U = 1 */ | B20 | B11 | B9 | B8 | @@ -294,7 +356,7 @@ inline int32_t Thumb2Assembler::VldrdEncoding32(DRegister dd, Register rn, int32 inline int16_t Thumb2Assembler::LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset) { DCHECK(!IsHighRegister(rt)); DCHECK(!IsHighRegister(rn)); - DCHECK_EQ(offset & 3, 0); + DCHECK_ALIGNED(offset, 4); DCHECK(IsUint<7>(offset)); return B14 | B13 | B11 | (static_cast<int32_t>(rn) << 3) | static_cast<int32_t>(rt) | @@ -1423,7 +1485,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, thumb_opcode = 3U /* 0b11 */; opcode_shift = 12; CHECK_LT(immediate, (1u << 9)); - CHECK_EQ((immediate & 3u /* 0b11 */), 0u); + CHECK_ALIGNED(immediate, 4); // Remove rd and rn from instruction by orring it with immed and clearing bits. rn = R0; @@ -1437,7 +1499,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, thumb_opcode = 5U /* 0b101 */; opcode_shift = 11; CHECK_LT(immediate, (1u << 10)); - CHECK_EQ((immediate & 3u /* 0b11 */), 0u); + CHECK_ALIGNED(immediate, 4); // Remove rn from instruction. rn = R0; @@ -1474,7 +1536,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, thumb_opcode = 0x61 /* 0b1100001 */; opcode_shift = 7; CHECK_LT(immediate, (1u << 9)); - CHECK_EQ((immediate & 3u /* 0b11 */), 0u); + CHECK_ALIGNED(immediate, 4); // Remove rd and rn from instruction by orring it with immed and clearing bits. rn = R0; @@ -1652,7 +1714,7 @@ inline uint32_t Thumb2Assembler::Fixup::GetSizeInBytes() const { inline size_t Thumb2Assembler::Fixup::LiteralPoolPaddingSize(uint32_t current_code_size) { // The code size must be a multiple of 2. - DCHECK_EQ(current_code_size & 1u, 0u); + DCHECK_ALIGNED(current_code_size, 2); // If it isn't a multiple of 4, we need to add a 2-byte padding before the literal pool. 
return current_code_size & 2; } @@ -1697,7 +1759,7 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con // Load literal instructions round down the PC+4 to a multiple of 4, so if the PC // isn't a multiple of 2, we need to adjust. Since we already adjusted for the target // being aligned, current PC alignment can be inferred from diff. - DCHECK_EQ(diff & 1, 0); + DCHECK_ALIGNED(diff, 2); diff = diff + (diff & 2); DCHECK_GE(diff, 0); break; @@ -2045,7 +2107,7 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, if (sp_relative) { // SP relative, 10 bit offset. CHECK_LT(offset, (1 << 10)); - CHECK_EQ((offset & 3 /* 0b11 */), 0); + CHECK_ALIGNED(offset, 4); encoding |= rd << 8 | offset >> 2; } else { // No SP relative. The offset is shifted right depending on @@ -2058,12 +2120,12 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, } else if (half) { // 6 bit offset, shifted by 1. CHECK_LT(offset, (1 << 6)); - CHECK_EQ((offset & 1 /* 0b1 */), 0); + CHECK_ALIGNED(offset, 2); offset >>= 1; } else { // 7 bit offset, shifted by 2. CHECK_LT(offset, (1 << 7)); - CHECK_EQ((offset & 3 /* 0b11 */), 0); + CHECK_ALIGNED(offset, 4); offset >>= 2; } encoding |= rn << 3 | offset << 6; @@ -2220,17 +2282,7 @@ void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x if (label->IsBound()) { // The branch is to a bound label which means that it's a backwards branch. - // Record this branch as a dependency of all Fixups between the label and the branch. GetFixup(branch_id)->Resolve(label->Position()); - for (FixupId fixup_id = branch_id; fixup_id != 0u; ) { - --fixup_id; - Fixup* fixup = GetFixup(fixup_id); - DCHECK_GE(label->Position(), 0); - if (fixup->GetLocation() < static_cast<uint32_t>(label->Position())) { - break; - } - fixup->AddDependent(branch_id); - } Emit16(0); } else { // Branch target is an unbound label. Add it to a singly-linked list maintained within diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 5e6969b4c2..41eb5d36f2 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -24,6 +24,7 @@ #include "constants_arm.h" #include "utils/arm/managed_register_arm.h" #include "utils/arm/assembler_arm.h" +#include "utils/array_ref.h" #include "offsets.h" namespace art { @@ -37,6 +38,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { it_cond_index_(kNoItCondition), next_condition_(AL), fixups_(), + fixup_dependents_(), literals_(), last_position_adjustment_(0u), last_old_position_(0u), @@ -487,6 +489,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { return type_; } + bool IsLoadLiteral() const { + return GetType() >= kLoadLiteralNarrow; + } + Size GetOriginalSize() const { return original_size_; } @@ -507,12 +513,12 @@ class Thumb2Assembler FINAL : public ArmAssembler { return adjustment_; } - const std::vector<FixupId>& Dependents() const { - return dependents_; - } + // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_. + static void PrepareDependents(Thumb2Assembler* assembler); - void AddDependent(FixupId dependent_id) { - dependents_.push_back(dependent_id); + ArrayRef<FixupId> Dependents(const Thumb2Assembler& assembler) const { + return ArrayRef<FixupId>(assembler.fixup_dependents_.get() + dependents_start_, + dependents_count_); } // Resolve a branch when the target is known. 
@@ -557,7 +563,8 @@ class Thumb2Assembler FINAL : public ArmAssembler { location_(location), target_(kUnresolved), adjustment_(0u), - dependents_() { + dependents_count_(0u), + dependents_start_(0u) { } static size_t SizeInBytes(Size size); @@ -584,7 +591,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { uint32_t location_; // Offset into assembler buffer in bytes. uint32_t target_; // Offset into assembler buffer in bytes. uint32_t adjustment_; // The number of extra bytes inserted between location_ and target_. - std::vector<FixupId> dependents_; // Fixups that require adjustment when current size changes. + // Fixups that require adjustment when current size changes are stored in a single + // array in the assembler and we store only the start index and count here. + uint32_t dependents_count_; + uint32_t dependents_start_; }; // Emit a single 32 or 16 bit data processing instruction. @@ -760,6 +770,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset); std::vector<Fixup> fixups_; + std::unique_ptr<FixupId[]> fixup_dependents_; // Use std::deque<> for literal labels to allow insertions at the end // without invalidating pointers and references to existing elements. diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 68b7931a0c..004853f224 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -950,4 +950,65 @@ TEST_F(AssemblerThumb2Test, LoadLiteralDoubleFar) { __ GetAdjustedPosition(label.Position())); } +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax1KiBDueToAlignmentOnSecondPass) { + // First part: as TwoCbzBeyondMaxOffset but add one 16-bit instruction to the end, + // so that the size is not Aligned<4>(.). On the first pass, the assembler resizes + // the second CBZ because it's out of range, then it will resize the first CBZ + // which has been pushed out of range. Thus, after the first pass, the code size + // will appear Aligned<4>(.) but the final size will not be. + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 63; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 65; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + __ ldr(arm::R0, arm::Address(arm::R0)); + + std::string expected_part1 = + "cmp r0, #0\n" // cbz r0, label1 + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.n 2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n" // Here the offset is Aligned<4>(.). + "ldr r0, [r0]\n"; // Make the first part + + // Second part: as LoadLiteralMax1KiB with the caveat that the offset of the load + // literal will not be Aligned<4>(.) but it will appear to be when we process the + // instruction during the first pass, so the literal will need a padding and it + // will push the literal out of range, so we shall end up with "ldr.w". 
+ arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R0, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 511; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + expected_part1 + + "1:\n" + "ldr.w r0, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + } // namespace art diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h index ff5a77c97a..303e0d5ad4 100644 --- a/compiler/utils/array_ref.h +++ b/compiler/utils/array_ref.h @@ -62,14 +62,14 @@ class ArrayRef { } template <size_t size> - constexpr ArrayRef(T (&array)[size]) + explicit constexpr ArrayRef(T (&array)[size]) : array_(array), size_(size) { } template <typename U, size_t size> - constexpr ArrayRef(U (&array)[size], - typename std::enable_if<std::is_same<T, const U>::value, tag>::type - t ATTRIBUTE_UNUSED = tag()) + explicit constexpr ArrayRef(U (&array)[size], + typename std::enable_if<std::is_same<T, const U>::value, tag>::type + t ATTRIBUTE_UNUSED = tag()) : array_(array), size_(size) { } @@ -83,9 +83,9 @@ class ArrayRef { } template <typename U, typename Alloc> - ArrayRef(const std::vector<U, Alloc>& v, - typename std::enable_if<std::is_same<T, const U>::value, tag>::type - t ATTRIBUTE_UNUSED = tag()) + explicit ArrayRef(const std::vector<U, Alloc>& v, + typename std::enable_if<std::is_same<T, const U>::value, tag>::type - t ATTRIBUTE_UNUSED = tag()) + explicit ArrayRef(const std::vector<U, Alloc>& v, + typename std::enable_if<std::is_same<T, const U>::value, tag>::type + t ATTRIBUTE_UNUSED = tag()) : array_(v.data()), size_(v.size()) { }
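The Fixup::PrepareDependents() hunk above replaces per-fixup dependent vectors with one shared array plus a (start, count) range per fixup. Below is a minimal, self-contained sketch of that two-pass layout, for illustration only: the names Item, BuildDependents and the example in main() are hypothetical and are not part of the patch or of the ART assembler.

// Illustrative sketch of the "compact dependents" layout (hypothetical names, not ART code).
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>
#include <vector>

struct Item {
  uint32_t location;                // Position of the fixup in the code buffer.
  uint32_t target;                  // Position of its branch target.
  uint32_t dependents_count = 0u;   // How many items must be revisited when this one grows.
  uint32_t dependents_start = 0u;   // Start of this item's range in the shared array.
};

// Two passes, mirroring PrepareDependents(): first count dependents, then assign ranges
// and fill the shared array backwards by decrementing each end-of-range index.
std::unique_ptr<uint32_t[]> BuildDependents(std::vector<Item>& items) {
  const size_t end = items.size();
  // Pass 1: item j depends on every item i that lies between j and j's target.
  for (size_t j = 0; j != end; ++j) {
    if (items[j].target > items[j].location) {
      for (size_t i = j + 1; i != end && items[i].location < items[j].target; ++i) {
        items[i].dependents_count += 1u;
      }
    } else {
      for (size_t i = j; i != 0 && items[i - 1].location >= items[j].target; --i) {
        items[i - 1].dependents_count += 1u;
      }
    }
  }
  // Record the end of each range; it is decremented while filling.
  uint32_t total = 0u;
  for (Item& item : items) {
    total += item.dependents_count;
    item.dependents_start = total;
  }
  std::unique_ptr<uint32_t[]> dependents(new uint32_t[total]);
  // Pass 2: walk the same ranges and record j as a dependent of each covered item i.
  for (size_t j = 0; j != end; ++j) {
    if (items[j].target > items[j].location) {
      for (size_t i = j + 1; i != end && items[i].location < items[j].target; ++i) {
        dependents[--items[i].dependents_start] = static_cast<uint32_t>(j);
      }
    } else {
      for (size_t i = j; i != 0 && items[i - 1].location >= items[j].target; --i) {
        dependents[--items[i - 1].dependents_start] = static_cast<uint32_t>(j);
      }
    }
  }
  return dependents;
}

int main() {
  // Three fixups: two forward branches and one backward branch to the start.
  std::vector<Item> items = {{0u, 8u}, {4u, 12u}, {8u, 0u}};
  std::unique_ptr<uint32_t[]> deps = BuildDependents(items);
  for (size_t i = 0; i != items.size(); ++i) {
    std::cout << "item " << i << " dependents:";
    for (uint32_t k = 0; k != items[i].dependents_count; ++k) {
      std::cout << ' ' << deps[items[i].dependents_start + k];
    }
    std::cout << '\n';
  }
  return 0;
}

The point of the layout, as the patch's own comment notes, is to avoid a per-fixup vector: one allocation holds every dependent, and each fixup only stores the start index and count of its slice.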