Diffstat (limited to 'compiler')
139 files changed, 6458 insertions, 2655 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index 39470785ab..49449157fc 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -171,6 +171,7 @@ LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES := \ dex/quick/mips/mips_lir.h \ dex/quick/resource_mask.h \ dex/compiler_enums.h \ + dex/dex_to_dex_compiler.h \ dex/global_value_numbering.h \ dex/pass_me.h \ driver/compiler_driver.h \ diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index d215662645..dc2bc5c3f4 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -46,12 +46,12 @@ class CommonCompilerTest : public CommonRuntimeTest { // Create an OatMethod based on pointers (for unit tests). OatFile::OatMethod CreateOatMethod(const void* code); - void MakeExecutable(ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void MakeExecutable(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_); static void MakeExecutable(const void* code_start, size_t code_length); void MakeExecutable(mirror::ClassLoader* class_loader, const char* class_name) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); protected: virtual void SetUp(); @@ -76,17 +76,17 @@ class CommonCompilerTest : public CommonRuntimeTest { virtual void TearDown(); void CompileClass(mirror::ClassLoader* class_loader, const char* class_name) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); - void CompileMethod(ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void CompileMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_); void CompileDirectMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name, const char* method_name, const char* signature) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void CompileVirtualMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name, const char* method_name, const char* signature) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void ReserveImageSpace(); diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index d1acada6dd..74ef35e740 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -23,20 +23,12 @@ CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instr const ArrayRef<const uint8_t>& quick_code, bool owns_code_array) : compiler_driver_(compiler_driver), instruction_set_(instruction_set), owns_code_array_(owns_code_array), quick_code_(nullptr) { - SetCode(&quick_code); -} - -void CompiledCode::SetCode(const ArrayRef<const uint8_t>* quick_code) { - if (quick_code != nullptr) { - CHECK(!quick_code->empty()); - if (owns_code_array_) { - // If we are supposed to own the code, don't deduplicate it. - CHECK(quick_code_ == nullptr); - quick_code_ = new SwapVector<uint8_t>(quick_code->begin(), quick_code->end(), - compiler_driver_->GetSwapSpaceAllocator()); - } else { - quick_code_ = compiler_driver_->DeduplicateCode(*quick_code); - } + if (owns_code_array_) { + // If we are supposed to own the code, don't deduplicate it. 
+ quick_code_ = new SwapVector<uint8_t>(quick_code.begin(), quick_code.end(), + compiler_driver_->GetSwapSpaceAllocator()); + } else { + quick_code_ = compiler_driver_->DeduplicateCode(quick_code); } } diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index 45a62bc6c7..a4d2387030 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -47,8 +47,6 @@ class CompiledCode { return quick_code_; } - void SetCode(const ArrayRef<const uint8_t>* quick_code); - bool operator==(const CompiledCode& rhs) const; // To align an offset from a page-aligned value to make it suitable diff --git a/compiler/compiler.h b/compiler/compiler.h index e5d1aff08c..01ca46efd3 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -58,7 +58,7 @@ class Compiler { const DexFile& dex_file) const = 0; virtual uintptr_t GetEntryPointOf(ArtMethod* method) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0; + SHARED_REQUIRES(Locks::mutator_lock_) = 0; uint64_t GetMaximumCompilationTimeBeforeWarning() const { return maximum_compilation_time_before_warning_; @@ -89,7 +89,7 @@ class Compiler { const DexFile& dex_file); protected: - explicit Compiler(CompilerDriver* driver, uint64_t warning) : + Compiler(CompilerDriver* driver, uint64_t warning) : driver_(driver), maximum_compilation_time_before_warning_(warning) { } diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h index d28df1dcce..5203355d06 100644 --- a/compiler/dex/compiler_ir.h +++ b/compiler/dex/compiler_ir.h @@ -129,7 +129,7 @@ struct OptionContent { * Union containing the option value of either type. */ union OptionContainer { - explicit OptionContainer(const OptionContainer& c, OptionType t) { + OptionContainer(const OptionContainer& c, OptionType t) { if (t == kString) { DCHECK(c.s != nullptr); s = strndup(c.s, kOptionStringMaxLength); diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index bd590467e3..603130ab96 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -14,10 +14,13 @@ * limitations under the License. */ +#include "dex_to_dex_compiler.h" + #include "art_field-inl.h" #include "art_method-inl.h" #include "base/logging.h" #include "base/mutex.h" +#include "compiled_method.h" #include "dex_file-inl.h" #include "dex_instruction-inl.h" #include "driver/compiler_driver.h" @@ -34,6 +37,13 @@ const bool kEnableQuickening = true; // Control check-cast elision. const bool kEnableCheckCastEllision = true; +struct QuickenedInfo { + QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {} + + uint32_t dex_pc; + uint16_t dex_member_index; +}; + class DexCompiler { public: DexCompiler(art::CompilerDriver& compiler, @@ -47,13 +57,17 @@ class DexCompiler { void Compile(); + const std::vector<QuickenedInfo>& GetQuickenedInfo() const { + return quickened_info_; + } + private: const DexFile& GetDexFile() const { return *unit_.GetDexFile(); } bool PerformOptimizations() const { - return dex_to_dex_compilation_level_ >= kOptimize; + return dex_to_dex_compilation_level_ >= DexToDexCompilationLevel::kOptimize; } // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where @@ -87,11 +101,16 @@ class DexCompiler { const DexCompilationUnit& unit_; const DexToDexCompilationLevel dex_to_dex_compilation_level_; + // Filled by the compiler when quickening, in order to encode that information + // in the .oat file. The runtime will use that information to get to the original + // opcodes. 
+ std::vector<QuickenedInfo> quickened_info_; + DISALLOW_COPY_AND_ASSIGN(DexCompiler); }; void DexCompiler::Compile() { - DCHECK_GE(dex_to_dex_compilation_level_, kRequired); + DCHECK_GE(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kRequired); const DexFile::CodeItem* code_item = unit_.GetCodeItem(); const uint16_t* insns = code_item->insns_; const uint32_t insns_size = code_item->insns_size_in_code_units_; @@ -248,6 +267,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, inst->SetOpcode(new_opcode); // Replace field index by field offset. inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value())); + quickened_info_.push_back(QuickenedInfo(dex_pc, field_idx)); } } @@ -287,24 +307,61 @@ void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, } else { inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx)); } + quickened_info_.push_back(QuickenedInfo(dex_pc, method_idx)); } } } -} // namespace optimizer -} // namespace art - -extern "C" void ArtCompileDEX(art::CompilerDriver& driver, const art::DexFile::CodeItem* code_item, - uint32_t access_flags, art::InvokeType invoke_type, - uint16_t class_def_idx, uint32_t method_idx, jobject class_loader, - const art::DexFile& dex_file, - art::DexToDexCompilationLevel dex_to_dex_compilation_level) { - UNUSED(invoke_type); - if (dex_to_dex_compilation_level != art::kDontDexToDexCompile) { +CompiledMethod* ArtCompileDEX( + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type ATTRIBUTE_UNUSED, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file, + DexToDexCompilationLevel dex_to_dex_compilation_level) { + DCHECK(driver != nullptr); + if (dex_to_dex_compilation_level != DexToDexCompilationLevel::kDontDexToDexCompile) { art::DexCompilationUnit unit(nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item, class_def_idx, method_idx, access_flags, - driver.GetVerifiedMethod(&dex_file, method_idx)); - art::optimizer::DexCompiler dex_compiler(driver, unit, dex_to_dex_compilation_level); + driver->GetVerifiedMethod(&dex_file, method_idx)); + art::optimizer::DexCompiler dex_compiler(*driver, unit, dex_to_dex_compilation_level); dex_compiler.Compile(); + if (dex_compiler.GetQuickenedInfo().empty()) { + // No need to create a CompiledMethod if there are no quickened opcodes. + return nullptr; + } + + // Create a `CompiledMethod`, with the quickened information in the vmap table. + Leb128EncodingVector builder; + for (QuickenedInfo info : dex_compiler.GetQuickenedInfo()) { + builder.PushBackUnsigned(info.dex_pc); + builder.PushBackUnsigned(info.dex_member_index); + } + InstructionSet instruction_set = driver->GetInstructionSet(); + if (instruction_set == kThumb2) { + // Don't use the thumb2 instruction set to avoid the one off code delta. 
+ instruction_set = kArm; + } + return CompiledMethod::SwapAllocCompiledMethod( + driver, + instruction_set, + ArrayRef<const uint8_t>(), // no code + 0, + 0, + 0, + nullptr, // src_mapping_table + ArrayRef<const uint8_t>(), // mapping_table + ArrayRef<const uint8_t>(builder.GetData()), // vmap_table + ArrayRef<const uint8_t>(), // gc_map + ArrayRef<const uint8_t>(), // cfi data + ArrayRef<const LinkerPatch>()); } + return nullptr; } + +} // namespace optimizer + +} // namespace art diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h new file mode 100644 index 0000000000..3fad6d4c95 --- /dev/null +++ b/compiler/dex/dex_to_dex_compiler.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ +#define ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ + +#include "jni.h" + +#include "dex_file.h" +#include "invoke_type.h" + +namespace art { + +class CompiledMethod; +class CompilerDriver; + +namespace optimizer { + +enum class DexToDexCompilationLevel { + kDontDexToDexCompile, // Only meaning wrt image time interpretation. + kRequired, // Dex-to-dex compilation required for correctness. + kOptimize // Perform required transformation and peep-hole optimizations. +}; +std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs); + +CompiledMethod* ArtCompileDEX(CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file, + DexToDexCompilationLevel dex_to_dex_compilation_level); + +} // namespace optimizer + +} // namespace art + +#endif // ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc index b1f5d870d4..4de3410616 100644 --- a/compiler/dex/gvn_dead_code_elimination.cc +++ b/compiler/dex/gvn_dead_code_elimination.cc @@ -74,7 +74,7 @@ inline void GvnDeadCodeElimination::MIRData::RemovePrevChange(int v_reg, MIRData GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc) : num_vregs_(num_vregs), vreg_data_(alloc->AllocArray<VRegValue>(num_vregs, kArenaAllocMisc)), - vreg_high_words_(num_vregs, false, Allocator::GetNoopAllocator(), + vreg_high_words_(false, Allocator::GetNoopAllocator(), BitVector::BitsToWords(num_vregs), alloc->AllocArray<uint32_t>(BitVector::BitsToWords(num_vregs))), mir_data_(alloc->Adapter()) { @@ -715,6 +715,7 @@ void GvnDeadCodeElimination::RecordPassTryToKillOverwrittenMoveOrMoveSrc(uint16_ // Try to find a MOVE to a vreg that wasn't changed since check_change. uint16_t value_name = data->wide_def ? 
lvn_->GetSregValueWide(dest_s_reg) : lvn_->GetSregValue(dest_s_reg); + uint32_t dest_v_reg = mir_graph_->SRegToVReg(dest_s_reg); for (size_t c = check_change + 1u, size = vreg_chains_.NumMIRs(); c != size; ++c) { MIRData* d = vreg_chains_.GetMIRData(c); if (d->is_move && d->wide_def == data->wide_def && @@ -731,8 +732,21 @@ void GvnDeadCodeElimination::RecordPassTryToKillOverwrittenMoveOrMoveSrc(uint16_ if (!vreg_chains_.IsVRegUsed(check_change + 1u, c, new_dest_v_reg, mir_graph_) && (!d->wide_def || !vreg_chains_.IsVRegUsed(check_change + 1u, c, new_dest_v_reg + 1, mir_graph_))) { - RecordPassKillMoveByRenamingSrcDef(check_change, c); - return; + // If the move's destination vreg changed, check if the vreg we're trying + // to rename is unused after that change. + uint16_t dest_change = vreg_chains_.FindFirstChangeAfter(new_dest_v_reg, c); + if (d->wide_def) { + uint16_t dest_change_high = vreg_chains_.FindFirstChangeAfter(new_dest_v_reg + 1, c); + if (dest_change_high != kNPos && + (dest_change == kNPos || dest_change_high < dest_change)) { + dest_change = dest_change_high; + } + } + if (dest_change == kNPos || + !vreg_chains_.IsVRegUsed(dest_change + 1u, size, dest_v_reg, mir_graph_)) { + RecordPassKillMoveByRenamingSrcDef(check_change, c); + return; + } } } } @@ -1192,7 +1206,6 @@ bool GvnDeadCodeElimination::RecordMIR(MIR* mir) { case Instruction::CONST_WIDE_32: case Instruction::CONST_WIDE: case Instruction::CONST_WIDE_HIGH16: - case Instruction::ARRAY_LENGTH: case Instruction::CMPL_FLOAT: case Instruction::CMPG_FLOAT: case Instruction::CMPL_DOUBLE: @@ -1316,6 +1329,13 @@ bool GvnDeadCodeElimination::RecordMIR(MIR* mir) { } break; + case Instruction::ARRAY_LENGTH: + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) { + must_keep = true; + uses_all_vregs = true; + } + break; + case Instruction::AGET_OBJECT: case Instruction::AGET: case Instruction::AGET_WIDE: diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc index 461c844a60..4df0a8b98d 100644 --- a/compiler/dex/gvn_dead_code_elimination_test.cc +++ b/compiler/dex/gvn_dead_code_elimination_test.cc @@ -1933,6 +1933,78 @@ TEST_F(GvnDeadCodeEliminationTestDiamond, LongOverlaps1) { } } +TEST_F(GvnDeadCodeEliminationTestSimple, LongOverlaps2) { + static const MIRDef mirs[] = { + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u), + DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 2u, 0u), + DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 4u, 2u), + }; + + // The last insn should overlap the first and second. + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 1, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + static const int32_t wide_sregs[] = { 0, 2, 4 }; + MarkAsWideSRegs(wide_sregs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_EQ(value_names_[0], value_names_[1]); + EXPECT_EQ(value_names_[0], value_names_[2]); + + static const bool eliminated[] = { + false, true, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the CONST_WIDE registers have been correctly renamed. 
+ MIR* const_wide = &mirs_[0]; + ASSERT_EQ(2u, const_wide->ssa_rep->num_defs); + EXPECT_EQ(4, const_wide->ssa_rep->defs[0]); + EXPECT_EQ(5, const_wide->ssa_rep->defs[1]); + EXPECT_EQ(1u, const_wide->dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, LongOverlaps3) { + static const MIRDef mirs[] = { + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u), + DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 2u, 0u), + DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 4u, 2u), + }; + + // The last insn should overlap the first and second. + static const int32_t sreg_to_vreg_map[] = { 2, 3, 0, 1, 1, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + static const int32_t wide_sregs[] = { 0, 2, 4 }; + MarkAsWideSRegs(wide_sregs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_EQ(value_names_[0], value_names_[1]); + EXPECT_EQ(value_names_[0], value_names_[2]); + + static const bool eliminated[] = { + false, true, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the CONST_WIDE registers have been correctly renamed. + MIR* const_wide = &mirs_[0]; + ASSERT_EQ(2u, const_wide->ssa_rep->num_defs); + EXPECT_EQ(4, const_wide->ssa_rep->defs[0]); + EXPECT_EQ(5, const_wide->ssa_rep->defs[1]); + EXPECT_EQ(1u, const_wide->dalvikInsn.vA); +} + TEST_F(GvnDeadCodeEliminationTestSimple, MixedOverlaps1) { static const MIRDef mirs[] = { DEF_CONST(3, Instruction::CONST, 0u, 1000u), @@ -2066,4 +2138,64 @@ TEST_F(GvnDeadCodeEliminationTestSimple, UnusedRegs2) { } } +TEST_F(GvnDeadCodeEliminationTestSimple, ArrayLengthThrows) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 0), // null + DEF_UNOP(3, Instruction::ARRAY_LENGTH, 1u, 0u), // null.length + DEF_CONST(3, Instruction::CONST, 2u, 1000u), // Overwrite the array-length dest. 
+ }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 1 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2 }; + ExpectValueNamesNE(diff_indexes); + + static const bool eliminated[] = { + false, false, false, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Dependancy) { + static const MIRDef mirs[] = { + DEF_MOVE(3, Instruction::MOVE, 5u, 1u), // move v5,v1 + DEF_MOVE(3, Instruction::MOVE, 6u, 1u), // move v12,v1 + DEF_MOVE(3, Instruction::MOVE, 7u, 0u), // move v13,v0 + DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 8u, 2u), // move v0_1,v2_3 + DEF_MOVE(3, Instruction::MOVE, 10u, 6u), // move v3,v12 + DEF_MOVE(3, Instruction::MOVE, 11u, 4u), // move v2,v4 + DEF_MOVE(3, Instruction::MOVE, 12u, 7u), // move v4,v13 + DEF_MOVE(3, Instruction::MOVE, 13, 11u), // move v12,v2 + DEF_MOVE(3, Instruction::MOVE, 14u, 10u), // move v2,v3 + DEF_MOVE(3, Instruction::MOVE, 15u, 5u), // move v3,v5 + DEF_MOVE(3, Instruction::MOVE, 16u, 12u), // move v5,v4 + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 12, 13, 0, 1, 3, 2, 4, 12, 2, 3, 5 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + static const int32_t wide_sregs[] = { 2, 8 }; + MarkAsWideSRegs(wide_sregs); + PerformGVN_DCE(); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, true, true, false, false, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + } // namespace art diff --git a/compiler/dex/mir_field_info.h b/compiler/dex/mir_field_info.h index e4570fd8d3..053029d839 100644 --- a/compiler/dex/mir_field_info.h +++ b/compiler/dex/mir_field_info.h @@ -135,10 +135,10 @@ class MirIFieldLoweringInfo : public MirFieldInfo { // with IGET/IPUT. For fast path fields, retrieve the field offset. static void Resolve(CompilerDriver* compiler_driver, const DexCompilationUnit* mUnit, MirIFieldLoweringInfo* field_infos, size_t count) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); // Construct an unresolved instance field lowering info. - explicit MirIFieldLoweringInfo(uint16_t field_idx, DexMemAccessType type, bool is_quickened) + MirIFieldLoweringInfo(uint16_t field_idx, DexMemAccessType type, bool is_quickened) : MirFieldInfo(field_idx, kFlagIsVolatile | (is_quickened ? kFlagIsQuickened : 0u), type), // Without kFlagIsStatic. @@ -192,10 +192,10 @@ class MirSFieldLoweringInfo : public MirFieldInfo { // and the type index of the declaring class in the compiled method's dex file. static void Resolve(CompilerDriver* compiler_driver, const DexCompilationUnit* mUnit, MirSFieldLoweringInfo* field_infos, size_t count) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); // Construct an unresolved static field lowering info. 
- explicit MirSFieldLoweringInfo(uint16_t field_idx, DexMemAccessType type) + MirSFieldLoweringInfo(uint16_t field_idx, DexMemAccessType type) : MirFieldInfo(field_idx, kFlagIsVolatile | kFlagIsStatic, type), field_offset_(0u), storage_index_(DexFile::kDexNoIndex) { diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index dbe906280f..8bf709ab86 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -173,7 +173,17 @@ enum OatMethodAttributes { typedef uint16_t BasicBlockId; static const BasicBlockId NullBasicBlockId = 0; -static constexpr bool kLeafOptimization = false; + +// Leaf optimization is basically the removal of suspend checks from leaf methods. +// This is incompatible with SuspendCheckElimination (SCE) which eliminates suspend +// checks from loops that call any non-intrinsic method, since a loop that calls +// only a leaf method would end up without any suspend checks at all. So turning +// this on automatically disables the SCE in MIRGraph::EliminateSuspendChecksGate(). +// +// Since the Optimizing compiler is actually applying the same optimization, Quick +// must not run SCE anyway, so we enable this optimization as a way to disable SCE +// while keeping a consistent behavior across the backends, b/22657404. +static constexpr bool kLeafOptimization = true; /* * In general, vreg/sreg describe Dalvik registers that originated with dx. However, @@ -251,7 +261,7 @@ class MIR : public ArenaObject<kArenaAllocMIR> { uint32_t arg[5]; /* vC/D/E/F/G in invoke or filled-new-array */ Instruction::Code opcode; - explicit DecodedInstruction():vA(0), vB(0), vB_wide(0), vC(0), opcode(Instruction::NOP) { + DecodedInstruction() : vA(0), vB(0), vB_wide(0), vC(0), opcode(Instruction::NOP) { } /* @@ -343,7 +353,7 @@ class MIR : public ArenaObject<kArenaAllocMIR> { uint32_t method_lowering_info; } meta; - explicit MIR() : offset(0), optimization_flags(0), m_unit_index(0), bb(NullBasicBlockId), + MIR() : offset(0), optimization_flags(0), m_unit_index(0), bb(NullBasicBlockId), next(nullptr), ssa_rep(nullptr) { memset(&meta, 0, sizeof(meta)); } diff --git a/compiler/dex/mir_method_info.h b/compiler/dex/mir_method_info.h index 946c74becf..4512f35a99 100644 --- a/compiler/dex/mir_method_info.h +++ b/compiler/dex/mir_method_info.h @@ -99,7 +99,7 @@ class MirMethodLoweringInfo : public MirMethodInfo { // path methods, retrieve the method's vtable index and direct code and method when applicable. static void Resolve(CompilerDriver* compiler_driver, const DexCompilationUnit* mUnit, MirMethodLoweringInfo* method_infos, size_t count) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); MirMethodLoweringInfo(uint16_t method_idx, InvokeType type, bool is_quickened) : MirMethodInfo(method_idx, diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 5bb0ce3ba5..80b7ac1e5b 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -1724,7 +1724,8 @@ void MIRGraph::StringChange() { bool MIRGraph::EliminateSuspendChecksGate() { - if ((cu_->disable_opt & (1 << kSuspendCheckElimination)) != 0 || // Disabled. + if (kLeafOptimization || // Incompatible (could create loops without suspend checks). + (cu_->disable_opt & (1 << kSuspendCheckElimination)) != 0 || // Disabled. GetMaxNestedLoops() == 0u || // Nothing to do. GetMaxNestedLoops() >= 32u || // Only 32 bits in suspend_checks_in_loops_[.]. // Exclude 32 as well to keep bit shifts well-defined. 
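The annotation changes running through these headers (SHARED_LOCKS_REQUIRED -> SHARED_REQUIRES, LOCKS_EXCLUDED(lock) -> REQUIRES(!lock)) follow Clang's capability-based thread-safety attribute style. A minimal standalone sketch of that pattern, using illustrative stand-in macros rather than ART's own definitions in base/macros.h:

    // Sketch only: stand-in macros wrapping Clang's thread-safety attributes.
    #define CAPABILITY(x)        __attribute__((capability(x)))
    #define REQUIRES(...)        __attribute__((requires_capability(__VA_ARGS__)))
    #define SHARED_REQUIRES(...) __attribute__((requires_shared_capability(__VA_ARGS__)))
    #define ACQUIRE(...)         __attribute__((acquire_capability(__VA_ARGS__)))
    #define RELEASE(...)         __attribute__((release_capability(__VA_ARGS__)))

    class CAPABILITY("mutex") Mutex {
     public:
      void ExclusiveLock() ACQUIRE() {}
      void ExclusiveUnlock() RELEASE() {}
    };

    Mutex lock_;

    // Caller must hold lock_ exclusively.
    void WriteData() REQUIRES(lock_) {}

    // Caller must hold lock_ at least shared; replaces SHARED_LOCKS_REQUIRED(lock_).
    void ReadData() SHARED_REQUIRES(lock_) {}

    // Caller must NOT already hold lock_; REQUIRES(!lock_) replaces LOCKS_EXCLUDED(lock_).
    void AcquireAndWrite() REQUIRES(!lock_) {
      lock_.ExclusiveLock();
      WriteData();
      lock_.ExclusiveUnlock();
    }

With clang's -Wthread-safety (and -Wthread-safety-negative for the negated form), callers that violate these contracts are flagged at compile time, which is what the migrated annotations above rely on.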
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 10a4337cf5..47123ba28c 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -467,8 +467,17 @@ class SuspendCheckEliminationTest : public MirOptimizationTest { cu_.mir_graph->ComputeDominators(); cu_.mir_graph->ComputeTopologicalSortOrder(); cu_.mir_graph->SSATransformationEnd(); + bool gate_result = cu_.mir_graph->EliminateSuspendChecksGate(); - ASSERT_TRUE(gate_result); + ASSERT_NE(gate_result, kLeafOptimization); + if (kLeafOptimization) { + // Even with kLeafOptimization on and Gate() refusing to allow SCE, we want + // to run the SCE test to avoid bitrot, so we need to initialize explicitly. + cu_.mir_graph->suspend_checks_in_loops_ = + cu_.mir_graph->arena_->AllocArray<uint32_t>(cu_.mir_graph->GetNumBlocks(), + kArenaAllocMisc); + } + TopologicalSortIterator iterator(cu_.mir_graph.get()); bool change = false; for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) { diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h index cbe4a02edb..d0af71c061 100644 --- a/compiler/dex/pass_driver_me.h +++ b/compiler/dex/pass_driver_me.h @@ -36,7 +36,7 @@ class PassManagerOptions; class PassDriverME: public PassDriver { public: - explicit PassDriverME(const PassManager* const pass_manager, CompilationUnit* cu) + PassDriverME(const PassManager* const pass_manager, CompilationUnit* cu) : PassDriver(pass_manager), pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") { pass_me_data_holder_.bb = nullptr; pass_me_data_holder_.c_unit = cu; @@ -314,4 +314,3 @@ class PassDriverME: public PassDriver { }; } // namespace art #endif // ART_COMPILER_DEX_PASS_DRIVER_ME_H_ - diff --git a/compiler/dex/pass_driver_me_opts.h b/compiler/dex/pass_driver_me_opts.h index e94c1894c9..c8093d0a02 100644 --- a/compiler/dex/pass_driver_me_opts.h +++ b/compiler/dex/pass_driver_me_opts.h @@ -29,9 +29,9 @@ class PassManager; class PassDriverMEOpts : public PassDriverME { public: - explicit PassDriverMEOpts(const PassManager* const manager, - const PassManager* const post_opt_pass_manager, - CompilationUnit* cu) + PassDriverMEOpts(const PassManager* const manager, + const PassManager* const post_opt_pass_manager, + CompilationUnit* cu) : PassDriverME(manager, cu), post_opt_pass_manager_(post_opt_pass_manager) { } diff --git a/compiler/dex/pass_driver_me_post_opt.h b/compiler/dex/pass_driver_me_post_opt.h index 9e03c4e73e..94176dbf0f 100644 --- a/compiler/dex/pass_driver_me_post_opt.h +++ b/compiler/dex/pass_driver_me_post_opt.h @@ -28,7 +28,7 @@ class PassDataHolder; class PassDriverMEPostOpt : public PassDriverME { public: - explicit PassDriverMEPostOpt(const PassManager* const manager, CompilationUnit* cu) + PassDriverMEPostOpt(const PassManager* const manager, CompilationUnit* cu) : PassDriverME(manager, cu) { } diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index df4a9f2048..5f911db382 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -1298,7 +1298,7 @@ void ArmMir2Lir::AssembleLIR() { */ delta &= ~0x3; } - DCHECK_EQ((delta & 0x3), 0); + DCHECK_ALIGNED(delta, 4); // First, a sanity check for cases we shouldn't see now if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) || ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) { diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc 
index 2ef92f851b..062f7aff66 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -880,7 +880,7 @@ LIR* ArmMir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStor LIR* ArmMir2Lir::LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStorage r_base, int displacement, RegStorage r_src_dest, RegStorage r_work) { - DCHECK_EQ(displacement & 3, 0); + DCHECK_ALIGNED(displacement, 4); constexpr int kOffsetMask = 0xff << 2; int encoded_disp = (displacement & kOffsetMask) >> 2; // Within range of the instruction. RegStorage r_ptr = r_base; @@ -942,7 +942,7 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag already_generated = true; break; } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); scale = 2; if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) && (displacement >= 0)) { @@ -959,14 +959,14 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag } break; case kUnsignedHalf: - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); scale = 1; short_form = all_low && (displacement >> (5 + scale)) == 0; opcode16 = kThumbLdrhRRI5; opcode32 = kThumb2LdrhRRI12; break; case kSignedHalf: - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); scale = 1; DCHECK_EQ(opcode16, kThumbBkpt); // Not available. opcode32 = kThumb2LdrshRRI12; @@ -1096,7 +1096,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora already_generated = true; break; } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); scale = 2; if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) { short_form = true; @@ -1109,7 +1109,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora break; case kUnsignedHalf: case kSignedHalf: - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); scale = 1; short_form = all_low && (displacement >> (5 + scale)) == 0; opcode16 = kThumbStrhRRI5; diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index b78fb80aa0..25c69d19e5 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -909,7 +909,7 @@ void Arm64Mir2Lir::AssembleLIR() { CodeOffset target = target_lir->offset + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); int32_t delta = target - pc; - DCHECK_EQ(delta & 0x3, 0); + DCHECK_ALIGNED(delta, 4); if (!IS_SIGNED_IMM26(delta >> 2)) { LOG(FATAL) << "Invalid jump range in kFixupT1Branch"; } @@ -933,7 +933,7 @@ void Arm64Mir2Lir::AssembleLIR() { CodeOffset target = target_lir->offset + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); int32_t delta = target - pc; - DCHECK_EQ(delta & 0x3, 0); + DCHECK_ALIGNED(delta, 4); if (!IS_SIGNED_IMM19(delta >> 2)) { LOG(FATAL) << "Invalid jump range in kFixupLoad"; } @@ -965,7 +965,7 @@ void Arm64Mir2Lir::AssembleLIR() { CodeOffset target = target_lir->offset + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); int32_t delta = target - pc; - DCHECK_EQ(delta & 0x3, 0); + DCHECK_ALIGNED(delta, 4); // Check if branch offset can be encoded in tbz/tbnz. 
if (!IS_SIGNED_IMM14(delta >> 2)) { DexOffset dalvik_offset = lir->dalvik_offset; diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 2568ee3064..42b792ca1a 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -38,6 +38,7 @@ static constexpr bool kIntrinsicIsStatic[] = { true, // kIntrinsicFloatCvt true, // kIntrinsicReverseBits true, // kIntrinsicReverseBytes + true, // kIntrinsicNumberOfLeadingZeros true, // kIntrinsicAbsInt true, // kIntrinsicAbsLong true, // kIntrinsicAbsFloat @@ -55,6 +56,7 @@ static constexpr bool kIntrinsicIsStatic[] = { false, // kIntrinsicReferenceGetReferent false, // kIntrinsicCharAt false, // kIntrinsicCompareTo + false, // kIntrinsicEquals false, // kIntrinsicGetCharsNoCheck false, // kIntrinsicIsEmptyOrLength false, // kIntrinsicIndexOf @@ -75,6 +77,8 @@ static_assert(kIntrinsicIsStatic[kIntrinsicDoubleCvt], "DoubleCvt must be static static_assert(kIntrinsicIsStatic[kIntrinsicFloatCvt], "FloatCvt must be static"); static_assert(kIntrinsicIsStatic[kIntrinsicReverseBits], "ReverseBits must be static"); static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static"); +static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros], + "NumberOfLeadingZeros must be static"); static_assert(kIntrinsicIsStatic[kIntrinsicAbsInt], "AbsInt must be static"); static_assert(kIntrinsicIsStatic[kIntrinsicAbsLong], "AbsLong must be static"); static_assert(kIntrinsicIsStatic[kIntrinsicAbsFloat], "AbsFloat must be static"); @@ -92,6 +96,7 @@ static_assert(kIntrinsicIsStatic[kIntrinsicRoundDouble], "RoundDouble must be st static_assert(!kIntrinsicIsStatic[kIntrinsicReferenceGetReferent], "Get must not be static"); static_assert(!kIntrinsicIsStatic[kIntrinsicCharAt], "CharAt must not be static"); static_assert(!kIntrinsicIsStatic[kIntrinsicCompareTo], "CompareTo must not be static"); +static_assert(!kIntrinsicIsStatic[kIntrinsicEquals], "String equals must not be static"); static_assert(!kIntrinsicIsStatic[kIntrinsicGetCharsNoCheck], "GetCharsNoCheck must not be static"); static_assert(!kIntrinsicIsStatic[kIntrinsicIsEmptyOrLength], "IsEmptyOrLength must not be static"); static_assert(!kIntrinsicIsStatic[kIntrinsicIndexOf], "IndexOf must not be static"); @@ -189,6 +194,7 @@ const char* const DexFileMethodInliner::kNameCacheNames[] = { "getReferent", // kNameCacheReferenceGet "charAt", // kNameCacheCharAt "compareTo", // kNameCacheCompareTo + "equals", // kNameCacheEquals "getCharsNoCheck", // kNameCacheGetCharsNoCheck "isEmpty", // kNameCacheIsEmpty "indexOf", // kNameCacheIndexOf @@ -225,6 +231,7 @@ const char* const DexFileMethodInliner::kNameCacheNames[] = { "putObjectVolatile", // kNameCachePutObjectVolatile "putOrderedObject", // kNameCachePutOrderedObject "arraycopy", // kNameCacheArrayCopy + "numberOfLeadingZeros", // kNameCacheNumberOfLeadingZeros }; const DexFileMethodInliner::ProtoDef DexFileMethodInliner::kProtoCacheDefs[] = { @@ -280,6 +287,8 @@ const DexFileMethodInliner::ProtoDef DexFileMethodInliner::kProtoCacheDefs[] = { { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheLong } }, // kProtoCacheJS_V { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheShort } }, + // kProtoCacheObject_Z + { kClassCacheBoolean, 1, { kClassCacheJavaLangObject } }, // kProtoCacheObjectJII_Z { kClassCacheBoolean, 4, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheInt, kClassCacheInt } }, @@ -368,6 +377,9 @@ const 
DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32), INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64), + INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32), + INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64), + INTRINSIC(JavaLangMath, Abs, I_I, kIntrinsicAbsInt, 0), INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0), INTRINSIC(JavaLangMath, Abs, J_J, kIntrinsicAbsLong, 0), @@ -411,6 +423,7 @@ const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0), INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0), + INTRINSIC(JavaLangString, Equals, Object_Z, kIntrinsicEquals, 0), INTRINSIC(JavaLangString, GetCharsNoCheck, IICharArrayI_V, kIntrinsicGetCharsNoCheck, 0), INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty), INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone), @@ -581,6 +594,9 @@ bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) { return backend->GenInlinedCharAt(info); case kIntrinsicCompareTo: return backend->GenInlinedStringCompareTo(info); + case kIntrinsicEquals: + // Quick does not implement this intrinsic. + return false; case kIntrinsicGetCharsNoCheck: return backend->GenInlinedStringGetCharsNoCheck(info); case kIntrinsicIsEmptyOrLength: @@ -614,6 +630,8 @@ bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) { intrinsic.d.data & kIntrinsicFlagIsOrdered); case kIntrinsicSystemArrayCopyCharArray: return backend->GenInlinedArrayCopyCharArray(info); + case kIntrinsicNumberOfLeadingZeros: + return false; // not implemented in quick default: LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode; return false; // avoid warning "control reaches end of non-void function" diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index 26b41bf54d..d6c8bfbdb6 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -62,49 +62,49 @@ class DexFileMethodInliner { * @return true if the method is a candidate for inlining, false otherwise. */ bool AnalyseMethodCode(verifier::MethodVerifier* verifier) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_); + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_); /** * Check whether a particular method index corresponds to an intrinsic or special function. */ - InlineMethodFlags IsIntrinsicOrSpecial(uint32_t method_index) LOCKS_EXCLUDED(lock_); + InlineMethodFlags IsIntrinsicOrSpecial(uint32_t method_index) REQUIRES(!lock_); /** * Check whether a particular method index corresponds to an intrinsic function. */ - bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) LOCKS_EXCLUDED(lock_); + bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) REQUIRES(!lock_); /** * Generate code for an intrinsic function invocation. */ - bool GenIntrinsic(Mir2Lir* backend, CallInfo* info) LOCKS_EXCLUDED(lock_); + bool GenIntrinsic(Mir2Lir* backend, CallInfo* info) REQUIRES(!lock_); /** * Check whether a particular method index corresponds to a special function. 
*/ - bool IsSpecial(uint32_t method_index) LOCKS_EXCLUDED(lock_); + bool IsSpecial(uint32_t method_index) REQUIRES(!lock_); /** * Generate code for a special function. */ - bool GenSpecial(Mir2Lir* backend, uint32_t method_idx) LOCKS_EXCLUDED(lock_); + bool GenSpecial(Mir2Lir* backend, uint32_t method_idx) REQUIRES(!lock_); /** * Try to inline an invoke. */ bool GenInline(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke, uint32_t method_idx) - LOCKS_EXCLUDED(lock_); + REQUIRES(!lock_); /** * Gets the thread pointer entrypoint offset for a string init method index and pointer size. */ uint32_t GetOffsetForStringInit(uint32_t method_index, size_t pointer_size) - LOCKS_EXCLUDED(lock_); + REQUIRES(!lock_); /** * Check whether a particular method index is a string init. */ - bool IsStringInitMethodIndex(uint32_t method_index) LOCKS_EXCLUDED(lock_); + bool IsStringInitMethodIndex(uint32_t method_index) REQUIRES(!lock_); /** * To avoid multiple lookups of a class by its descriptor, we cache its @@ -170,6 +170,7 @@ class DexFileMethodInliner { kNameCacheReferenceGetReferent, kNameCacheCharAt, kNameCacheCompareTo, + kNameCacheEquals, kNameCacheGetCharsNoCheck, kNameCacheIsEmpty, kNameCacheIndexOf, @@ -206,6 +207,7 @@ class DexFileMethodInliner { kNameCachePutObjectVolatile, kNameCachePutOrderedObject, kNameCacheArrayCopy, + kNameCacheNumberOfLeadingZeros, kNameCacheLast }; @@ -242,6 +244,7 @@ class DexFileMethodInliner { kProtoCacheJJ_J, kProtoCacheJJ_V, kProtoCacheJS_V, + kProtoCacheObject_Z, kProtoCacheObjectJII_Z, kProtoCacheObjectJJJ_Z, kProtoCacheObjectJObjectObject_Z, @@ -351,11 +354,11 @@ class DexFileMethodInliner { * * Only DexFileToMethodInlinerMap may call this function to initialize the inliner. */ - void FindIntrinsics(const DexFile* dex_file) EXCLUSIVE_LOCKS_REQUIRED(lock_); + void FindIntrinsics(const DexFile* dex_file) REQUIRES(lock_); friend class DexFileToMethodInlinerMap; - bool AddInlineMethod(int32_t method_idx, const InlineMethod& method) LOCKS_EXCLUDED(lock_); + bool AddInlineMethod(int32_t method_idx, const InlineMethod& method) REQUIRES(!lock_); static bool GenInlineConst(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke, MIR* move_result, const InlineMethod& method); diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h index 94ffd7f957..3e9fb96bfa 100644 --- a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h +++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h @@ -40,12 +40,11 @@ class LazyDebugFrameOpCodeWriter FINAL const ArenaVector<uint8_t>* Patch(size_t code_size); - explicit LazyDebugFrameOpCodeWriter(LIR** last_lir_insn, bool enable_writes, - ArenaAllocator* allocator) - : Base(enable_writes, allocator->Adapter()), - last_lir_insn_(last_lir_insn), - advances_(allocator->Adapter()), - patched_(false) { + LazyDebugFrameOpCodeWriter(LIR** last_lir_insn, bool enable_writes, ArenaAllocator* allocator) + : Base(enable_writes, allocator->Adapter()), + last_lir_insn_(last_lir_insn), + advances_(allocator->Adapter()), + patched_(false) { } private: diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index b098bc2b5d..ec4bad778c 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -49,9 +49,11 @@ static constexpr RegStorage reserved_regs_arr_32[] = static constexpr RegStorage core_temps_arr_32[] = {rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0_32, rs_rT1_32, rs_rT2_32, rs_rT3_32, rs_rT4_32, rs_rT5_32, 
rs_rT6_32, rs_rT7_32, rs_rT8}; -static constexpr RegStorage sp_temps_arr_32[] = +static constexpr RegStorage sp_fr0_temps_arr_32[] = {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10, rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15}; +static constexpr RegStorage sp_fr1_temps_arr_32[] = + {rs_rF0, rs_rF2, rs_rF4, rs_rF6, rs_rF8, rs_rF10, rs_rF12, rs_rF14}; static constexpr RegStorage dp_fr0_temps_arr_32[] = {rs_rD0_fr0, rs_rD1_fr0, rs_rD2_fr0, rs_rD3_fr0, rs_rD4_fr0, rs_rD5_fr0, rs_rD6_fr0, rs_rD7_fr0}; @@ -130,7 +132,8 @@ static constexpr ArrayRef<const RegStorage> dp_fr0_regs_32(dp_fr0_regs_arr_32); static constexpr ArrayRef<const RegStorage> dp_fr1_regs_32(dp_fr1_regs_arr_32); static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32); static constexpr ArrayRef<const RegStorage> core_temps_32(core_temps_arr_32); -static constexpr ArrayRef<const RegStorage> sp_temps_32(sp_temps_arr_32); +static constexpr ArrayRef<const RegStorage> sp_fr0_temps_32(sp_fr0_temps_arr_32); +static constexpr ArrayRef<const RegStorage> sp_fr1_temps_32(sp_fr1_temps_arr_32); static constexpr ArrayRef<const RegStorage> dp_fr0_temps_32(dp_fr0_temps_arr_32); static constexpr ArrayRef<const RegStorage> dp_fr1_temps_32(dp_fr1_temps_arr_32); @@ -591,22 +594,22 @@ void MipsMir2Lir::ClobberCallerSave() { Clobber(rs_rFP); Clobber(rs_rRA); Clobber(rs_rF0); - Clobber(rs_rF1); Clobber(rs_rF2); - Clobber(rs_rF3); Clobber(rs_rF4); - Clobber(rs_rF5); Clobber(rs_rF6); - Clobber(rs_rF7); Clobber(rs_rF8); - Clobber(rs_rF9); Clobber(rs_rF10); - Clobber(rs_rF11); Clobber(rs_rF12); - Clobber(rs_rF13); Clobber(rs_rF14); - Clobber(rs_rF15); if (fpuIs32Bit_) { + Clobber(rs_rF1); + Clobber(rs_rF3); + Clobber(rs_rF5); + Clobber(rs_rF7); + Clobber(rs_rF9); + Clobber(rs_rF11); + Clobber(rs_rF13); + Clobber(rs_rF15); Clobber(rs_rD0_fr0); Clobber(rs_rD1_fr0); Clobber(rs_rD2_fr0); @@ -717,24 +720,26 @@ void MipsMir2Lir::CompilerInitializeRegAlloc() { fpuIs32Bit_ ? dp_fr0_regs_32 : dp_fr1_regs_32, reserved_regs_32, empty_pool, // reserved64 core_temps_32, empty_pool, // core64_temps - sp_temps_32, + fpuIs32Bit_ ? sp_fr0_temps_32 : sp_fr1_temps_32, fpuIs32Bit_ ? dp_fr0_temps_32 : dp_fr1_temps_32)); // Alias single precision floats to appropriate half of overlapping double. for (RegisterInfo* info : reg_pool_->sp_regs_) { int sp_reg_num = info->GetReg().GetRegNum(); int dp_reg_num = sp_reg_num & ~1; - RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num); - RegisterInfo* dp_reg_info = GetRegInfo(dp_reg); - // Double precision register's master storage should refer to itself. - DCHECK_EQ(dp_reg_info, dp_reg_info->Master()); - // Redirect single precision's master storage to master. - info->SetMaster(dp_reg_info); - // Singles should show a single 32-bit mask bit, at first referring to the low half. - DCHECK_EQ(info->StorageMask(), 0x1U); - if (sp_reg_num & 1) { - // For odd singles, change to user the high word of the backing double. - info->SetStorageMask(0x2); + if (fpuIs32Bit_ || (sp_reg_num == dp_reg_num)) { + RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num); + RegisterInfo* dp_reg_info = GetRegInfo(dp_reg); + // Double precision register's master storage should refer to itself. + DCHECK_EQ(dp_reg_info, dp_reg_info->Master()); + // Redirect single precision's master storage to master. + info->SetMaster(dp_reg_info); + // Singles should show a single 32-bit mask bit, at first referring to the low half. 
+ DCHECK_EQ(info->StorageMask(), 0x1U); + if (sp_reg_num & 1) { + // For odd singles, change to user the high word of the backing double. + info->SetStorageMask(0x2); + } } } } diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index 37e5804f18..ec2475a7f7 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -714,7 +714,7 @@ LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStora } else { opcode = kMipsFldc1; } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; } is64bit = true; @@ -736,15 +736,15 @@ LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStora DCHECK(r_dest.IsDouble()); } } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; case kUnsignedHalf: opcode = kMipsLhu; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kSignedHalf: opcode = kMipsLh; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kUnsignedByte: opcode = kMipsLbu; @@ -891,7 +891,7 @@ LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStor } else { opcode = kMipsFsdc1; } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; } is64bit = true; @@ -913,12 +913,12 @@ LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStor DCHECK(r_src.IsDouble()); } } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; case kUnsignedHalf: case kSignedHalf: opcode = kMipsSh; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kUnsignedByte: case kSignedByte: diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc index dd68dd40c6..16c161e320 100644 --- a/compiler/dex/quick/quick_cfi_test.cc +++ b/compiler/dex/quick/quick_cfi_test.cc @@ -36,7 +36,7 @@ namespace art { // Run the tests only on host. 
-#ifndef HAVE_ANDROID_OS +#ifndef __ANDROID__ class QuickCFITest : public CFITest { public: @@ -56,6 +56,8 @@ class QuickCFITest : public CFITest { CompilerOptions::kDefaultSmallMethodThreshold, CompilerOptions::kDefaultTinyMethodThreshold, CompilerOptions::kDefaultNumDexMethodsThreshold, + CompilerOptions::kDefaultInlineDepthLimit, + CompilerOptions::kDefaultInlineMaxCodeUnits, false, CompilerOptions::kDefaultTopKProfileThreshold, false, @@ -134,6 +136,6 @@ TEST_ISA(kX86_64) TEST_ISA(kMips) TEST_ISA(kMips64) -#endif // HAVE_ANDROID_OS +#endif // __ANDROID__ } // namespace art diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h index 43dd5786af..4a39ab3565 100644 --- a/compiler/dex/quick/quick_compiler.h +++ b/compiler/dex/quick/quick_compiler.h @@ -50,7 +50,7 @@ class QuickCompiler : public Compiler { const DexFile& dex_file) const OVERRIDE; uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); static Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit); diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc index 798e23fbac..98e9f38d52 100644 --- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc +++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc @@ -39,6 +39,8 @@ class QuickAssembleX86TestBase : public testing::Test { CompilerOptions::kDefaultSmallMethodThreshold, CompilerOptions::kDefaultTinyMethodThreshold, CompilerOptions::kDefaultNumDexMethodsThreshold, + CompilerOptions::kDefaultInlineDepthLimit, + CompilerOptions::kDefaultInlineMaxCodeUnits, false, CompilerOptions::kDefaultTopKProfileThreshold, false, diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index 61a1becac1..b16ae982f2 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -659,7 +659,7 @@ LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int opcode = is_array ? kX86Mov32RA : kX86Mov32RM; } // TODO: double store is to unaligned address - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; case kWord: if (cu_->target64) { @@ -677,15 +677,15 @@ LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int opcode = is_array ? kX86MovssRA : kX86MovssRM; DCHECK(r_dest.IsFloat()); } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; case kUnsignedHalf: opcode = is_array ? kX86Movzx16RA : kX86Movzx16RM; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kSignedHalf: opcode = is_array ? kX86Movsx16RA : kX86Movsx16RM; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kUnsignedByte: opcode = is_array ? kX86Movzx8RA : kX86Movzx8RM; @@ -812,7 +812,7 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int opcode = is_array ? kX86Mov32AR : kX86Mov32MR; } // TODO: double store is to unaligned address - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); break; case kWord: if (cu_->target64) { @@ -831,13 +831,13 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int opcode = is_array ? kX86MovssAR : kX86MovssMR; DCHECK(r_src.IsSingle()); } - DCHECK_EQ((displacement & 0x3), 0); + DCHECK_ALIGNED(displacement, 4); consider_non_temporal = true; break; case kUnsignedHalf: case kSignedHalf: opcode = is_array ? 
kX86Mov16AR : kX86Mov16MR; - DCHECK_EQ((displacement & 0x1), 0); + DCHECK_ALIGNED(displacement, 2); break; case kUnsignedByte: case kSignedByte: diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h index d692d26229..03bf57bded 100644 --- a/compiler/dex/quick_compiler_callbacks.h +++ b/compiler/dex/quick_compiler_callbacks.h @@ -38,7 +38,7 @@ class QuickCompilerCallbacks FINAL : public CompilerCallbacks { ~QuickCompilerCallbacks() { } bool MethodVerified(verifier::MethodVerifier* verifier) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE; + SHARED_REQUIRES(Locks::mutator_lock_) OVERRIDE; void ClassRejected(ClassReference ref) OVERRIDE; diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h index 7fc2a2363d..9934f6b13b 100644 --- a/compiler/dex/verification_results.h +++ b/compiler/dex/verification_results.h @@ -43,15 +43,15 @@ class VerificationResults { ~VerificationResults(); bool ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - LOCKS_EXCLUDED(verified_methods_lock_); + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!verified_methods_lock_); const VerifiedMethod* GetVerifiedMethod(MethodReference ref) - LOCKS_EXCLUDED(verified_methods_lock_); - void RemoveVerifiedMethod(MethodReference ref) LOCKS_EXCLUDED(verified_methods_lock_); + REQUIRES(!verified_methods_lock_); + void RemoveVerifiedMethod(MethodReference ref) REQUIRES(!verified_methods_lock_); - void AddRejectedClass(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_); - bool IsClassRejected(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_); + void AddRejectedClass(ClassReference ref) REQUIRES(!rejected_classes_lock_); + bool IsClassRejected(ClassReference ref) REQUIRES(!rejected_classes_lock_); bool IsCandidateForCompilation(MethodReference& method_ref, const uint32_t access_flags); diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h index bf11839cf0..f7d6d67368 100644 --- a/compiler/dex/verified_method.h +++ b/compiler/dex/verified_method.h @@ -44,7 +44,7 @@ class VerifiedMethod { typedef SafeMap<uint32_t, DexFileReference> DequickenMap; static const VerifiedMethod* Create(verifier::MethodVerifier* method_verifier, bool compile) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); ~VerifiedMethod() = default; const std::vector<uint8_t>& GetDexGcMap() const { @@ -107,15 +107,15 @@ class VerifiedMethod { // Generate devirtualizaion map into devirt_map_. void GenerateDevirtMap(verifier::MethodVerifier* method_verifier) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Generate dequickening map into dequicken_map_. Returns false if there is an error. bool GenerateDequickenMap(verifier::MethodVerifier* method_verifier) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Generate safe case set into safe_cast_set_. 
void GenerateSafeCastSet(verifier::MethodVerifier* method_verifier) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); std::vector<uint8_t> dex_gc_map_; DevirtualizationMap devirt_map_; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 7890108f41..fa4667ec97 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -39,6 +39,7 @@ #include "compiler_driver-inl.h" #include "dex_compilation_unit.h" #include "dex_file-inl.h" +#include "dex/dex_to_dex_compiler.h" #include "dex/verification_results.h" #include "dex/verified_method.h" #include "dex/quick/dex_file_method_inliner.h" @@ -167,69 +168,69 @@ class CompilerDriver::AOTCompilationStats { #define STATS_LOCK() #endif - void TypeInDexCache() { + void TypeInDexCache() REQUIRES(!stats_lock_) { STATS_LOCK(); types_in_dex_cache_++; } - void TypeNotInDexCache() { + void TypeNotInDexCache() REQUIRES(!stats_lock_) { STATS_LOCK(); types_not_in_dex_cache_++; } - void StringInDexCache() { + void StringInDexCache() REQUIRES(!stats_lock_) { STATS_LOCK(); strings_in_dex_cache_++; } - void StringNotInDexCache() { + void StringNotInDexCache() REQUIRES(!stats_lock_) { STATS_LOCK(); strings_not_in_dex_cache_++; } - void TypeDoesntNeedAccessCheck() { + void TypeDoesntNeedAccessCheck() REQUIRES(!stats_lock_) { STATS_LOCK(); resolved_types_++; } - void TypeNeedsAccessCheck() { + void TypeNeedsAccessCheck() REQUIRES(!stats_lock_) { STATS_LOCK(); unresolved_types_++; } - void ResolvedInstanceField() { + void ResolvedInstanceField() REQUIRES(!stats_lock_) { STATS_LOCK(); resolved_instance_fields_++; } - void UnresolvedInstanceField() { + void UnresolvedInstanceField() REQUIRES(!stats_lock_) { STATS_LOCK(); unresolved_instance_fields_++; } - void ResolvedLocalStaticField() { + void ResolvedLocalStaticField() REQUIRES(!stats_lock_) { STATS_LOCK(); resolved_local_static_fields_++; } - void ResolvedStaticField() { + void ResolvedStaticField() REQUIRES(!stats_lock_) { STATS_LOCK(); resolved_static_fields_++; } - void UnresolvedStaticField() { + void UnresolvedStaticField() REQUIRES(!stats_lock_) { STATS_LOCK(); unresolved_static_fields_++; } // Indicate that type information from the verifier led to devirtualization. - void PreciseTypeDevirtualization() { + void PreciseTypeDevirtualization() REQUIRES(!stats_lock_) { STATS_LOCK(); type_based_devirtualization_++; } // Indicate that a method of the given type was resolved at compile time. - void ResolvedMethod(InvokeType type) { + void ResolvedMethod(InvokeType type) REQUIRES(!stats_lock_) { DCHECK_LE(type, kMaxInvokeType); STATS_LOCK(); resolved_methods_[type]++; @@ -237,7 +238,7 @@ class CompilerDriver::AOTCompilationStats { // Indicate that a method of the given type was unresolved at compile time as it was in an // unknown dex file. - void UnresolvedMethod(InvokeType type) { + void UnresolvedMethod(InvokeType type) REQUIRES(!stats_lock_) { DCHECK_LE(type, kMaxInvokeType); STATS_LOCK(); unresolved_methods_[type]++; @@ -245,27 +246,27 @@ class CompilerDriver::AOTCompilationStats { // Indicate that a type of virtual method dispatch has been converted into a direct method // dispatch. - void VirtualMadeDirect(InvokeType type) { + void VirtualMadeDirect(InvokeType type) REQUIRES(!stats_lock_) { DCHECK(type == kVirtual || type == kInterface || type == kSuper); STATS_LOCK(); virtual_made_direct_[type]++; } // Indicate that a method of the given type was able to call directly into boot. 
- void DirectCallsToBoot(InvokeType type) { + void DirectCallsToBoot(InvokeType type) REQUIRES(!stats_lock_) { DCHECK_LE(type, kMaxInvokeType); STATS_LOCK(); direct_calls_to_boot_[type]++; } // Indicate that a method of the given type was able to be resolved directly from boot. - void DirectMethodsToBoot(InvokeType type) { + void DirectMethodsToBoot(InvokeType type) REQUIRES(!stats_lock_) { DCHECK_LE(type, kMaxInvokeType); STATS_LOCK(); direct_methods_to_boot_[type]++; } - void ProcessedInvoke(InvokeType type, int flags) { + void ProcessedInvoke(InvokeType type, int flags) REQUIRES(!stats_lock_) { STATS_LOCK(); if (flags == 0) { unresolved_methods_[type]++; @@ -290,13 +291,13 @@ class CompilerDriver::AOTCompilationStats { } // A check-cast could be eliminated due to verifier type analysis. - void SafeCast() { + void SafeCast() REQUIRES(!stats_lock_) { STATS_LOCK(); safe_casts_++; } // A check-cast couldn't be eliminated due to verifier type analysis. - void NotASafeCast() { + void NotASafeCast() REQUIRES(!stats_lock_) { STATS_LOCK(); not_safe_casts_++; } @@ -334,16 +335,6 @@ class CompilerDriver::AOTCompilationStats { DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats); }; - -extern "C" art::CompiledMethod* ArtCompileDEX(art::CompilerDriver& compiler, - const art::DexFile::CodeItem* code_item, - uint32_t access_flags, - art::InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - jobject class_loader, - const art::DexFile& dex_file); - CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, VerificationResults* verification_results, DexFileToMethodInlinerMap* method_inliner_map, @@ -394,8 +385,6 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, DCHECK(verification_results_ != nullptr); DCHECK(method_inliner_map_ != nullptr); - dex_to_dex_compiler_ = reinterpret_cast<DexToDexCompilerFn>(ArtCompileDEX); - compiler_->Init(); CHECK_EQ(image_, image_classes_.get() != nullptr); @@ -508,13 +497,14 @@ void CompilerDriver::CompileAll(jobject class_loader, } } -DexToDexCompilationLevel CompilerDriver::GetDexToDexCompilationlevel( - Thread* self, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, - const DexFile::ClassDef& class_def) { +static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( + Thread* self, const CompilerDriver& driver, Handle<mirror::ClassLoader> class_loader, + const DexFile& dex_file, const DexFile::ClassDef& class_def) + SHARED_REQUIRES(Locks::mutator_lock_) { auto* const runtime = Runtime::Current(); - if (runtime->UseJit() || GetCompilerOptions().VerifyAtRuntime()) { + if (runtime->UseJit() || driver.GetCompilerOptions().VerifyAtRuntime()) { // Verify at runtime shouldn't dex to dex since we didn't resolve of verify. - return kDontDexToDexCompile; + return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; } const char* descriptor = dex_file.GetClassDescriptor(class_def); ClassLinker* class_linker = runtime->GetClassLinker(); @@ -522,7 +512,7 @@ DexToDexCompilationLevel CompilerDriver::GetDexToDexCompilationlevel( if (klass == nullptr) { CHECK(self->IsExceptionPending()); self->ClearException(); - return kDontDexToDexCompile; + return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; } // DexToDex at the kOptimize level may introduce quickened opcodes, which replace symbolic // references with actual offsets. We cannot re-verify such instructions. 
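[Illustrative sketch, not part of the patch.] The annotation changes running through the hunks above (SHARED_LOCKS_REQUIRED -> SHARED_REQUIRES in verified_method.h and verification_results.h, and LOCKS_EXCLUDED(x) -> REQUIRES(!x) on the AOTCompilationStats helpers) move to Clang's capability-based thread-safety attributes, where REQUIRES(!x) is a checkable statement that the caller must not already hold x because the callee acquires it itself. A minimal self-contained sketch of that style; the Mutex and Stats types below are toys, not the ART declarations, and ART's real macros live in base/mutex.h:

    // Build with: clang++ -std=c++11 -Wthread-safety -c sketch.cc
    #include <cstddef>
    #include <mutex>

    #define CAPABILITY(x)  __attribute__((capability(x)))
    #define ACQUIRE(...)   __attribute__((acquire_capability(__VA_ARGS__)))
    #define RELEASE(...)   __attribute__((release_capability(__VA_ARGS__)))
    #define REQUIRES(...)  __attribute__((requires_capability(__VA_ARGS__)))
    #define GUARDED_BY(x)  __attribute__((guarded_by(x)))

    // Toy lock so the attributes have a capability to refer to.
    class CAPABILITY("mutex") Mutex {
     public:
      void Lock() ACQUIRE() { mu_.lock(); }
      void Unlock() RELEASE() { mu_.unlock(); }
     private:
      std::mutex mu_;
    };

    class Stats {
     public:
      // REQUIRES(!stats_lock_): caller must NOT hold stats_lock_,
      // because this method acquires and releases it internally.
      void ResolvedMethod() REQUIRES(!stats_lock_) {
        stats_lock_.Lock();
        ++resolved_methods_;
        stats_lock_.Unlock();
      }
     private:
      Mutex stats_lock_;
      size_t resolved_methods_ GUARDED_BY(stats_lock_) = 0;
    };

With -Wthread-safety-negative the analysis can then verify both that callers of ResolvedMethod() do not hold stats_lock_ and that the counter is only touched under it, which the old LOCKS_EXCLUDED form could not express as strongly.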
@@ -532,14 +522,142 @@ DexToDexCompilationLevel CompilerDriver::GetDexToDexCompilationlevel( // optimize when a class has been fully verified before. if (klass->IsVerified()) { // Class is verified so we can enable DEX-to-DEX compilation for performance. - return kOptimize; + return optimizer::DexToDexCompilationLevel::kOptimize; } else if (klass->IsCompileTimeVerified()) { // Class verification has soft-failed. Anyway, ensure at least correctness. DCHECK_EQ(klass->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime); - return kRequired; + return optimizer::DexToDexCompilationLevel::kRequired; } else { // Class verification has failed: do not run DEX-to-DEX compilation. - return kDontDexToDexCompile; + return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; + } +} + +static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( + Thread* self, + const CompilerDriver& driver, + jobject jclass_loader, + const DexFile& dex_file, + const DexFile::ClassDef& class_def) { + ScopedObjectAccess soa(self); + StackHandleScope<1> hs(soa.Self()); + Handle<mirror::ClassLoader> class_loader( + hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); + return GetDexToDexCompilationLevel(self, driver, class_loader, dex_file, class_def); +} + +// Does the runtime for the InstructionSet provide an implementation returned by +// GetQuickGenericJniStub allowing down calls that aren't compiled using a JNI compiler? +static bool InstructionSetHasGenericJniStub(InstructionSet isa) { + switch (isa) { + case kArm: + case kArm64: + case kThumb2: + case kMips: + case kMips64: + case kX86: + case kX86_64: return true; + default: return false; + } +} + +static void CompileMethod(Thread* self, + CompilerDriver* driver, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file, + optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level, + bool compilation_enabled) + REQUIRES(!driver->compiled_methods_lock_) { + DCHECK(driver != nullptr); + CompiledMethod* compiled_method = nullptr; + uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0; + MethodReference method_ref(&dex_file, method_idx); + + if ((access_flags & kAccNative) != 0) { + // Are we interpreting only and have support for generic JNI down calls? + if (!driver->GetCompilerOptions().IsCompilationEnabled() && + InstructionSetHasGenericJniStub(driver->GetInstructionSet())) { + // Leaving this empty will trigger the generic JNI version + } else { + compiled_method = driver->GetCompiler()->JniCompile(access_flags, method_idx, dex_file); + CHECK(compiled_method != nullptr); + } + } else if ((access_flags & kAccAbstract) != 0) { + // Abstract methods don't have code. + } else { + bool has_verified_method = driver->GetVerificationResults() + ->GetVerifiedMethod(method_ref) != nullptr; + bool compile = compilation_enabled && + // Basic checks, e.g., not <clinit>. + driver->GetVerificationResults() + ->IsCandidateForCompilation(method_ref, access_flags) && + // Did not fail to create VerifiedMethod metadata. + has_verified_method && + // Is eligable for compilation by methods-to-compile filter. + driver->IsMethodToCompile(method_ref); + if (compile) { + // NOTE: if compiler declines to compile this method, it will return null. 
+ compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type, + class_def_idx, method_idx, class_loader, + dex_file); + } + if (compiled_method == nullptr && + dex_to_dex_compilation_level != optimizer::DexToDexCompilationLevel::kDontDexToDexCompile) { + // TODO: add a command-line option to disable DEX-to-DEX compilation ? + // Do not optimize if a VerifiedMethod is missing. SafeCast elision, for example, relies on + // it. + compiled_method = optimizer::ArtCompileDEX( + driver, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + has_verified_method + ? dex_to_dex_compilation_level + : optimizer::DexToDexCompilationLevel::kRequired); + } + } + if (kTimeCompileMethod) { + uint64_t duration_ns = NanoTime() - start_ns; + if (duration_ns > MsToNs(driver->GetCompiler()->GetMaximumCompilationTimeBeforeWarning())) { + LOG(WARNING) << "Compilation of " << PrettyMethod(method_idx, dex_file) + << " took " << PrettyDuration(duration_ns); + } + } + + if (compiled_method != nullptr) { + // Count non-relative linker patches. + size_t non_relative_linker_patch_count = 0u; + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (!patch.IsPcRelative()) { + ++non_relative_linker_patch_count; + } + } + bool compile_pic = driver->GetCompilerOptions().GetCompilePic(); // Off by default + // When compiling with PIC, there should be zero non-relative linker patches + CHECK(!compile_pic || non_relative_linker_patch_count == 0u); + + driver->AddCompiledMethod(method_ref, compiled_method, non_relative_linker_patch_count); + } + + // Done compiling, delete the verified method to reduce native memory usage. Do not delete in + // optimizing compiler, which may need the verified method again for inlining. + if (driver->GetCompilerKind() != Compiler::kOptimizing) { + driver->GetVerificationResults()->RemoveVerifiedMethod(method_ref); + } + + if (self->IsExceptionPending()) { + ScopedObjectAccess soa(self); + LOG(FATAL) << "Unexpected exception compiling: " << PrettyMethod(method_idx, dex_file) << "\n" + << self->GetException()->Dump(); } } @@ -570,24 +688,30 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t PreCompile(jclass_loader, dex_files, thread_pool.get(), timings); // Can we run DEX-to-DEX compiler on this class ? 
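[Illustrative sketch, not part of the patch.] Condensed view of the per-method decision order the rewritten CompileMethod above applies; the booleans stand in for the driver/compiler queries and the enum names mirror the hunk, but this is a simplification, not the ART code:

    enum class DexToDexCompilationLevel { kDontDexToDexCompile, kRequired, kOptimize };
    enum class MethodPath { kGenericJniStub, kJniCompiled, kNoCode, kCompiled, kDexToDex };

    MethodPath ChoosePath(bool is_native,
                          bool is_abstract,
                          bool compilation_enabled,
                          bool has_generic_jni_stub,
                          bool compiler_produced_code,
                          DexToDexCompilationLevel level) {
      if (is_native) {
        // Interpreter-only runs can fall back to the generic JNI trampoline.
        return (!compilation_enabled && has_generic_jni_stub) ? MethodPath::kGenericJniStub
                                                              : MethodPath::kJniCompiled;
      }
      if (is_abstract) {
        return MethodPath::kNoCode;  // abstract methods have no code item
      }
      if (compiler_produced_code) {
        return MethodPath::kCompiled;
      }
      // Otherwise fall back to DEX-to-DEX unless the class-level decision forbade it.
      return (level != DexToDexCompilationLevel::kDontDexToDexCompile) ? MethodPath::kDexToDex
                                                                       : MethodPath::kNoCode;
    }

As the hunk shows, the real code additionally downgrades the level to kRequired when no VerifiedMethod is available, since quickening-level optimizations rely on that metadata.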
- DexToDexCompilationLevel dex_to_dex_compilation_level = kDontDexToDexCompile; - { - ScopedObjectAccess soa(self); - const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_idx); - StackHandleScope<1> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); - dex_to_dex_compilation_level = GetDexToDexCompilationlevel(self, class_loader, *dex_file, - class_def); - } - CompileMethod(self, code_item, access_flags, invoke_type, class_def_idx, method_idx, - jclass_loader, *dex_file, dex_to_dex_compilation_level, true); + optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level = + GetDexToDexCompilationLevel(self, + *this, + jclass_loader, + *dex_file, + dex_file->GetClassDef(class_def_idx)); + + CompileMethod(self, + this, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + jclass_loader, + *dex_file, + dex_to_dex_compilation_level, + true); self->GetJniEnv()->DeleteGlobalRef(jclass_loader); self->TransitionFromSuspendedToRunnable(); } -CompiledMethod* CompilerDriver::CompileMethod(Thread* self, ArtMethod* method) { +CompiledMethod* CompilerDriver::CompileArtMethod(Thread* self, ArtMethod* method) { const uint32_t method_idx = method->GetDexMethodIndex(); const uint32_t access_flags = method->GetAccessFlags(); const InvokeType invoke_type = method->GetInvokeType(); @@ -598,12 +722,21 @@ CompiledMethod* CompilerDriver::CompileMethod(Thread* self, ArtMethod* method) { const DexFile* dex_file = method->GetDexFile(); const uint16_t class_def_idx = method->GetClassDefIndex(); const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_idx); - DexToDexCompilationLevel dex_to_dex_compilation_level = - GetDexToDexCompilationlevel(self, class_loader, *dex_file, class_def); + optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level = + GetDexToDexCompilationLevel(self, *this, class_loader, *dex_file, class_def); const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); self->TransitionFromRunnableToSuspended(kNative); - CompileMethod(self, code_item, access_flags, invoke_type, class_def_idx, method_idx, - jclass_loader, *dex_file, dex_to_dex_compilation_level, true); + CompileMethod(self, + this, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + jclass_loader, + *dex_file, + dex_to_dex_compilation_level, + true); auto* compiled_method = GetCompiledMethod(MethodReference(dex_file, method_idx)); self->TransitionFromSuspendedToRunnable(); return compiled_method; @@ -690,70 +823,79 @@ bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const return methods_to_compile_->find(tmp.c_str()) != methods_to_compile_->end(); } -static void ResolveExceptionsForMethod( - ArtMethod* method_handle, std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - const DexFile::CodeItem* code_item = method_handle->GetCodeItem(); - if (code_item == nullptr) { - return; // native or abstract method - } - if (code_item->tries_size_ == 0) { - return; // nothing to process - } - const uint8_t* encoded_catch_handler_list = DexFile::GetCatchHandlerData(*code_item, 0); - size_t num_encoded_catch_handlers = DecodeUnsignedLeb128(&encoded_catch_handler_list); - for (size_t i = 0; i < num_encoded_catch_handlers; i++) { - int32_t encoded_catch_handler_size = DecodeSignedLeb128(&encoded_catch_handler_list); - bool has_catch_all = false; - if (encoded_catch_handler_size 
<= 0) { - encoded_catch_handler_size = -encoded_catch_handler_size; - has_catch_all = true; - } - for (int32_t j = 0; j < encoded_catch_handler_size; j++) { - uint16_t encoded_catch_handler_handlers_type_idx = - DecodeUnsignedLeb128(&encoded_catch_handler_list); - // Add to set of types to resolve if not already in the dex cache resolved types - if (!method_handle->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) { - exceptions_to_resolve.insert( - std::pair<uint16_t, const DexFile*>(encoded_catch_handler_handlers_type_idx, - method_handle->GetDexFile())); - } - // ignore address associated with catch handler - DecodeUnsignedLeb128(&encoded_catch_handler_list); +class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor { + public: + ResolveCatchBlockExceptionsClassVisitor( + std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve) + : exceptions_to_resolve_(exceptions_to_resolve) {} + + virtual bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); + for (auto& m : c->GetVirtualMethods(pointer_size)) { + ResolveExceptionsForMethod(&m); } - if (has_catch_all) { - // ignore catch all address - DecodeUnsignedLeb128(&encoded_catch_handler_list); + for (auto& m : c->GetDirectMethods(pointer_size)) { + ResolveExceptionsForMethod(&m); } + return true; } -} -static bool ResolveCatchBlockExceptionsClassVisitor(mirror::Class* c, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - auto* exceptions_to_resolve = - reinterpret_cast<std::set<std::pair<uint16_t, const DexFile*>>*>(arg); - const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); - for (auto& m : c->GetVirtualMethods(pointer_size)) { - ResolveExceptionsForMethod(&m, *exceptions_to_resolve); + private: + void ResolveExceptionsForMethod(ArtMethod* method_handle) SHARED_REQUIRES(Locks::mutator_lock_) { + const DexFile::CodeItem* code_item = method_handle->GetCodeItem(); + if (code_item == nullptr) { + return; // native or abstract method + } + if (code_item->tries_size_ == 0) { + return; // nothing to process + } + const uint8_t* encoded_catch_handler_list = DexFile::GetCatchHandlerData(*code_item, 0); + size_t num_encoded_catch_handlers = DecodeUnsignedLeb128(&encoded_catch_handler_list); + for (size_t i = 0; i < num_encoded_catch_handlers; i++) { + int32_t encoded_catch_handler_size = DecodeSignedLeb128(&encoded_catch_handler_list); + bool has_catch_all = false; + if (encoded_catch_handler_size <= 0) { + encoded_catch_handler_size = -encoded_catch_handler_size; + has_catch_all = true; + } + for (int32_t j = 0; j < encoded_catch_handler_size; j++) { + uint16_t encoded_catch_handler_handlers_type_idx = + DecodeUnsignedLeb128(&encoded_catch_handler_list); + // Add to set of types to resolve if not already in the dex cache resolved types + if (!method_handle->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) { + exceptions_to_resolve_.emplace(encoded_catch_handler_handlers_type_idx, + method_handle->GetDexFile()); + } + // ignore address associated with catch handler + DecodeUnsignedLeb128(&encoded_catch_handler_list); + } + if (has_catch_all) { + // ignore catch all address + DecodeUnsignedLeb128(&encoded_catch_handler_list); + } + } } - for (auto& m : c->GetDirectMethods(pointer_size)) { - ResolveExceptionsForMethod(&m, *exceptions_to_resolve); + + std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve_; +}; + +class 
RecordImageClassesVisitor : public ClassVisitor { + public: + explicit RecordImageClassesVisitor(std::unordered_set<std::string>* image_classes) + : image_classes_(image_classes) {} + + bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + std::string temp; + image_classes_->insert(klass->GetDescriptor(&temp)); + return true; } - return true; -} -static bool RecordImageClassesVisitor(mirror::Class* klass, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - std::unordered_set<std::string>* image_classes = - reinterpret_cast<std::unordered_set<std::string>*>(arg); - std::string temp; - image_classes->insert(klass->GetDescriptor(&temp)); - return true; -} + private: + std::unordered_set<std::string>* const image_classes_; +}; // Make a list of descriptors for classes to include in the image -void CompilerDriver::LoadImageClasses(TimingLogger* timings) - LOCKS_EXCLUDED(Locks::mutator_lock_) { +void CompilerDriver::LoadImageClasses(TimingLogger* timings) { CHECK(timings != nullptr); if (!IsImage()) { return; @@ -788,8 +930,8 @@ void CompilerDriver::LoadImageClasses(TimingLogger* timings) hs.NewHandle(class_linker->FindSystemClass(self, "Ljava/lang/Throwable;"))); do { unresolved_exception_types.clear(); - class_linker->VisitClasses(ResolveCatchBlockExceptionsClassVisitor, - &unresolved_exception_types); + ResolveCatchBlockExceptionsClassVisitor visitor(unresolved_exception_types); + class_linker->VisitClasses(&visitor); for (const std::pair<uint16_t, const DexFile*>& exception_type : unresolved_exception_types) { uint16_t exception_type_idx = exception_type.first; const DexFile* dex_file = exception_type.second; @@ -812,14 +954,15 @@ void CompilerDriver::LoadImageClasses(TimingLogger* timings) // We walk the roots looking for classes so that we'll pick up the // above classes plus any classes them depend on such super // classes, interfaces, and the required ClassLinker roots. - class_linker->VisitClasses(RecordImageClassesVisitor, image_classes_.get()); + RecordImageClassesVisitor visitor(image_classes_.get()); + class_linker->VisitClasses(&visitor); CHECK_NE(image_classes_->size(), 0U); } static void MaybeAddToImageClasses(Handle<mirror::Class> c, std::unordered_set<std::string>* image_classes) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { Thread* self = Thread::Current(); StackHandleScope<1> hs(self); // Make a copy of the handle so that we don't clobber it doing Assign. @@ -876,7 +1019,7 @@ class ClinitImageUpdate { // Visitor for VisitReferences. void operator()(mirror::Object* object, MemberOffset field_offset, bool /* is_static */) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { mirror::Object* ref = object->GetFieldObject<mirror::Object>(field_offset); if (ref != nullptr) { VisitClinitClassesObject(ref); @@ -884,10 +1027,15 @@ class ClinitImageUpdate { } // java.lang.Reference visitor for VisitReferences. - void operator()(mirror::Class* /* klass */, mirror::Reference* /* ref */) const { - } + void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref ATTRIBUTE_UNUSED) + const {} + + // Ignore class native roots. 
+ void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) + const {} + void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const {} - void Walk() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + void Walk() SHARED_REQUIRES(Locks::mutator_lock_) { // Use the initial classes as roots for a search. for (mirror::Class* klass_root : image_classes_) { VisitClinitClassesObject(klass_root); @@ -895,9 +1043,32 @@ class ClinitImageUpdate { } private: + class FindImageClassesVisitor : public ClassVisitor { + public: + explicit FindImageClassesVisitor(ClinitImageUpdate* data) : data_(data) {} + + bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + std::string temp; + const char* name = klass->GetDescriptor(&temp); + if (data_->image_class_descriptors_->find(name) != data_->image_class_descriptors_->end()) { + data_->image_classes_.push_back(klass); + } else { + // Check whether it is initialized and has a clinit. They must be kept, too. + if (klass->IsInitialized() && klass->FindClassInitializer( + Runtime::Current()->GetClassLinker()->GetImagePointerSize()) != nullptr) { + data_->image_classes_.push_back(klass); + } + } + return true; + } + + private: + ClinitImageUpdate* const data_; + }; + ClinitImageUpdate(std::unordered_set<std::string>* image_class_descriptors, Thread* self, ClassLinker* linker) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : + SHARED_REQUIRES(Locks::mutator_lock_) : image_class_descriptors_(image_class_descriptors), self_(self) { CHECK(linker != nullptr); CHECK(image_class_descriptors != nullptr); @@ -911,29 +1082,12 @@ class ClinitImageUpdate { // Find all the already-marked classes. WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); - linker->VisitClasses(FindImageClasses, this); - } - - static bool FindImageClasses(mirror::Class* klass, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - ClinitImageUpdate* data = reinterpret_cast<ClinitImageUpdate*>(arg); - std::string temp; - const char* name = klass->GetDescriptor(&temp); - if (data->image_class_descriptors_->find(name) != data->image_class_descriptors_->end()) { - data->image_classes_.push_back(klass); - } else { - // Check whether it is initialized and has a clinit. They must be kept, too. - if (klass->IsInitialized() && klass->FindClassInitializer( - Runtime::Current()->GetClassLinker()->GetImagePointerSize()) != nullptr) { - data->image_classes_.push_back(klass); - } - } - - return true; + FindImageClassesVisitor visitor(this); + linker->VisitClasses(&visitor); } void VisitClinitClassesObject(mirror::Object* object) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { DCHECK(object != nullptr); if (marked_objects_.find(object) != marked_objects_.end()) { // Already processed. 
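[Illustrative sketch, not part of the patch.] The conversions above (ResolveCatchBlockExceptionsClassVisitor, RecordImageClassesVisitor, FindImageClassesVisitor) replace the old bool (*)(mirror::Class*, void*) callbacks with ClassVisitor subclasses, so state that previously travelled through a void* argument becomes typed member data. A stripped-down sketch of the shape; the Class type, descriptor lookup and walker below are placeholders, not ART's mirror:: types or ClassLinker API:

    #include <string>
    #include <unordered_set>
    #include <vector>

    class Class;  // stand-in for mirror::Class

    class ClassVisitor {
     public:
      virtual ~ClassVisitor() {}
      // Return false to stop visiting early.
      virtual bool Visit(Class* klass) = 0;
    };

    // State that used to ride in the void* argument is now a typed member.
    class RecordDescriptorsVisitor : public ClassVisitor {
     public:
      explicit RecordDescriptorsVisitor(std::unordered_set<std::string>* out) : out_(out) {}
      bool Visit(Class* klass) override {
        out_->insert(DescriptorOf(klass));
        return true;
      }
     private:
      // Placeholder for mirror::Class::GetDescriptor().
      static std::string DescriptorOf(Class* /* klass */) { return "LExample;"; }
      std::unordered_set<std::string>* const out_;
    };

    // The walker takes the interface instead of a function pointer plus void*.
    void VisitClasses(ClassVisitor* visitor, const std::vector<Class*>& classes) {
      for (Class* c : classes) {
        if (!visitor->Visit(c)) {
          break;
        }
      }
    }

Beyond type safety, this also lets the SHARED_REQUIRES annotations sit on the Visit override itself rather than on a free function.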
@@ -1569,10 +1723,14 @@ bool CompilerDriver::IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc return result; } -class ParallelCompilationManager { +class CompilationVisitor { public: - typedef void Callback(const ParallelCompilationManager* manager, size_t index); + virtual ~CompilationVisitor() {} + virtual void Visit(size_t index) = 0; +}; +class ParallelCompilationManager { + public: ParallelCompilationManager(ClassLinker* class_linker, jobject class_loader, CompilerDriver* compiler, @@ -1610,14 +1768,15 @@ class ParallelCompilationManager { return dex_files_; } - void ForAll(size_t begin, size_t end, Callback callback, size_t work_units) { + void ForAll(size_t begin, size_t end, CompilationVisitor* visitor, size_t work_units) + REQUIRES(!*Locks::mutator_lock_) { Thread* self = Thread::Current(); self->AssertNoPendingException(); CHECK_GT(work_units, 0U); index_.StoreRelaxed(begin); for (size_t i = 0; i < work_units; ++i) { - thread_pool_->AddTask(self, new ForAllClosure(this, end, callback)); + thread_pool_->AddTask(self, new ForAllClosure(this, end, visitor)); } thread_pool_->StartWorkers(self); @@ -1636,10 +1795,10 @@ class ParallelCompilationManager { private: class ForAllClosure : public Task { public: - ForAllClosure(ParallelCompilationManager* manager, size_t end, Callback* callback) + ForAllClosure(ParallelCompilationManager* manager, size_t end, CompilationVisitor* visitor) : manager_(manager), end_(end), - callback_(callback) {} + visitor_(visitor) {} virtual void Run(Thread* self) { while (true) { @@ -1647,7 +1806,7 @@ class ParallelCompilationManager { if (UNLIKELY(index >= end_)) { break; } - callback_(manager_, index); + visitor_->Visit(index); self->AssertNoPendingException(); } } @@ -1659,7 +1818,7 @@ class ParallelCompilationManager { private: ParallelCompilationManager* const manager_; const size_t end_; - Callback* const callback_; + CompilationVisitor* const visitor_; }; AtomicInteger index_; @@ -1676,7 +1835,7 @@ class ParallelCompilationManager { // A fast version of SkipClass above if the class pointer is available // that avoids the expensive FindInClassPath search. static bool SkipClass(jobject class_loader, const DexFile& dex_file, mirror::Class* klass) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { DCHECK(klass != nullptr); const DexFile& original_dex_file = *klass->GetDexCache()->GetDexFile(); if (&dex_file != &original_dex_file) { @@ -1691,7 +1850,7 @@ static bool SkipClass(jobject class_loader, const DexFile& dex_file, mirror::Cla } static void CheckAndClearResolveException(Thread* self) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { CHECK(self->IsExceptionPending()); mirror::Throwable* exception = self->GetException(); std::string temp; @@ -1717,134 +1876,148 @@ static void CheckAndClearResolveException(Thread* self) self->ClearException(); } -static void ResolveClassFieldsAndMethods(const ParallelCompilationManager* manager, - size_t class_def_index) - LOCKS_EXCLUDED(Locks::mutator_lock_) { - ATRACE_CALL(); - Thread* self = Thread::Current(); - jobject jclass_loader = manager->GetClassLoader(); - const DexFile& dex_file = *manager->GetDexFile(); - ClassLinker* class_linker = manager->GetClassLinker(); - - // If an instance field is final then we need to have a barrier on the return, static final - // fields are assigned within the lock held for class initialization. Conservatively assume - // constructor barriers are always required. 
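[Illustrative sketch, not part of the patch.] ForAll above now hands a CompilationVisitor* to its worker tasks instead of a C-style Callback; each worker repeatedly fetch-adds a shared index until the range is exhausted. A self-contained sketch of that distribution loop, using std::thread in place of ART's ThreadPool/Task plumbing and omitting error handling:

    #include <atomic>
    #include <cstddef>
    #include <thread>
    #include <vector>

    class CompilationVisitor {
     public:
      virtual ~CompilationVisitor() {}
      virtual void Visit(size_t index) = 0;
    };

    void ForAll(size_t begin, size_t end, CompilationVisitor* visitor, size_t work_units) {
      std::atomic<size_t> index(begin);
      std::vector<std::thread> workers;
      for (size_t i = 0; i < work_units; ++i) {
        workers.emplace_back([&]() {
          while (true) {
            const size_t current = index.fetch_add(1, std::memory_order_relaxed);
            if (current >= end) {
              break;  // range exhausted
            }
            visitor->Visit(current);  // e.g. resolve or verify class_def `current`
          }
        });
      }
      for (std::thread& worker : workers) {
        worker.join();
      }
    }

A VerifyClassVisitor-style subclass then only needs to implement Visit(size_t) over a class_def index, which is exactly how the resolve/verify/initialize/compile passes below are wired up.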
- bool requires_constructor_barrier = true; - - // Method and Field are the worst. We can't resolve without either - // context from the code use (to disambiguate virtual vs direct - // method and instance vs static field) or from class - // definitions. While the compiler will resolve what it can as it - // needs it, here we try to resolve fields and methods used in class - // definitions, since many of them many never be referenced by - // generated code. - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - ScopedObjectAccess soa(self); - StackHandleScope<2> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file))); - // Resolve the class. - mirror::Class* klass = class_linker->ResolveType(dex_file, class_def.class_idx_, dex_cache, - class_loader); - bool resolve_fields_and_methods; - if (klass == nullptr) { - // Class couldn't be resolved, for example, super-class is in a different dex file. Don't - // attempt to resolve methods and fields when there is no declaring class. - CheckAndClearResolveException(soa.Self()); - resolve_fields_and_methods = false; - } else { - // We successfully resolved a class, should we skip it? - if (SkipClass(jclass_loader, dex_file, klass)) { - return; - } - // We want to resolve the methods and fields eagerly. - resolve_fields_and_methods = true; - } - // Note the class_data pointer advances through the headers, - // static fields, instance fields, direct methods, and virtual - // methods. - const uint8_t* class_data = dex_file.GetClassData(class_def); - if (class_data == nullptr) { - // Empty class such as a marker interface. - requires_constructor_barrier = false; - } else { - ClassDataItemIterator it(dex_file, class_data); - while (it.HasNextStaticField()) { - if (resolve_fields_and_methods) { - ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(), - dex_cache, class_loader, true); - if (field == nullptr) { - CheckAndClearResolveException(soa.Self()); - } - } - it.Next(); - } - // We require a constructor barrier if there are final instance fields. - requires_constructor_barrier = false; - while (it.HasNextInstanceField()) { - if (it.MemberIsFinal()) { - requires_constructor_barrier = true; +class ResolveClassFieldsAndMethodsVisitor : public CompilationVisitor { + public: + explicit ResolveClassFieldsAndMethodsVisitor(const ParallelCompilationManager* manager) + : manager_(manager) {} + + void Visit(size_t class_def_index) OVERRIDE REQUIRES(!Locks::mutator_lock_) { + ATRACE_CALL(); + Thread* const self = Thread::Current(); + jobject jclass_loader = manager_->GetClassLoader(); + const DexFile& dex_file = *manager_->GetDexFile(); + ClassLinker* class_linker = manager_->GetClassLinker(); + + // If an instance field is final then we need to have a barrier on the return, static final + // fields are assigned within the lock held for class initialization. Conservatively assume + // constructor barriers are always required. + bool requires_constructor_barrier = true; + + // Method and Field are the worst. We can't resolve without either + // context from the code use (to disambiguate virtual vs direct + // method and instance vs static field) or from class + // definitions. 
While the compiler will resolve what it can as it + // needs it, here we try to resolve fields and methods used in class + // definitions, since many of them many never be referenced by + // generated code. + const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); + ScopedObjectAccess soa(self); + StackHandleScope<2> hs(soa.Self()); + Handle<mirror::ClassLoader> class_loader( + hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); + Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file))); + // Resolve the class. + mirror::Class* klass = class_linker->ResolveType(dex_file, class_def.class_idx_, dex_cache, + class_loader); + bool resolve_fields_and_methods; + if (klass == nullptr) { + // Class couldn't be resolved, for example, super-class is in a different dex file. Don't + // attempt to resolve methods and fields when there is no declaring class. + CheckAndClearResolveException(soa.Self()); + resolve_fields_and_methods = false; + } else { + // We successfully resolved a class, should we skip it? + if (SkipClass(jclass_loader, dex_file, klass)) { + return; } - if (resolve_fields_and_methods) { - ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(), - dex_cache, class_loader, false); - if (field == nullptr) { - CheckAndClearResolveException(soa.Self()); + // We want to resolve the methods and fields eagerly. + resolve_fields_and_methods = true; + } + // Note the class_data pointer advances through the headers, + // static fields, instance fields, direct methods, and virtual + // methods. + const uint8_t* class_data = dex_file.GetClassData(class_def); + if (class_data == nullptr) { + // Empty class such as a marker interface. + requires_constructor_barrier = false; + } else { + ClassDataItemIterator it(dex_file, class_data); + while (it.HasNextStaticField()) { + if (resolve_fields_and_methods) { + ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(), + dex_cache, class_loader, true); + if (field == nullptr) { + CheckAndClearResolveException(soa.Self()); + } } + it.Next(); } - it.Next(); - } - if (resolve_fields_and_methods) { - while (it.HasNextDirectMethod()) { - ArtMethod* method = class_linker->ResolveMethod( - dex_file, it.GetMemberIndex(), dex_cache, class_loader, nullptr, - it.GetMethodInvokeType(class_def)); - if (method == nullptr) { - CheckAndClearResolveException(soa.Self()); + // We require a constructor barrier if there are final instance fields. 
+ requires_constructor_barrier = false; + while (it.HasNextInstanceField()) { + if (it.MemberIsFinal()) { + requires_constructor_barrier = true; + } + if (resolve_fields_and_methods) { + ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(), + dex_cache, class_loader, false); + if (field == nullptr) { + CheckAndClearResolveException(soa.Self()); + } } it.Next(); } - while (it.HasNextVirtualMethod()) { - ArtMethod* method = class_linker->ResolveMethod( - dex_file, it.GetMemberIndex(), dex_cache, class_loader, nullptr, - it.GetMethodInvokeType(class_def)); - if (method == nullptr) { - CheckAndClearResolveException(soa.Self()); + if (resolve_fields_and_methods) { + while (it.HasNextDirectMethod()) { + ArtMethod* method = class_linker->ResolveMethod( + dex_file, it.GetMemberIndex(), dex_cache, class_loader, nullptr, + it.GetMethodInvokeType(class_def)); + if (method == nullptr) { + CheckAndClearResolveException(soa.Self()); + } + it.Next(); } - it.Next(); + while (it.HasNextVirtualMethod()) { + ArtMethod* method = class_linker->ResolveMethod( + dex_file, it.GetMemberIndex(), dex_cache, class_loader, nullptr, + it.GetMethodInvokeType(class_def)); + if (method == nullptr) { + CheckAndClearResolveException(soa.Self()); + } + it.Next(); + } + DCHECK(!it.HasNext()); } - DCHECK(!it.HasNext()); + } + if (requires_constructor_barrier) { + manager_->GetCompiler()->AddRequiresConstructorBarrier(self, &dex_file, class_def_index); } } - if (requires_constructor_barrier) { - manager->GetCompiler()->AddRequiresConstructorBarrier(self, &dex_file, class_def_index); - } -} -static void ResolveType(const ParallelCompilationManager* manager, size_t type_idx) - LOCKS_EXCLUDED(Locks::mutator_lock_) { - // Class derived values are more complicated, they require the linker and loader. - ScopedObjectAccess soa(Thread::Current()); - ClassLinker* class_linker = manager->GetClassLinker(); - const DexFile& dex_file = *manager->GetDexFile(); - StackHandleScope<2> hs(soa.Self()); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file))); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader*>(manager->GetClassLoader()))); - mirror::Class* klass = class_linker->ResolveType(dex_file, type_idx, dex_cache, class_loader); + private: + const ParallelCompilationManager* const manager_; +}; - if (klass == nullptr) { - CHECK(soa.Self()->IsExceptionPending()); - mirror::Throwable* exception = soa.Self()->GetException(); - VLOG(compiler) << "Exception during type resolution: " << exception->Dump(); - if (exception->GetClass()->DescriptorEquals("Ljava/lang/OutOfMemoryError;")) { - // There's little point continuing compilation if the heap is exhausted. - LOG(FATAL) << "Out of memory during type resolution for compilation"; +class ResolveTypeVisitor : public CompilationVisitor { + public: + explicit ResolveTypeVisitor(const ParallelCompilationManager* manager) : manager_(manager) { + } + virtual void Visit(size_t type_idx) OVERRIDE REQUIRES(!Locks::mutator_lock_) { + // Class derived values are more complicated, they require the linker and loader. 
+ ScopedObjectAccess soa(Thread::Current()); + ClassLinker* class_linker = manager_->GetClassLinker(); + const DexFile& dex_file = *manager_->GetDexFile(); + StackHandleScope<2> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file))); + Handle<mirror::ClassLoader> class_loader( + hs.NewHandle(soa.Decode<mirror::ClassLoader*>(manager_->GetClassLoader()))); + mirror::Class* klass = class_linker->ResolveType(dex_file, type_idx, dex_cache, class_loader); + + if (klass == nullptr) { + soa.Self()->AssertPendingException(); + mirror::Throwable* exception = soa.Self()->GetException(); + VLOG(compiler) << "Exception during type resolution: " << exception->Dump(); + if (exception->GetClass()->DescriptorEquals("Ljava/lang/OutOfMemoryError;")) { + // There's little point continuing compilation if the heap is exhausted. + LOG(FATAL) << "Out of memory during type resolution for compilation"; + } + soa.Self()->ClearException(); } - soa.Self()->ClearException(); } -} + + private: + const ParallelCompilationManager* const manager_; +}; void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, @@ -1860,17 +2033,18 @@ void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_fil // For images we resolve all types, such as array, whereas for applications just those with // classdefs are resolved by ResolveClassFieldsAndMethods. TimingLogger::ScopedTiming t("Resolve Types", timings); - context.ForAll(0, dex_file.NumTypeIds(), ResolveType, thread_count_); + ResolveTypeVisitor visitor(&context); + context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count_); } TimingLogger::ScopedTiming t("Resolve MethodsAndFields", timings); - context.ForAll(0, dex_file.NumClassDefs(), ResolveClassFieldsAndMethods, thread_count_); + ResolveClassFieldsAndMethodsVisitor visitor(&context); + context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_); } void CompilerDriver::SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) { - for (size_t i = 0; i != dex_files.size(); ++i) { - const DexFile* dex_file = dex_files[i]; + for (const DexFile* dex_file : dex_files) { CHECK(dex_file != nullptr); SetVerifiedDexFile(class_loader, *dex_file, dex_files, thread_pool, timings); } @@ -1878,67 +2052,73 @@ void CompilerDriver::SetVerified(jobject class_loader, const std::vector<const D void CompilerDriver::Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) { - for (size_t i = 0; i != dex_files.size(); ++i) { - const DexFile* dex_file = dex_files[i]; + for (const DexFile* dex_file : dex_files) { CHECK(dex_file != nullptr); VerifyDexFile(class_loader, *dex_file, dex_files, thread_pool, timings); } } -static void VerifyClass(const ParallelCompilationManager* manager, size_t class_def_index) - LOCKS_EXCLUDED(Locks::mutator_lock_) { - ATRACE_CALL(); - ScopedObjectAccess soa(Thread::Current()); - const DexFile& dex_file = *manager->GetDexFile(); - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - const char* descriptor = dex_file.GetClassDescriptor(class_def); - ClassLinker* class_linker = manager->GetClassLinker(); - jobject jclass_loader = manager->GetClassLoader(); - StackHandleScope<3> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); - 
Handle<mirror::Class> klass( - hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor, class_loader))); - if (klass.Get() == nullptr) { - CHECK(soa.Self()->IsExceptionPending()); - soa.Self()->ClearException(); +class VerifyClassVisitor : public CompilationVisitor { + public: + explicit VerifyClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {} - /* - * At compile time, we can still structurally verify the class even if FindClass fails. - * This is to ensure the class is structurally sound for compilation. An unsound class - * will be rejected by the verifier and later skipped during compilation in the compiler. - */ - Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file))); - std::string error_msg; - if (verifier::MethodVerifier::VerifyClass(soa.Self(), &dex_file, dex_cache, class_loader, - &class_def, true, &error_msg) == - verifier::MethodVerifier::kHardFailure) { - LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor) - << " because: " << error_msg; - manager->GetCompiler()->SetHadHardVerifierFailure(); - } - } else if (!SkipClass(jclass_loader, dex_file, klass.Get())) { - CHECK(klass->IsResolved()) << PrettyClass(klass.Get()); - class_linker->VerifyClass(soa.Self(), klass); - - if (klass->IsErroneous()) { - // ClassLinker::VerifyClass throws, which isn't useful in the compiler. + virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE { + ATRACE_CALL(); + ScopedObjectAccess soa(Thread::Current()); + const DexFile& dex_file = *manager_->GetDexFile(); + const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); + const char* descriptor = dex_file.GetClassDescriptor(class_def); + ClassLinker* class_linker = manager_->GetClassLinker(); + jobject jclass_loader = manager_->GetClassLoader(); + StackHandleScope<3> hs(soa.Self()); + Handle<mirror::ClassLoader> class_loader( + hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); + Handle<mirror::Class> klass( + hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor, class_loader))); + if (klass.Get() == nullptr) { CHECK(soa.Self()->IsExceptionPending()); soa.Self()->ClearException(); - manager->GetCompiler()->SetHadHardVerifierFailure(); - } - CHECK(klass->IsCompileTimeVerified() || klass->IsErroneous()) - << PrettyDescriptor(klass.Get()) << ": state=" << klass->GetStatus(); + /* + * At compile time, we can still structurally verify the class even if FindClass fails. + * This is to ensure the class is structurally sound for compilation. An unsound class + * will be rejected by the verifier and later skipped during compilation in the compiler. + */ + Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file))); + std::string error_msg; + if (verifier::MethodVerifier::VerifyClass(soa.Self(), &dex_file, dex_cache, class_loader, + &class_def, true, &error_msg) == + verifier::MethodVerifier::kHardFailure) { + LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor) + << " because: " << error_msg; + manager_->GetCompiler()->SetHadHardVerifierFailure(); + } + } else if (!SkipClass(jclass_loader, dex_file, klass.Get())) { + CHECK(klass->IsResolved()) << PrettyClass(klass.Get()); + class_linker->VerifyClass(soa.Self(), klass); + + if (klass->IsErroneous()) { + // ClassLinker::VerifyClass throws, which isn't useful in the compiler. 
+ CHECK(soa.Self()->IsExceptionPending()); + soa.Self()->ClearException(); + manager_->GetCompiler()->SetHadHardVerifierFailure(); + } + + CHECK(klass->IsCompileTimeVerified() || klass->IsErroneous()) + << PrettyDescriptor(klass.Get()) << ": state=" << klass->GetStatus(); - // It is *very* problematic if there are verification errors in the boot classpath. For example, - // we rely on things working OK without verification when the decryption dialog is brought up. - // So abort in a debug build if we find this violated. - DCHECK(!manager->GetCompiler()->IsImage() || klass->IsVerified()) << "Boot classpath class " << - PrettyClass(klass.Get()) << " failed to fully verify."; + // It is *very* problematic if there are verification errors in the boot classpath. For example, + // we rely on things working OK without verification when the decryption dialog is brought up. + // So abort in a debug build if we find this violated. + DCHECK(!manager_->GetCompiler()->IsImage() || klass->IsVerified()) << "Boot classpath class " + << PrettyClass(klass.Get()) << " failed to fully verify."; + } + soa.Self()->AssertNoPendingException(); } - soa.Self()->AssertNoPendingException(); -} + + private: + const ParallelCompilationManager* const manager_; +}; void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, @@ -1947,48 +2127,56 @@ void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files, thread_pool); - context.ForAll(0, dex_file.NumClassDefs(), VerifyClass, thread_count_); + VerifyClassVisitor visitor(&context); + context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_); } -static void SetVerifiedClass(const ParallelCompilationManager* manager, size_t class_def_index) - LOCKS_EXCLUDED(Locks::mutator_lock_) { - ATRACE_CALL(); - ScopedObjectAccess soa(Thread::Current()); - const DexFile& dex_file = *manager->GetDexFile(); - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - const char* descriptor = dex_file.GetClassDescriptor(class_def); - ClassLinker* class_linker = manager->GetClassLinker(); - jobject jclass_loader = manager->GetClassLoader(); - StackHandleScope<3> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); - Handle<mirror::Class> klass( - hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor, class_loader))); - // Class might have failed resolution. Then don't set it to verified. - if (klass.Get() != nullptr) { - // Only do this if the class is resolved. If even resolution fails, quickening will go very, - // very wrong. - if (klass->IsResolved()) { - if (klass->GetStatus() < mirror::Class::kStatusVerified) { - ObjectLock<mirror::Class> lock(soa.Self(), klass); - // Set class status to verified. - mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, soa.Self()); - // Mark methods as pre-verified. If we don't do this, the interpreter will run with - // access checks. 
- klass->SetPreverifiedFlagOnAllMethods( - GetInstructionSetPointerSize(manager->GetCompiler()->GetInstructionSet())); - klass->SetPreverified(); +class SetVerifiedClassVisitor : public CompilationVisitor { + public: + explicit SetVerifiedClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {} + + virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE { + ATRACE_CALL(); + ScopedObjectAccess soa(Thread::Current()); + const DexFile& dex_file = *manager_->GetDexFile(); + const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); + const char* descriptor = dex_file.GetClassDescriptor(class_def); + ClassLinker* class_linker = manager_->GetClassLinker(); + jobject jclass_loader = manager_->GetClassLoader(); + StackHandleScope<3> hs(soa.Self()); + Handle<mirror::ClassLoader> class_loader( + hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); + Handle<mirror::Class> klass( + hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor, class_loader))); + // Class might have failed resolution. Then don't set it to verified. + if (klass.Get() != nullptr) { + // Only do this if the class is resolved. If even resolution fails, quickening will go very, + // very wrong. + if (klass->IsResolved()) { + if (klass->GetStatus() < mirror::Class::kStatusVerified) { + ObjectLock<mirror::Class> lock(soa.Self(), klass); + // Set class status to verified. + mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, soa.Self()); + // Mark methods as pre-verified. If we don't do this, the interpreter will run with + // access checks. + klass->SetPreverifiedFlagOnAllMethods( + GetInstructionSetPointerSize(manager_->GetCompiler()->GetInstructionSet())); + klass->SetPreverified(); + } + // Record the final class status if necessary. + ClassReference ref(manager_->GetDexFile(), class_def_index); + manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus()); } - // Record the final class status if necessary. 
- ClassReference ref(manager->GetDexFile(), class_def_index); - manager->GetCompiler()->RecordClassStatus(ref, klass->GetStatus()); + } else { + Thread* self = soa.Self(); + DCHECK(self->IsExceptionPending()); + self->ClearException(); } - } else { - Thread* self = soa.Self(); - DCHECK(self->IsExceptionPending()); - self->ClearException(); } -} + + private: + const ParallelCompilationManager* const manager_; +}; void CompilerDriver::SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, @@ -1997,99 +2185,107 @@ void CompilerDriver::SetVerifiedDexFile(jobject class_loader, const DexFile& dex ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files, thread_pool); - context.ForAll(0, dex_file.NumClassDefs(), SetVerifiedClass, thread_count_); + SetVerifiedClassVisitor visitor(&context); + context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_); } -static void InitializeClass(const ParallelCompilationManager* manager, size_t class_def_index) - LOCKS_EXCLUDED(Locks::mutator_lock_) { - ATRACE_CALL(); - jobject jclass_loader = manager->GetClassLoader(); - const DexFile& dex_file = *manager->GetDexFile(); - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - const DexFile::TypeId& class_type_id = dex_file.GetTypeId(class_def.class_idx_); - const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_); +class InitializeClassVisitor : public CompilationVisitor { + public: + explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {} - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<3> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); - Handle<mirror::Class> klass( - hs.NewHandle(manager->GetClassLinker()->FindClass(soa.Self(), descriptor, class_loader))); - - if (klass.Get() != nullptr && !SkipClass(jclass_loader, dex_file, klass.Get())) { - // Only try to initialize classes that were successfully verified. - if (klass->IsVerified()) { - // Attempt to initialize the class but bail if we either need to initialize the super-class - // or static fields. - manager->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false); - if (!klass->IsInitialized()) { - // We don't want non-trivial class initialization occurring on multiple threads due to - // deadlock problems. For example, a parent class is initialized (holding its lock) that - // refers to a sub-class in its static/class initializer causing it to try to acquire the - // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock) - // after first initializing its parents, whose locks are acquired. This leads to a - // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock. - // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather - // than use a special Object for the purpose we use the Class of java.lang.Class. - Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass())); - ObjectLock<mirror::Class> lock(soa.Self(), h_klass); - // Attempt to initialize allowing initialization of parent classes but still not static - // fields. 
- manager->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true); + virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE { + ATRACE_CALL(); + jobject jclass_loader = manager_->GetClassLoader(); + const DexFile& dex_file = *manager_->GetDexFile(); + const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); + const DexFile::TypeId& class_type_id = dex_file.GetTypeId(class_def.class_idx_); + const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_); + + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<3> hs(soa.Self()); + Handle<mirror::ClassLoader> class_loader( + hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); + Handle<mirror::Class> klass( + hs.NewHandle(manager_->GetClassLinker()->FindClass(soa.Self(), descriptor, class_loader))); + + if (klass.Get() != nullptr && !SkipClass(jclass_loader, dex_file, klass.Get())) { + // Only try to initialize classes that were successfully verified. + if (klass->IsVerified()) { + // Attempt to initialize the class but bail if we either need to initialize the super-class + // or static fields. + manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false); if (!klass->IsInitialized()) { - // We need to initialize static fields, we only do this for image classes that aren't - // marked with the $NoPreloadHolder (which implies this should not be initialized early). - bool can_init_static_fields = manager->GetCompiler()->IsImage() && - manager->GetCompiler()->IsImageClass(descriptor) && - !StringPiece(descriptor).ends_with("$NoPreloadHolder;"); - if (can_init_static_fields) { - VLOG(compiler) << "Initializing: " << descriptor; - // TODO multithreading support. We should ensure the current compilation thread has - // exclusive access to the runtime and the transaction. To achieve this, we could use - // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity - // checks in Thread::AssertThreadSuspensionIsAllowable. - Runtime* const runtime = Runtime::Current(); - Transaction transaction; - - // Run the class initializer in transaction mode. - runtime->EnterTransactionMode(&transaction); - const mirror::Class::Status old_status = klass->GetStatus(); - bool success = manager->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true, - true); - // TODO we detach transaction from runtime to indicate we quit the transactional - // mode which prevents the GC from visiting objects modified during the transaction. - // Ensure GC is not run so don't access freed objects when aborting transaction. - - ScopedAssertNoThreadSuspension ants(soa.Self(), "Transaction end"); - runtime->ExitTransactionMode(); - - if (!success) { - CHECK(soa.Self()->IsExceptionPending()); - mirror::Throwable* exception = soa.Self()->GetException(); - VLOG(compiler) << "Initialization of " << descriptor << " aborted because of " - << exception->Dump(); - std::ostream* file_log = manager->GetCompiler()-> - GetCompilerOptions().GetInitFailureOutput(); - if (file_log != nullptr) { - *file_log << descriptor << "\n"; - *file_log << exception->Dump() << "\n"; + // We don't want non-trivial class initialization occurring on multiple threads due to + // deadlock problems. For example, a parent class is initialized (holding its lock) that + // refers to a sub-class in its static/class initializer causing it to try to acquire the + // sub-class' lock. 
While on a second thread the sub-class is initialized (holding its lock) + // after first initializing its parents, whose locks are acquired. This leads to a + // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock. + // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather + // than use a special Object for the purpose we use the Class of java.lang.Class. + Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass())); + ObjectLock<mirror::Class> lock(soa.Self(), h_klass); + // Attempt to initialize allowing initialization of parent classes but still not static + // fields. + manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true); + if (!klass->IsInitialized()) { + // We need to initialize static fields, we only do this for image classes that aren't + // marked with the $NoPreloadHolder (which implies this should not be initialized early). + bool can_init_static_fields = manager_->GetCompiler()->IsImage() && + manager_->GetCompiler()->IsImageClass(descriptor) && + !StringPiece(descriptor).ends_with("$NoPreloadHolder;"); + if (can_init_static_fields) { + VLOG(compiler) << "Initializing: " << descriptor; + // TODO multithreading support. We should ensure the current compilation thread has + // exclusive access to the runtime and the transaction. To achieve this, we could use + // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity + // checks in Thread::AssertThreadSuspensionIsAllowable. + Runtime* const runtime = Runtime::Current(); + Transaction transaction; + + // Run the class initializer in transaction mode. + runtime->EnterTransactionMode(&transaction); + const mirror::Class::Status old_status = klass->GetStatus(); + bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true, + true); + // TODO we detach transaction from runtime to indicate we quit the transactional + // mode which prevents the GC from visiting objects modified during the transaction. + // Ensure GC is not run so don't access freed objects when aborting transaction. + + ScopedAssertNoThreadSuspension ants(soa.Self(), "Transaction end"); + runtime->ExitTransactionMode(); + + if (!success) { + CHECK(soa.Self()->IsExceptionPending()); + mirror::Throwable* exception = soa.Self()->GetException(); + VLOG(compiler) << "Initialization of " << descriptor << " aborted because of " + << exception->Dump(); + std::ostream* file_log = manager_->GetCompiler()-> + GetCompilerOptions().GetInitFailureOutput(); + if (file_log != nullptr) { + *file_log << descriptor << "\n"; + *file_log << exception->Dump() << "\n"; + } + soa.Self()->ClearException(); + transaction.Rollback(); + CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored"; } - soa.Self()->ClearException(); - transaction.Rollback(); - CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored"; } } + soa.Self()->AssertNoPendingException(); } - soa.Self()->AssertNoPendingException(); } + // Record the final class status if necessary. + ClassReference ref(manager_->GetDexFile(), class_def_index); + manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus()); } - // Record the final class status if necessary. - ClassReference ref(manager->GetDexFile(), class_def_index); - manager->GetCompiler()->RecordClassStatus(ref, klass->GetStatus()); + // Clear any class not found or verification exceptions. 
+ soa.Self()->ClearException(); } - // Clear any class not found or verification exceptions. - soa.Self()->ClearException(); -} + + private: + const ParallelCompilationManager* const manager_; +}; void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, @@ -2105,7 +2301,8 @@ void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& } else { thread_count = thread_count_; } - context.ForAll(0, dex_file.NumClassDefs(), InitializeClass, thread_count); + InitializeClassVisitor visitor(&context); + context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count); } void CompilerDriver::InitializeClasses(jobject class_loader, @@ -2132,101 +2329,102 @@ void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFi VLOG(compiler) << "Compile: " << GetMemoryUsageString(false); } -void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, - size_t class_def_index) { - ATRACE_CALL(); - const DexFile& dex_file = *manager->GetDexFile(); - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - ClassLinker* class_linker = manager->GetClassLinker(); - jobject jclass_loader = manager->GetClassLoader(); - Thread* self = Thread::Current(); - { - // Use a scoped object access to perform to the quick SkipClass check. - const char* descriptor = dex_file.GetClassDescriptor(class_def); - ScopedObjectAccess soa(self); - StackHandleScope<3> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); - Handle<mirror::Class> klass( - hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor, class_loader))); - if (klass.Get() == nullptr) { - CHECK(soa.Self()->IsExceptionPending()); - soa.Self()->ClearException(); - } else if (SkipClass(jclass_loader, dex_file, klass.Get())) { +class CompileClassVisitor : public CompilationVisitor { + public: + explicit CompileClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {} + + virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE { + ATRACE_CALL(); + const DexFile& dex_file = *manager_->GetDexFile(); + const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); + ClassLinker* class_linker = manager_->GetClassLinker(); + jobject jclass_loader = manager_->GetClassLoader(); + Thread* self = Thread::Current(); + { + // Use a scoped object access to perform to the quick SkipClass check. 
+ const char* descriptor = dex_file.GetClassDescriptor(class_def); + ScopedObjectAccess soa(self); + StackHandleScope<3> hs(soa.Self()); + Handle<mirror::ClassLoader> class_loader( + hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); + Handle<mirror::Class> klass( + hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor, class_loader))); + if (klass.Get() == nullptr) { + CHECK(soa.Self()->IsExceptionPending()); + soa.Self()->ClearException(); + } else if (SkipClass(jclass_loader, dex_file, klass.Get())) { + return; + } + } + ClassReference ref(&dex_file, class_def_index); + // Skip compiling classes with generic verifier failures since they will still fail at runtime + if (manager_->GetCompiler()->verification_results_->IsClassRejected(ref)) { + return; + } + const uint8_t* class_data = dex_file.GetClassData(class_def); + if (class_data == nullptr) { + // empty class, probably a marker interface return; } - } - ClassReference ref(&dex_file, class_def_index); - // Skip compiling classes with generic verifier failures since they will still fail at runtime - if (manager->GetCompiler()->verification_results_->IsClassRejected(ref)) { - return; - } - const uint8_t* class_data = dex_file.GetClassData(class_def); - if (class_data == nullptr) { - // empty class, probably a marker interface - return; - } - CompilerDriver* const driver = manager->GetCompiler(); + CompilerDriver* const driver = manager_->GetCompiler(); - // Can we run DEX-to-DEX compiler on this class ? - DexToDexCompilationLevel dex_to_dex_compilation_level = kDontDexToDexCompile; - { - ScopedObjectAccess soa(self); - StackHandleScope<1> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader))); - dex_to_dex_compilation_level = driver->GetDexToDexCompilationlevel( - soa.Self(), class_loader, dex_file, class_def); - } - ClassDataItemIterator it(dex_file, class_data); - // Skip fields - while (it.HasNextStaticField()) { - it.Next(); - } - while (it.HasNextInstanceField()) { - it.Next(); - } + // Can we run DEX-to-DEX compiler on this class ? 
+ optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level = + GetDexToDexCompilationLevel(self, *driver, jclass_loader, dex_file, class_def); + + ClassDataItemIterator it(dex_file, class_data); + // Skip fields + while (it.HasNextStaticField()) { + it.Next(); + } + while (it.HasNextInstanceField()) { + it.Next(); + } - bool compilation_enabled = driver->IsClassToCompile( - dex_file.StringByTypeIdx(class_def.class_idx_)); + bool compilation_enabled = driver->IsClassToCompile( + dex_file.StringByTypeIdx(class_def.class_idx_)); - // Compile direct methods - int64_t previous_direct_method_idx = -1; - while (it.HasNextDirectMethod()) { - uint32_t method_idx = it.GetMemberIndex(); - if (method_idx == previous_direct_method_idx) { - // smali can create dex files with two encoded_methods sharing the same method_idx - // http://code.google.com/p/smali/issues/detail?id=119 + // Compile direct methods + int64_t previous_direct_method_idx = -1; + while (it.HasNextDirectMethod()) { + uint32_t method_idx = it.GetMemberIndex(); + if (method_idx == previous_direct_method_idx) { + // smali can create dex files with two encoded_methods sharing the same method_idx + // http://code.google.com/p/smali/issues/detail?id=119 + it.Next(); + continue; + } + previous_direct_method_idx = method_idx; + CompileMethod(self, driver, it.GetMethodCodeItem(), it.GetMethodAccessFlags(), + it.GetMethodInvokeType(class_def), class_def_index, + method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level, + compilation_enabled); it.Next(); - continue; - } - previous_direct_method_idx = method_idx; - driver->CompileMethod(self, it.GetMethodCodeItem(), it.GetMethodAccessFlags(), - it.GetMethodInvokeType(class_def), class_def_index, - method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level, - compilation_enabled); - it.Next(); - } - // Compile virtual methods - int64_t previous_virtual_method_idx = -1; - while (it.HasNextVirtualMethod()) { - uint32_t method_idx = it.GetMemberIndex(); - if (method_idx == previous_virtual_method_idx) { - // smali can create dex files with two encoded_methods sharing the same method_idx - // http://code.google.com/p/smali/issues/detail?id=119 + } + // Compile virtual methods + int64_t previous_virtual_method_idx = -1; + while (it.HasNextVirtualMethod()) { + uint32_t method_idx = it.GetMemberIndex(); + if (method_idx == previous_virtual_method_idx) { + // smali can create dex files with two encoded_methods sharing the same method_idx + // http://code.google.com/p/smali/issues/detail?id=119 + it.Next(); + continue; + } + previous_virtual_method_idx = method_idx; + CompileMethod(self, driver, it.GetMethodCodeItem(), it.GetMethodAccessFlags(), + it.GetMethodInvokeType(class_def), class_def_index, + method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level, + compilation_enabled); it.Next(); - continue; } - previous_virtual_method_idx = method_idx; - driver->CompileMethod(self, it.GetMethodCodeItem(), it.GetMethodAccessFlags(), - it.GetMethodInvokeType(class_def), class_def_index, - method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level, - compilation_enabled); - it.Next(); + DCHECK(!it.HasNext()); } - DCHECK(!it.HasNext()); -} + + private: + const ParallelCompilationManager* const manager_; +}; void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, @@ -2234,109 +2432,22 @@ void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_fil TimingLogger::ScopedTiming t("Compile 
Dex File", timings); ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this, &dex_file, dex_files, thread_pool); - context.ForAll(0, dex_file.NumClassDefs(), CompilerDriver::CompileClass, thread_count_); -} - -// Does the runtime for the InstructionSet provide an implementation returned by -// GetQuickGenericJniStub allowing down calls that aren't compiled using a JNI compiler? -static bool InstructionSetHasGenericJniStub(InstructionSet isa) { - switch (isa) { - case kArm: - case kArm64: - case kThumb2: - case kMips: - case kMips64: - case kX86: - case kX86_64: return true; - default: return false; - } + CompileClassVisitor visitor(&context); + context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_); } -void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_item, - uint32_t access_flags, InvokeType invoke_type, - uint16_t class_def_idx, uint32_t method_idx, - jobject class_loader, const DexFile& dex_file, - DexToDexCompilationLevel dex_to_dex_compilation_level, - bool compilation_enabled) { - CompiledMethod* compiled_method = nullptr; - uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0; - MethodReference method_ref(&dex_file, method_idx); - - if ((access_flags & kAccNative) != 0) { - // Are we interpreting only and have support for generic JNI down calls? - if (!compiler_options_->IsCompilationEnabled() && - InstructionSetHasGenericJniStub(instruction_set_)) { - // Leaving this empty will trigger the generic JNI version - } else { - compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file); - CHECK(compiled_method != nullptr); - } - } else if ((access_flags & kAccAbstract) != 0) { - // Abstract methods don't have code. - } else { - bool has_verified_method = verification_results_->GetVerifiedMethod(method_ref) != nullptr; - bool compile = compilation_enabled && - // Basic checks, e.g., not <clinit>. - verification_results_->IsCandidateForCompilation(method_ref, access_flags) && - // Did not fail to create VerifiedMethod metadata. - has_verified_method && - // Is eligable for compilation by methods-to-compile filter. - IsMethodToCompile(method_ref); - if (compile) { - // NOTE: if compiler declines to compile this method, it will return null. - compiled_method = compiler_->Compile(code_item, access_flags, invoke_type, class_def_idx, - method_idx, class_loader, dex_file); - } - if (compiled_method == nullptr && dex_to_dex_compilation_level != kDontDexToDexCompile) { - // TODO: add a command-line option to disable DEX-to-DEX compilation ? - // Do not optimize if a VerifiedMethod is missing. SafeCast elision, for example, relies on - // it. - (*dex_to_dex_compiler_)(*this, code_item, access_flags, - invoke_type, class_def_idx, - method_idx, class_loader, dex_file, - has_verified_method ? dex_to_dex_compilation_level : kRequired); - } - } - if (kTimeCompileMethod) { - uint64_t duration_ns = NanoTime() - start_ns; - if (duration_ns > MsToNs(compiler_->GetMaximumCompilationTimeBeforeWarning())) { - LOG(WARNING) << "Compilation of " << PrettyMethod(method_idx, dex_file) - << " took " << PrettyDuration(duration_ns); - } - } - - if (compiled_method != nullptr) { - // Count non-relative linker patches. 
- size_t non_relative_linker_patch_count = 0u; - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (!patch.IsPcRelative()) { - ++non_relative_linker_patch_count; - } - } - bool compile_pic = GetCompilerOptions().GetCompilePic(); // Off by default - // When compiling with PIC, there should be zero non-relative linker patches - CHECK(!compile_pic || non_relative_linker_patch_count == 0u); - - DCHECK(GetCompiledMethod(method_ref) == nullptr) << PrettyMethod(method_idx, dex_file); - { - MutexLock mu(self, compiled_methods_lock_); - compiled_methods_.Put(method_ref, compiled_method); - non_relative_linker_patch_count_ += non_relative_linker_patch_count; - } - DCHECK(GetCompiledMethod(method_ref) != nullptr) << PrettyMethod(method_idx, dex_file); - } - - // Done compiling, delete the verified method to reduce native memory usage. Do not delete in - // optimizing compiler, which may need the verified method again for inlining. - if (compiler_kind_ != Compiler::kOptimizing) { - verification_results_->RemoveVerifiedMethod(method_ref); - } - - if (self->IsExceptionPending()) { - ScopedObjectAccess soa(self); - LOG(FATAL) << "Unexpected exception compiling: " << PrettyMethod(method_idx, dex_file) << "\n" - << self->GetException()->Dump(); +void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref, + CompiledMethod* const compiled_method, + size_t non_relative_linker_patch_count) { + DCHECK(GetCompiledMethod(method_ref) == nullptr) + << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file); + { + MutexLock mu(Thread::Current(), compiled_methods_lock_); + compiled_methods_.Put(method_ref, compiled_method); + non_relative_linker_patch_count_ += non_relative_linker_patch_count; } + DCHECK(GetCompiledMethod(method_ref) != nullptr) + << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file); } void CompilerDriver::RemoveCompiledMethod(const MethodReference& method_ref) { @@ -2447,7 +2558,7 @@ bool CompilerDriver::WriteElf(const std::string& android_root, const std::vector<const art::DexFile*>& dex_files, OatWriter* oat_writer, art::File* file) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { if (kProduce64BitELFFiles && Is64BitInstructionSet(GetInstructionSet())) { return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, *this); } else { diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 2d7ceaeea1..5718be9e89 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -80,13 +80,6 @@ enum EntryPointCallingConvention { kQuickAbi }; -enum DexToDexCompilationLevel { - kDontDexToDexCompile, // Only meaning wrt image time interpretation. - kRequired, // Dex-to-dex compilation required for correctness. - kOptimize // Perform required transformation and peep-hole optimizations. -}; -std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs); - static constexpr bool kUseMurmur3Hash = true; class CompilerDriver { @@ -96,32 +89,33 @@ class CompilerDriver { // enabled. "image_classes" lets the compiler know what classes it // can assume will be in the image, with null implying all available // classes. 
- explicit CompilerDriver(const CompilerOptions* compiler_options, - VerificationResults* verification_results, - DexFileToMethodInlinerMap* method_inliner_map, - Compiler::Kind compiler_kind, - InstructionSet instruction_set, - const InstructionSetFeatures* instruction_set_features, - bool image, std::unordered_set<std::string>* image_classes, - std::unordered_set<std::string>* compiled_classes, - std::unordered_set<std::string>* compiled_methods, - size_t thread_count, bool dump_stats, bool dump_passes, - const std::string& dump_cfg_file_name, - CumulativeLogger* timer, int swap_fd, - const std::string& profile_file); + CompilerDriver(const CompilerOptions* compiler_options, + VerificationResults* verification_results, + DexFileToMethodInlinerMap* method_inliner_map, + Compiler::Kind compiler_kind, + InstructionSet instruction_set, + const InstructionSetFeatures* instruction_set_features, + bool image, std::unordered_set<std::string>* image_classes, + std::unordered_set<std::string>* compiled_classes, + std::unordered_set<std::string>* compiled_methods, + size_t thread_count, bool dump_stats, bool dump_passes, + const std::string& dump_cfg_file_name, + CumulativeLogger* timer, int swap_fd, + const std::string& profile_file); ~CompilerDriver(); void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files, TimingLogger* timings) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_); - CompiledMethod* CompileMethod(Thread* self, ArtMethod*) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) WARN_UNUSED; + CompiledMethod* CompileArtMethod(Thread* self, ArtMethod*) + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!compiled_methods_lock_) WARN_UNUSED; // Compile a single Method. void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!compiled_methods_lock_, !compiled_classes_lock_); VerificationResults* GetVerificationResults() const { return verification_results_; @@ -162,54 +156,61 @@ class CompilerDriver { // Generate the trampolines that are invoked by unresolved direct methods. 
const std::vector<uint8_t>* CreateInterpreterToInterpreterBridge() const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); const std::vector<uint8_t>* CreateInterpreterToCompiledCodeBridge() const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); const std::vector<uint8_t>* CreateJniDlsymLookup() const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); const std::vector<uint8_t>* CreateQuickGenericJniTrampoline() const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); const std::vector<uint8_t>* CreateQuickImtConflictTrampoline() const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); const std::vector<uint8_t>* CreateQuickResolutionTrampoline() const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); const std::vector<uint8_t>* CreateQuickToInterpreterBridge() const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); CompiledClass* GetCompiledClass(ClassReference ref) const - LOCKS_EXCLUDED(compiled_classes_lock_); + REQUIRES(!compiled_classes_lock_); CompiledMethod* GetCompiledMethod(MethodReference ref) const - LOCKS_EXCLUDED(compiled_methods_lock_); + REQUIRES(!compiled_methods_lock_); size_t GetNonRelativeLinkerPatchCount() const - LOCKS_EXCLUDED(compiled_methods_lock_); + REQUIRES(!compiled_methods_lock_); + // Add a compiled method. + void AddCompiledMethod(const MethodReference& method_ref, + CompiledMethod* const compiled_method, + size_t non_relative_linker_patch_count) + REQUIRES(!compiled_methods_lock_); // Remove and delete a compiled method. - void RemoveCompiledMethod(const MethodReference& method_ref); + void RemoveCompiledMethod(const MethodReference& method_ref) REQUIRES(!compiled_methods_lock_); void AddRequiresConstructorBarrier(Thread* self, const DexFile* dex_file, - uint16_t class_def_index); + uint16_t class_def_index) + REQUIRES(!freezing_constructor_lock_); bool RequiresConstructorBarrier(Thread* self, const DexFile* dex_file, - uint16_t class_def_index) const; + uint16_t class_def_index) const + REQUIRES(!freezing_constructor_lock_); // Callbacks from compiler to see what runtime checks must be generated. bool CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx); bool CanAssumeStringIsPresentInDexCache(const DexFile& dex_file, uint32_t string_idx) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); // Are runtime access checks necessary in the compiled code? bool CanAccessTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file, uint32_t type_idx, bool* type_known_final = nullptr, bool* type_known_abstract = nullptr, bool* equals_referrers_class = nullptr) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); // Are runtime access and instantiable checks necessary in the code? 
bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file, uint32_t type_idx) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx, bool* is_type_initialized, bool* use_direct_type_ptr, @@ -223,22 +224,22 @@ class CompilerDriver { // Get the DexCache for the mirror::DexCache* GetDexCache(const DexCompilationUnit* mUnit) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); mirror::ClassLoader* GetClassLoader(ScopedObjectAccess& soa, const DexCompilationUnit* mUnit) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Resolve compiling method's class. Returns null on failure. mirror::Class* ResolveCompilingMethodsClass( const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); mirror::Class* ResolveClass( const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, Handle<mirror::ClassLoader> class_loader, uint16_t type_index, const DexCompilationUnit* mUnit) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Resolve a field. Returns null on failure, including incompatible class change. // NOTE: Unlike ClassLinker's ResolveField(), this method enforces is_static. @@ -246,40 +247,40 @@ class CompilerDriver { const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit, uint32_t field_idx, bool is_static) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Resolve a field with a given dex file. ArtField* ResolveFieldWithDexFile( const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, Handle<mirror::ClassLoader> class_loader, const DexFile* dex_file, uint32_t field_idx, bool is_static) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Get declaration location of a resolved field. void GetResolvedFieldDexFileLocation( ArtField* resolved_field, const DexFile** declaring_dex_file, uint16_t* declaring_class_idx, uint16_t* declaring_field_idx) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); - bool IsFieldVolatile(ArtField* field) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - MemberOffset GetFieldOffset(ArtField* field) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool IsFieldVolatile(ArtField* field) SHARED_REQUIRES(Locks::mutator_lock_); + MemberOffset GetFieldOffset(ArtField* field) SHARED_REQUIRES(Locks::mutator_lock_); // Find a dex cache for a dex file. inline mirror::DexCache* FindDexCache(const DexFile* dex_file) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Can we fast-path an IGET/IPUT access to an instance field? If yes, compute the field offset. std::pair<bool, bool> IsFastInstanceField( mirror::DexCache* dex_cache, mirror::Class* referrer_class, ArtField* resolved_field, uint16_t field_idx) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Can we fast-path an SGET/SPUT access to a static field? If yes, compute the type index // of the declaring class in the referrer's dex file. 
std::pair<bool, bool> IsFastStaticField( mirror::DexCache* dex_cache, mirror::Class* referrer_class, ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Return whether the declaring class of `resolved_method` is // available to `referrer_class`. If this is true, compute the type @@ -291,34 +292,34 @@ class CompilerDriver { ArtMethod* resolved_method, uint16_t method_idx, uint32_t* storage_index) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Is static field's in referrer's class? bool IsStaticFieldInReferrerClass(mirror::Class* referrer_class, ArtField* resolved_field) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Is static field's class initialized? bool IsStaticFieldsClassInitialized(mirror::Class* referrer_class, ArtField* resolved_field) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Resolve a method. Returns null on failure, including incompatible class change. ArtMethod* ResolveMethod( ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit, uint32_t method_idx, InvokeType invoke_type, bool check_incompatible_class_change = true) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Get declaration location of a resolved field. void GetResolvedMethodDexFileLocation( ArtMethod* resolved_method, const DexFile** declaring_dex_file, uint16_t* declaring_class_idx, uint16_t* declaring_method_idx) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Get the index in the vtable of the method. uint16_t GetResolvedMethodVTableIndex( ArtMethod* resolved_method, InvokeType type) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Can we fast-path an INVOKE? If no, returns 0. If yes, returns a non-zero opaque flags value // for ProcessedInvoke() and computes the necessary lowering info. @@ -328,13 +329,13 @@ class CompilerDriver { mirror::Class* referrer_class, ArtMethod* resolved_method, InvokeType* invoke_type, MethodReference* target_method, const MethodReference* devirt_target, uintptr_t* direct_code, uintptr_t* direct_method) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Is method's class initialized for an invoke? // For static invokes to determine whether we need to consider potential call to <clinit>(). // For non-static invokes, assuming a non-null reference, the class is always initialized. bool IsMethodsClassInitialized(mirror::Class* referrer_class, ArtMethod* resolved_method) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Get the layout of dex cache arrays for a dex file. Returns invalid layout if the // dex cache arrays don't have a fixed layout. @@ -349,18 +350,18 @@ class CompilerDriver { ArtField** resolved_field, mirror::Class** referrer_class, mirror::DexCache** dex_cache) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Can we fast path instance field access? Computes field's offset and volatility. 
bool ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put, MemberOffset* field_offset, bool* is_volatile) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); ArtField* ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put, const ScopedObjectAccess& soa) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Can we fastpath static field access? Computes field's offset, volatility and whether the @@ -369,7 +370,7 @@ class CompilerDriver { MemberOffset* field_offset, uint32_t* storage_index, bool* is_referrers_class, bool* is_volatile, bool* is_initialized, Primitive::Type* type) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); // Can we fastpath a interface, super class or virtual method call? Computes method's vtable // index. @@ -377,7 +378,7 @@ class CompilerDriver { bool update_stats, bool enable_devirtualization, InvokeType* type, MethodReference* target_method, int* vtable_idx, uintptr_t* direct_code, uintptr_t* direct_method) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); const VerifiedMethod* GetVerifiedMethod(const DexFile* dex_file, uint32_t method_idx) const; bool IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc); @@ -445,7 +446,7 @@ class CompilerDriver { bool IsMethodToCompile(const MethodReference& method_ref) const; void RecordClassStatus(ClassReference ref, mirror::Class::Status status) - LOCKS_EXCLUDED(compiled_classes_lock_); + REQUIRES(!compiled_classes_lock_); // Checks if the specified method has been verified without failures. Returns // false if the method is not in the verification results (GetVerificationResults). @@ -473,6 +474,10 @@ class CompilerDriver { had_hard_verifier_failure_ = true; } + Compiler::Kind GetCompilerKind() { + return compiler_kind_; + } + private: // Return whether the declaring class of `resolved_member` is // available to `referrer_class` for read or write access using two @@ -487,7 +492,7 @@ class CompilerDriver { ArtMember* resolved_member, uint16_t member_idx, uint32_t* storage_index) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Can `referrer_class` access the resolved `member`? // Dispatch call to mirror::Class::CanAccessResolvedField or @@ -499,17 +504,17 @@ class CompilerDriver { ArtMember* member, mirror::DexCache* dex_cache, uint32_t field_idx) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Can we assume that the klass is initialized? bool CanAssumeClassIsInitialized(mirror::Class* klass) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Can we assume that the klass is loaded? bool CanAssumeClassIsLoaded(mirror::Class* klass) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics. // The only external contract is that unresolved method has flags 0 and resolved non-0. 
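Most of the header changes in this file are a mechanical migration from the older lock annotation macros (SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED, EXCLUSIVE_LOCKS_REQUIRED) to the REQUIRES/SHARED_REQUIRES spellings, where "must not hold the lock" becomes a negative requirement, REQUIRES(!lock). The stand-alone sketch below shows how such Clang thread-safety annotations are typically declared and checked; the macro definitions here are local simplifications, not the ones from ART's base/mutex.h.

// Stand-alone illustration of Clang thread-safety annotations in the style
// used above. Check with: clang++ -fsyntax-only -Wthread-safety -Wthread-safety-negative
// The macros below are local simplifications, not ART's base/mutex.h.
#include <mutex>

#define CAPABILITY(x)        __attribute__((capability(x)))
#define ACQUIRE(...)         __attribute__((acquire_capability(__VA_ARGS__)))
#define RELEASE(...)         __attribute__((release_capability(__VA_ARGS__)))
#define REQUIRES(...)        __attribute__((requires_capability(__VA_ARGS__)))
#define SHARED_REQUIRES(...) __attribute__((requires_shared_capability(__VA_ARGS__)))
#define GUARDED_BY(x)        __attribute__((guarded_by(x)))

class CAPABILITY("mutex") Mutex {
 public:
  void Lock() ACQUIRE() { mu_.lock(); }
  void Unlock() RELEASE() { mu_.unlock(); }

 private:
  std::mutex mu_;
};

Mutex compiled_methods_lock;
int compiled_method_count GUARDED_BY(compiled_methods_lock) = 0;

// REQUIRES(!lock): the caller must NOT hold the lock (the new spelling of the
// old LOCKS_EXCLUDED), because the function acquires it itself.
void AddOne() REQUIRES(!compiled_methods_lock) {
  compiled_methods_lock.Lock();
  ++compiled_method_count;
  compiled_methods_lock.Unlock();
}

// REQUIRES(lock): the caller must already hold the lock exclusively (the old
// EXCLUSIVE_LOCKS_REQUIRED); SHARED_REQUIRES(lock) is the shared counterpart.
int CountLocked() REQUIRES(compiled_methods_lock) {
  return compiled_method_count;
}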
@@ -540,71 +545,58 @@ class CompilerDriver { /*out*/int* stats_flags, MethodReference* target_method, uintptr_t* direct_code, uintptr_t* direct_method) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); private: - DexToDexCompilationLevel GetDexToDexCompilationlevel( - Thread* self, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, - const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_); - void LoadImageClasses(TimingLogger* timings); + void LoadImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_); // Attempt to resolve all type, methods, fields, and strings // referenced from code in the dex file following PathClassLoader // ordering semantics. void Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); void ResolveDexFile(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings); void VerifyDexFile(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); void SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings); void SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); void InitializeClasses(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_); void InitializeClasses(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) - LOCKS_EXCLUDED(Locks::mutator_lock_, compiled_classes_lock_); + REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_); - void UpdateImageClasses(TimingLogger* timings) LOCKS_EXCLUDED(Locks::mutator_lock_); + void UpdateImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_); static void FindClinitImageClassesCallback(mirror::Object* object, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings); void CompileDexFile(jobject class_loader, const DexFile& dex_file, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) - LOCKS_EXCLUDED(Locks::mutator_lock_); - void CompileMethod(Thread* self, const DexFile::CodeItem* code_item, uint32_t access_flags, - InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx, - jobject class_loader, const 
DexFile& dex_file, - DexToDexCompilationLevel dex_to_dex_compilation_level, - bool compilation_enabled) - LOCKS_EXCLUDED(compiled_methods_lock_); - - static void CompileClass(const ParallelCompilationManager* context, size_t class_def_index) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); // Swap pool and allocator used for native allocations. May be file-backed. Needs to be first // as other fields rely on this. @@ -634,8 +626,13 @@ class CompilerDriver { ClassTable compiled_classes_ GUARDED_BY(compiled_classes_lock_); typedef SafeMap<const MethodReference, CompiledMethod*, MethodReferenceComparator> MethodTable; - // All method references that this compiler has compiled. + + public: + // Lock is public so that non-members can have lock annotations. mutable Mutex compiled_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + + private: + // All method references that this compiler has compiled. MethodTable compiled_methods_ GUARDED_BY(compiled_methods_lock_); // Number of non-relative patches in all compiled methods. These patches need space // in the .oat_patches ELF section if requested in the compiler options. @@ -675,14 +672,6 @@ class CompilerDriver { typedef void (*CompilerCallbackFn)(CompilerDriver& driver); typedef MutexLock* (*CompilerMutexLockFn)(CompilerDriver& driver); - typedef void (*DexToDexCompilerFn)(CompilerDriver& driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, InvokeType invoke_type, - uint32_t class_dex_idx, uint32_t method_idx, - jobject class_loader, const DexFile& dex_file, - DexToDexCompilationLevel dex_to_dex_compilation_level); - DexToDexCompilerFn dex_to_dex_compiler_; - void* compiler_context_; bool support_boot_image_fixup_; @@ -775,6 +764,7 @@ class CompilerDriver { DedupeSet<ArrayRef<const uint8_t>, SwapVector<uint8_t>, size_t, DedupeHashFunc<const uint8_t>, 4> dedupe_cfi_info_; + friend class CompileClassVisitor; DISALLOW_COPY_AND_ASSIGN(CompilerDriver); }; diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index b358f4f396..e35d07da83 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -37,7 +37,7 @@ namespace art { class CompilerDriverTest : public CommonCompilerTest { protected: - void CompileAll(jobject class_loader) LOCKS_EXCLUDED(Locks::mutator_lock_) { + void CompileAll(jobject class_loader) REQUIRES(!Locks::mutator_lock_) { TimingLogger timings("CompilerDriverTest::CompileAll", false, false); TimingLogger::ScopedTiming t(__FUNCTION__, &timings); compiler_driver_->CompileAll(class_loader, @@ -49,7 +49,7 @@ class CompilerDriverTest : public CommonCompilerTest { void EnsureCompiled(jobject class_loader, const char* class_name, const char* method, const char* signature, bool is_virtual) - LOCKS_EXCLUDED(Locks::mutator_lock_) { + REQUIRES(!Locks::mutator_lock_) { CompileAll(class_loader); Thread::Current()->TransitionFromSuspendedToRunnable(); bool started = runtime_->Start(); diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 226e6b7952..3f5a1eabb6 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -27,6 +27,8 @@ CompilerOptions::CompilerOptions() small_method_threshold_(kDefaultSmallMethodThreshold), tiny_method_threshold_(kDefaultTinyMethodThreshold), num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold), + inline_depth_limit_(kDefaultInlineDepthLimit), + inline_max_code_units_(kDefaultInlineMaxCodeUnits), 
include_patch_information_(kDefaultIncludePatchInformation), top_k_profile_threshold_(kDefaultTopKProfileThreshold), debuggable_(false), @@ -52,6 +54,8 @@ CompilerOptions::CompilerOptions(CompilerFilter compiler_filter, size_t small_method_threshold, size_t tiny_method_threshold, size_t num_dex_methods_threshold, + size_t inline_depth_limit, + size_t inline_max_code_units, bool include_patch_information, double top_k_profile_threshold, bool debuggable, @@ -71,6 +75,8 @@ CompilerOptions::CompilerOptions(CompilerFilter compiler_filter, small_method_threshold_(small_method_threshold), tiny_method_threshold_(tiny_method_threshold), num_dex_methods_threshold_(num_dex_methods_threshold), + inline_depth_limit_(inline_depth_limit), + inline_max_code_units_(inline_max_code_units), include_patch_information_(include_patch_information), top_k_profile_threshold_(top_k_profile_threshold), debuggable_(debuggable), diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index fe681e2a53..d2a90ec87f 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -51,6 +51,12 @@ class CompilerOptions FINAL { static constexpr double kDefaultTopKProfileThreshold = 90.0; static const bool kDefaultGenerateDebugInfo = kIsDebugBuild; static const bool kDefaultIncludePatchInformation = false; + static const size_t kDefaultInlineDepthLimit = 3; + static const size_t kDefaultInlineMaxCodeUnits = 18; + + // Default inlining settings when the space filter is used. + static constexpr size_t kSpaceFilterInlineDepthLimit = 3; + static constexpr size_t kSpaceFilterInlineMaxCodeUnits = 10; CompilerOptions(); ~CompilerOptions(); @@ -61,6 +67,8 @@ class CompilerOptions FINAL { size_t small_method_threshold, size_t tiny_method_threshold, size_t num_dex_methods_threshold, + size_t inline_depth_limit, + size_t inline_max_code_units, bool include_patch_information, double top_k_profile_threshold, bool debuggable, @@ -137,6 +145,14 @@ class CompilerOptions FINAL { return num_dex_methods_threshold_; } + size_t GetInlineDepthLimit() const { + return inline_depth_limit_; + } + + size_t GetInlineMaxCodeUnits() const { + return inline_max_code_units_; + } + double GetTopKProfileThreshold() const { return top_k_profile_threshold_; } @@ -202,6 +218,8 @@ class CompilerOptions FINAL { const size_t small_method_threshold_; const size_t tiny_method_threshold_; const size_t num_dex_methods_threshold_; + const size_t inline_depth_limit_; + const size_t inline_max_code_units_; const bool include_patch_information_; // When using a profile file only the top K% of the profiled samples will be compiled. const double top_k_profile_threshold_; diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc index 4d423d007f..a07d27c1d2 100644 --- a/compiler/dwarf/dwarf_test.cc +++ b/compiler/dwarf/dwarf_test.cc @@ -27,7 +27,7 @@ namespace art { namespace dwarf { // Run the tests only on host since we need objdump. 
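The CompilerOptions changes above introduce two inlining knobs, inline_depth_limit_ and inline_max_code_units_ (defaults 3 and 18, with tighter values reserved for the space filter). The sketch below shows the kind of budget check a backend might perform against such limits; the CompilerOptions subset and the decision rule are illustrative, not the optimizing compiler's actual inliner.

// Illustrative budget check against inlining limits of the kind added above.
// The CompilerOptions subset and the decision rule are simplified stand-ins.
#include <cstddef>

struct CompilerOptions {
  size_t inline_depth_limit = 3;      // Mirrors kDefaultInlineDepthLimit.
  size_t inline_max_code_units = 18;  // Mirrors kDefaultInlineMaxCodeUnits.
};

// Returns true if a callee of `code_units` size may be inlined at `depth`.
bool ShouldInline(const CompilerOptions& options, size_t depth, size_t code_units) {
  if (depth >= options.inline_depth_limit) {
    return false;  // Too deep in the current inlining chain.
  }
  if (code_units > options.inline_max_code_units) {
    return false;  // Callee too large for the configured budget.
  }
  return true;
}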
-#ifndef HAVE_ANDROID_OS +#ifndef __ANDROID__ constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT; @@ -336,7 +336,7 @@ TEST_F(DwarfTest, DebugInfo) { CheckObjdumpOutput(is64bit, "-W"); } -#endif // HAVE_ANDROID_OS +#endif // __ANDROID__ } // namespace dwarf } // namespace art diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h index 8e13b51bbe..03f8ceb306 100644 --- a/compiler/elf_writer.h +++ b/compiler/elf_writer.h @@ -57,7 +57,7 @@ class ElfWriter { const std::vector<const DexFile*>& dex_files, const std::string& android_root, bool is_host) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0; + SHARED_REQUIRES(Locks::mutator_lock_) = 0; const CompilerDriver* const compiler_driver_; File* const elf_file_; diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc index c68bbc0655..c10ffebbbc 100644 --- a/compiler/elf_writer_debug.cc +++ b/compiler/elf_writer_debug.cc @@ -249,16 +249,16 @@ void WriteDebugSections(const CompilerDriver* compiler, // Find all addresses (low_pc) which contain deduped methods. // The first instance of method is not marked deduped_, but the rest is. std::unordered_set<uint32_t> deduped_addresses; - for (auto it = method_infos.begin(); it != method_infos.end(); ++it) { - if (it->deduped_) { - deduped_addresses.insert(it->low_pc_); + for (const OatWriter::DebugInfo& mi : method_infos) { + if (mi.deduped_) { + deduped_addresses.insert(mi.low_pc_); } } // Group the methods into compilation units based on source file. std::vector<std::vector<const OatWriter::DebugInfo*>> compilation_units; const char* last_source_file = nullptr; - for (const auto& mi : method_infos) { + for (const OatWriter::DebugInfo& mi : method_infos) { // Attribute given instruction range only to single method. // Otherwise the debugger might get really confused. 
if (!mi.deduped_) { diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h index fd202eeb5f..83781abeff 100644 --- a/compiler/elf_writer_quick.h +++ b/compiler/elf_writer_quick.h @@ -33,7 +33,7 @@ class ElfWriterQuick FINAL : public ElfWriter { const std::string& android_root, bool is_host, const CompilerDriver& driver) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); static void EncodeOatPatches(const std::vector<uintptr_t>& locations, std::vector<uint8_t>* buffer); @@ -44,7 +44,7 @@ class ElfWriterQuick FINAL : public ElfWriter { const std::string& android_root, bool is_host) OVERRIDE - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); private: ElfWriterQuick(const CompilerDriver& driver, File* elf_file) diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index fdfeb485fd..a03ff755ab 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -73,7 +73,7 @@ static constexpr bool kBinObjects = true; static constexpr bool kComputeEagerResolvedStrings = false; static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { Class* klass = obj->GetClass(); CHECK_NE(PrettyClass(klass), "com.android.dex.Dex"); } @@ -539,16 +539,19 @@ bool ImageWriter::AllocMemory() { return true; } +class ComputeLazyFieldsForClassesVisitor : public ClassVisitor { + public: + bool Visit(Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + StackHandleScope<1> hs(Thread::Current()); + mirror::Class::ComputeName(hs.NewHandle(c)); + return true; + } +}; + void ImageWriter::ComputeLazyFieldsForImageClasses() { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - class_linker->VisitClassesWithoutClassesLock(ComputeLazyFieldsForClassesVisitor, nullptr); -} - -bool ImageWriter::ComputeLazyFieldsForClassesVisitor(Class* c, void* /*arg*/) { - Thread* self = Thread::Current(); - StackHandleScope<1> hs(self); - mirror::Class::ComputeName(hs.NewHandle(c)); - return true; + ComputeLazyFieldsForClassesVisitor visitor; + class_linker->VisitClassesWithoutClassesLock(&visitor); } void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) { @@ -592,9 +595,20 @@ bool ImageWriter::IsImageClass(Class* klass) { return compiler_driver_.IsImageClass(klass->GetDescriptor(&temp)); } -struct NonImageClasses { - ImageWriter* image_writer; - std::set<std::string>* non_image_classes; +class NonImageClassesVisitor : public ClassVisitor { + public: + explicit NonImageClassesVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {} + + bool Visit(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + if (!image_writer_->IsImageClass(klass)) { + std::string temp; + non_image_classes_.insert(klass->GetDescriptor(&temp)); + } + return true; + } + + std::set<std::string> non_image_classes_; + ImageWriter* const image_writer_; }; void ImageWriter::PruneNonImageClasses() { @@ -606,14 +620,11 @@ void ImageWriter::PruneNonImageClasses() { Thread* self = Thread::Current(); // Make a list of classes we would like to prune. 
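The image-writer hunks above replace C-style callbacks that smuggle state through a void* argument with small ClassVisitor subclasses (ComputeLazyFieldsForClassesVisitor, NonImageClassesVisitor) that keep that state as members, matching the CompilationVisitor conversion in compiler_driver.cc earlier in this change. A minimal sketch of the refactor follows, with stand-in Class and ClassLinker types rather than the real runtime interfaces.

// Sketch of the callback-to-visitor refactor used above, with simplified
// stand-in types (not the real mirror::Class / ClassLinker interfaces).
#include <set>
#include <string>
#include <vector>

struct Class { std::string descriptor; bool in_image; };

// Old style: free function plus void* context.
//   typedef bool (*ClassVisitorFn)(Class* klass, void* arg);
// New style: an interface whose implementations own their state.
class ClassVisitor {
 public:
  virtual ~ClassVisitor() {}
  // Return false to stop the iteration early.
  virtual bool Visit(Class* klass) = 0;
};

class ClassLinker {
 public:
  void VisitClasses(ClassVisitor* visitor) {
    for (Class& c : classes_) {
      if (!visitor->Visit(&c)) break;
    }
  }
  std::vector<Class> classes_;
};

// State that previously travelled through the void* argument is now a member.
class NonImageClassesVisitor : public ClassVisitor {
 public:
  bool Visit(Class* klass) override {
    if (!klass->in_image) {
      non_image_classes_.insert(klass->descriptor);
    }
    return true;
  }
  std::set<std::string> non_image_classes_;
};

// Usage: collect descriptors of classes to prune.
//   NonImageClassesVisitor visitor;
//   class_linker->VisitClasses(&visitor);
//   for (const std::string& s : visitor.non_image_classes_) { /* remove */ }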
- std::set<std::string> non_image_classes; - NonImageClasses context; - context.image_writer = this; - context.non_image_classes = &non_image_classes; - class_linker->VisitClasses(NonImageClassesVisitor, &context); + NonImageClassesVisitor visitor(this); + class_linker->VisitClasses(&visitor); // Remove the undesired classes from the class roots. - for (const std::string& it : non_image_classes) { + for (const std::string& it : visitor.non_image_classes_) { bool result = class_linker->RemoveClass(it.c_str(), nullptr); DCHECK(result); } @@ -669,15 +680,6 @@ void ImageWriter::PruneNonImageClasses() { class_linker->DropFindArrayClassCache(); } -bool ImageWriter::NonImageClassesVisitor(Class* klass, void* arg) { - NonImageClasses* context = reinterpret_cast<NonImageClasses*>(arg); - if (!context->image_writer->IsImageClass(klass)) { - std::string temp; - context->non_image_classes->insert(klass->GetDescriptor(&temp)); - } - return true; -} - void ImageWriter::CheckNonImageClassesRemoved() { if (compiler_driver_.GetImageClasses() != nullptr) { gc::Heap* heap = Runtime::Current()->GetHeap(); @@ -715,8 +717,10 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) { DCHECK_EQ(obj, obj->AsString()->Intern()); return; } - mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrong( - obj->AsString()->Intern()); + // InternImageString allows us to intern while holding the heap bitmap lock. This is safe since + // we are guaranteed to not have GC during image writing. + mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrongImageString( + obj->AsString()); if (obj != interned) { if (!IsImageBinSlotAssigned(interned)) { // interned obj is after us, allocate its location early @@ -821,35 +825,68 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { field_offset = MemberOffset(field_offset.Uint32Value() + sizeof(mirror::HeapReference<mirror::Object>)); } - // Visit and assign offsets for fields. + // Visit and assign offsets for fields and field arrays. auto* as_klass = h_obj->AsClass(); - ArtField* fields[] = { as_klass->GetSFields(), as_klass->GetIFields() }; - size_t num_fields[] = { as_klass->NumStaticFields(), as_klass->NumInstanceFields() }; - for (size_t i = 0; i < 2; ++i) { - for (size_t j = 0; j < num_fields[i]; ++j) { - auto* field = fields[i] + j; - auto it = native_object_reloc_.find(field); - CHECK(it == native_object_reloc_.end()) << "Field at index " << i << ":" << j - << " already assigned " << PrettyField(field); - native_object_reloc_.emplace( - field, NativeObjectReloc { bin_slot_sizes_[kBinArtField], kBinArtField }); - bin_slot_sizes_[kBinArtField] += sizeof(ArtField); + LengthPrefixedArray<ArtField>* fields[] = { + as_klass->GetSFieldsPtr(), as_klass->GetIFieldsPtr(), + }; + for (LengthPrefixedArray<ArtField>* cur_fields : fields) { + // Total array length including header. + if (cur_fields != nullptr) { + const size_t header_size = LengthPrefixedArray<ArtField>::ComputeSize(0); + // Forward the entire array at once. + auto it = native_object_relocations_.find(cur_fields); + CHECK(it == native_object_relocations_.end()) << "Field array " << cur_fields + << " already forwarded"; + size_t& offset = bin_slot_sizes_[kBinArtField]; + native_object_relocations_.emplace( + cur_fields, NativeObjectRelocation { + offset, kNativeObjectRelocationTypeArtFieldArray }); + offset += header_size; + // Forward individual fields so that we can quickly find where they belong. 
+ for (size_t i = 0, count = cur_fields->Length(); i < count; ++i) { + // Need to forward arrays separate of fields. + ArtField* field = &cur_fields->At(i); + auto it2 = native_object_relocations_.find(field); + CHECK(it2 == native_object_relocations_.end()) << "Field at index=" << i + << " already assigned " << PrettyField(field) << " static=" << field->IsStatic(); + native_object_relocations_.emplace( + field, NativeObjectRelocation {offset, kNativeObjectRelocationTypeArtField }); + offset += sizeof(ArtField); + } } } // Visit and assign offsets for methods. - IterationRange<StrideIterator<ArtMethod>> method_arrays[] = { - as_klass->GetDirectMethods(target_ptr_size_), - as_klass->GetVirtualMethods(target_ptr_size_) + LengthPrefixedArray<ArtMethod>* method_arrays[] = { + as_klass->GetDirectMethodsPtr(), as_klass->GetVirtualMethodsPtr(), }; - for (auto& array : method_arrays) { + for (LengthPrefixedArray<ArtMethod>* array : method_arrays) { + if (array == nullptr) { + continue; + } bool any_dirty = false; size_t count = 0; - for (auto& m : array) { + const size_t method_size = ArtMethod::ObjectSize(target_ptr_size_); + auto iteration_range = MakeIterationRangeFromLengthPrefixedArray(array, method_size); + for (auto& m : iteration_range) { any_dirty = any_dirty || WillMethodBeDirty(&m); ++count; } - for (auto& m : array) { - AssignMethodOffset(&m, any_dirty ? kBinArtMethodDirty : kBinArtMethodClean); + NativeObjectRelocationType type = any_dirty ? kNativeObjectRelocationTypeArtMethodDirty : + kNativeObjectRelocationTypeArtMethodClean; + Bin bin_type = BinTypeForNativeRelocationType(type); + // Forward the entire array at once, but header first. + const size_t header_size = LengthPrefixedArray<ArtMethod>::ComputeSize(0, method_size); + auto it = native_object_relocations_.find(array); + CHECK(it == native_object_relocations_.end()) << "Method array " << array + << " already forwarded"; + size_t& offset = bin_slot_sizes_[bin_type]; + native_object_relocations_.emplace(array, NativeObjectRelocation { offset, + any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty : + kNativeObjectRelocationTypeArtMethodArrayClean }); + offset += header_size; + for (auto& m : iteration_range) { + AssignMethodOffset(&m, type); } (any_dirty ? 
dirty_methods_ : clean_methods_) += count; } @@ -867,12 +904,13 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { } } -void ImageWriter::AssignMethodOffset(ArtMethod* method, Bin bin) { - auto it = native_object_reloc_.find(method); - CHECK(it == native_object_reloc_.end()) << "Method " << method << " already assigned " +void ImageWriter::AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type) { + auto it = native_object_relocations_.find(method); + CHECK(it == native_object_relocations_.end()) << "Method " << method << " already assigned " << PrettyMethod(method); - native_object_reloc_.emplace(method, NativeObjectReloc { bin_slot_sizes_[bin], bin }); - bin_slot_sizes_[bin] += ArtMethod::ObjectSize(target_ptr_size_); + size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(type)]; + native_object_relocations_.emplace(method, NativeObjectRelocation { offset, type }); + offset += ArtMethod::ObjectSize(target_ptr_size_); } void ImageWriter::WalkFieldsCallback(mirror::Object* obj, void* arg) { @@ -926,10 +964,22 @@ void ImageWriter::CalculateNewObjectOffsets() { runtime->GetCalleeSaveMethod(Runtime::kRefsOnly); image_methods_[ImageHeader::kRefsAndArgsSaveMethod] = runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs); + + // Add room for fake length prefixed array. + const auto image_method_type = kNativeObjectRelocationTypeArtMethodArrayClean; + auto it = native_object_relocations_.find(&image_method_array_); + CHECK(it == native_object_relocations_.end()); + size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)]; + native_object_relocations_.emplace(&image_method_array_, + NativeObjectRelocation { offset, image_method_type }); + const size_t array_size = LengthPrefixedArray<ArtMethod>::ComputeSize( + 0, ArtMethod::ObjectSize(target_ptr_size_)); + CHECK_ALIGNED(array_size, 8u); + offset += array_size; for (auto* m : image_methods_) { CHECK(m != nullptr); CHECK(m->IsRuntimeMethod()); - AssignMethodOffset(m, kBinArtMethodDirty); + AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean); } // Calculate cumulative bin slot sizes. @@ -949,10 +999,10 @@ void ImageWriter::CalculateNewObjectOffsets() { image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots.Get())); // Update the native relocations by adding their bin sums. - for (auto& pair : native_object_reloc_) { - auto& native_reloc = pair.second; - native_reloc.offset += image_objects_offset_begin_ + - bin_slot_previous_sizes_[native_reloc.bin_type]; + for (auto& pair : native_object_relocations_) { + NativeObjectRelocation& relocation = pair.second; + Bin bin_type = BinTypeForNativeRelocationType(relocation.type); + relocation.offset += image_objects_offset_begin_ + bin_slot_previous_sizes_[bin_type]; } // Calculate how big the intern table will be after being serialized. 
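The relocation bookkeeping above assigns each native object (field, method, or length-prefixed array header) an offset relative to the start of its bin, then later rebases every entry by the end of the mirror objects plus the cumulative size of all earlier bins. Here is a small self-contained sketch of that two-pass offset assignment, with made-up bin names, sizes, and addresses.

// Two-pass offset assignment in the spirit of the ImageWriter code above:
// first record bin-relative offsets, then rebase by the cumulative sizes of
// the preceding bins. Bin names and sizes are illustrative only.
#include <cstddef>
#include <cstdio>
#include <map>
#include <utility>

enum Bin { kBinArtField, kBinArtMethodClean, kBinArtMethodDirty, kBinCount };

int main() {
  size_t bin_slot_sizes[kBinCount] = {0, 0, 0};
  std::map<const void*, std::pair<size_t, Bin>> relocations;

  // Pass 1: hand out offsets within each bin as objects are discovered.
  int field_a, field_b, method_c;  // Stand-ins for ArtField/ArtMethod storage.
  auto assign = [&](const void* obj, Bin bin, size_t size) {
    relocations[obj] = {bin_slot_sizes[bin], bin};
    bin_slot_sizes[bin] += size;
  };
  assign(&field_a, kBinArtField, 16);
  assign(&field_b, kBinArtField, 16);
  assign(&method_c, kBinArtMethodClean, 48);

  // Cumulative sizes of all bins that come before a given bin.
  size_t bin_slot_previous_sizes[kBinCount];
  size_t running = 0;
  for (int b = 0; b < kBinCount; ++b) {
    bin_slot_previous_sizes[b] = running;
    running += bin_slot_sizes[b];
  }

  // Pass 2: rebase each entry past the mirror objects and the earlier bins.
  const size_t image_objects_offset_begin = 0x1000;  // Illustrative.
  for (auto& pair : relocations) {
    pair.second.first += image_objects_offset_begin +
                         bin_slot_previous_sizes[pair.second.second];
  }
  std::printf("method_c ends up at image offset 0x%zx\n",
              relocations[&method_c].first);
}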
@@ -1021,8 +1071,8 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { } ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) { - auto it = native_object_reloc_.find(method); - CHECK(it != native_object_reloc_.end()) << PrettyMethod(method) << " @ " << method; + auto it = native_object_relocations_.find(method); + CHECK(it != native_object_relocations_.end()) << PrettyMethod(method) << " @ " << method; CHECK_GE(it->second.offset, image_end_) << "ArtMethods should be after Objects"; return reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); } @@ -1033,7 +1083,7 @@ class FixupRootVisitor : public RootVisitor { } void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) - OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { for (size_t i = 0; i < count; ++i) { *roots[i] = ImageAddress(*roots[i]); } @@ -1041,7 +1091,7 @@ class FixupRootVisitor : public RootVisitor { void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) - OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { for (size_t i = 0; i < count; ++i) { roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr())); } @@ -1050,7 +1100,7 @@ class FixupRootVisitor : public RootVisitor { private: ImageWriter* const image_writer_; - mirror::Object* ImageAddress(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + mirror::Object* ImageAddress(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) { const size_t offset = image_writer_->GetImageOffset(obj); auto* const dest = reinterpret_cast<Object*>(image_writer_->image_begin_ + offset); VLOG(compiler) << "Update root from " << obj << " to " << dest; @@ -1060,20 +1110,34 @@ class FixupRootVisitor : public RootVisitor { void ImageWriter::CopyAndFixupNativeData() { // Copy ArtFields and methods to their locations and update the array for convenience. 
- for (auto& pair : native_object_reloc_) { - auto& native_reloc = pair.second; - if (native_reloc.bin_type == kBinArtField) { - auto* dest = image_->Begin() + native_reloc.offset; - DCHECK_GE(dest, image_->Begin() + image_end_); - memcpy(dest, pair.first, sizeof(ArtField)); - reinterpret_cast<ArtField*>(dest)->SetDeclaringClass( - GetImageAddress(reinterpret_cast<ArtField*>(pair.first)->GetDeclaringClass())); - } else { - CHECK(IsArtMethodBin(native_reloc.bin_type)) << native_reloc.bin_type; - auto* dest = image_->Begin() + native_reloc.offset; - DCHECK_GE(dest, image_->Begin() + image_end_); - CopyAndFixupMethod(reinterpret_cast<ArtMethod*>(pair.first), - reinterpret_cast<ArtMethod*>(dest)); + for (auto& pair : native_object_relocations_) { + NativeObjectRelocation& relocation = pair.second; + auto* dest = image_->Begin() + relocation.offset; + DCHECK_GE(dest, image_->Begin() + image_end_); + switch (relocation.type) { + case kNativeObjectRelocationTypeArtField: { + memcpy(dest, pair.first, sizeof(ArtField)); + reinterpret_cast<ArtField*>(dest)->SetDeclaringClass( + GetImageAddress(reinterpret_cast<ArtField*>(pair.first)->GetDeclaringClass())); + break; + } + case kNativeObjectRelocationTypeArtMethodClean: + case kNativeObjectRelocationTypeArtMethodDirty: { + CopyAndFixupMethod(reinterpret_cast<ArtMethod*>(pair.first), + reinterpret_cast<ArtMethod*>(dest)); + break; + } + // For arrays, copy just the header since the elements will get copied by their corresponding + // relocations. + case kNativeObjectRelocationTypeArtFieldArray: { + memcpy(dest, pair.first, LengthPrefixedArray<ArtField>::ComputeSize(0)); + break; + } + case kNativeObjectRelocationTypeArtMethodArrayClean: + case kNativeObjectRelocationTypeArtMethodArrayDirty: { + memcpy(dest, pair.first, LengthPrefixedArray<ArtMethod>::ComputeSize(0)); + break; + } } } // Fixup the image method roots. 
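The CopyAndFixupNativeData switch above makes the copy step type-driven: individual ArtField/ArtMethod relocations copy (and later fix up) the whole object, while the array relocations copy only the LengthPrefixedArray header, since every element already received its own relocation entry during the walk shown earlier. A compact sketch of that dispatch follows; the enum values and sizes are illustrative stand-ins, not the real ART layouts.

// Sketch of the copy dispatch above: elements are copied individually, array
// relocations copy only the header. Types and sizes are illustrative.
#include <cstddef>
#include <cstdint>
#include <cstring>

enum NativeObjectRelocationType {
  kTypeArtField,
  kTypeArtMethodClean,
  kTypeArtFieldArray,
  kTypeArtMethodArrayClean,
};

struct NativeObjectRelocation {
  size_t offset;
  NativeObjectRelocationType type;
};

constexpr size_t kFieldSize = 16;       // Stand-in for sizeof(ArtField).
constexpr size_t kMethodSize = 48;      // Stand-in for ArtMethod::ObjectSize().
constexpr size_t kArrayHeaderSize = 8;  // Stand-in for ComputeSize(0).

void CopyNative(uint8_t* image, const void* src, const NativeObjectRelocation& reloc) {
  uint8_t* dest = image + reloc.offset;
  switch (reloc.type) {
    case kTypeArtField:
      std::memcpy(dest, src, kFieldSize);   // Then fix up the declaring class.
      break;
    case kTypeArtMethodClean:
      std::memcpy(dest, src, kMethodSize);  // Then fix up entry points etc.
      break;
    case kTypeArtFieldArray:
    case kTypeArtMethodArrayClean:
      // Only the length header: every element has its own relocation entry
      // and is copied (and fixed up) when that entry is processed.
      std::memcpy(dest, src, kArrayHeaderSize);
      break;
  }
}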
@@ -1082,12 +1146,12 @@ void ImageWriter::CopyAndFixupNativeData() { for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) { auto* m = image_methods_[i]; CHECK(m != nullptr); - auto it = native_object_reloc_.find(m); - CHECK(it != native_object_reloc_.end()) << "No fowarding for " << PrettyMethod(m); - auto& native_reloc = it->second; - CHECK(methods_section.Contains(native_reloc.offset)) << native_reloc.offset << " not in " + auto it = native_object_relocations_.find(m); + CHECK(it != native_object_relocations_.end()) << "No fowarding for " << PrettyMethod(m); + NativeObjectRelocation& relocation = it->second; + CHECK(methods_section.Contains(relocation.offset)) << relocation.offset << " not in " << methods_section; - CHECK(IsArtMethodBin(native_reloc.bin_type)) << native_reloc.bin_type; + CHECK(relocation.IsArtMethodRelocation()) << relocation.type; auto* dest = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), dest); } @@ -1139,9 +1203,9 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a for (size_t i = 0, count = num_elements; i < count; ++i) { auto* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_); if (elem != nullptr) { - auto it = native_object_reloc_.find(elem); - if (it == native_object_reloc_.end()) { - if (IsArtMethodBin(array_type)) { + auto it = native_object_relocations_.find(elem); + if (it == native_object_relocations_.end()) { + if (it->second.IsArtMethodRelocation()) { auto* method = reinterpret_cast<ArtMethod*>(elem); LOG(FATAL) << "No relocation entry for ArtMethod " << PrettyMethod(method) << " @ " << method << " idx=" << i << "/" << num_elements << " with declaring class " @@ -1187,8 +1251,15 @@ class FixupVisitor { FixupVisitor(ImageWriter* image_writer, Object* copy) : image_writer_(image_writer), copy_(copy) { } + // Ignore class roots since we don't have a way to map them to the destination. These are handled + // with other logic. + void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) + const {} + void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const {} + + void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const - EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) { + REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) { Object* ref = obj->GetFieldObject<Object, kVerifyNone>(offset); // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the // image. @@ -1198,8 +1269,7 @@ class FixupVisitor { // java.lang.ref.Reference visitor. 
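The pointer-array fixup consults the same relocation map once per array element. The general shape of that per-element translation, with the iterator tested against end() before it is used, is sketched below (hypothetical names, minimal relocation record):

    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>

    struct Reloc { uintptr_t offset; };

    // Rewrite each non-null element of a native pointer array to its address in
    // the image. Returns false if an element has no relocation entry; the
    // iterator is only dereferenced after the find() succeeded.
    bool FixupPointerElements(void** elements, size_t count,
                              const std::unordered_map<const void*, Reloc>& relocations,
                              uint8_t* image_begin) {
      for (size_t i = 0; i < count; ++i) {
        void* elem = elements[i];
        if (elem == nullptr) {
          continue;  // Null slots stay null.
        }
        auto it = relocations.find(elem);
        if (it == relocations.end()) {
          return false;
        }
        elements[i] = image_begin + it->second.offset;
      }
      return true;
    }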
void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) { copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>( mirror::Reference::ReferentOffset(), image_writer_->GetImageAddress(ref->GetReferent())); } @@ -1215,64 +1285,50 @@ class FixupClassVisitor FINAL : public FixupVisitor { } void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const - EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) { + REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) { DCHECK(obj->IsClass()); FixupVisitor::operator()(obj, offset, /*is_static*/false); } void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref ATTRIBUTE_UNUSED) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) { LOG(FATAL) << "Reference not expected here."; } }; +void* ImageWriter::NativeLocationInImage(void* obj) { + if (obj == nullptr) { + return nullptr; + } + auto it = native_object_relocations_.find(obj); + CHECK(it != native_object_relocations_.end()) << obj; + const NativeObjectRelocation& relocation = it->second; + return reinterpret_cast<void*>(image_begin_ + relocation.offset); +} + void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) { - // Copy and fix up ArtFields in the class. - ArtField* fields[2] = { orig->GetSFields(), orig->GetIFields() }; - size_t num_fields[2] = { orig->NumStaticFields(), orig->NumInstanceFields() }; // Update the field arrays. - for (size_t i = 0; i < 2; ++i) { - if (num_fields[i] == 0) { - CHECK(fields[i] == nullptr); - continue; - } - auto it = native_object_reloc_.find(fields[i]); - CHECK(it != native_object_reloc_.end()) << PrettyClass(orig) << " : " << PrettyField(fields[i]); - auto* image_fields = reinterpret_cast<ArtField*>(image_begin_ + it->second.offset); - if (i == 0) { - copy->SetSFieldsUnchecked(image_fields); - } else { - copy->SetIFieldsUnchecked(image_fields); - } - } - // Update direct / virtual method arrays. - auto* direct_methods = orig->GetDirectMethodsPtr(); - if (direct_methods != nullptr) { - auto it = native_object_reloc_.find(direct_methods); - CHECK(it != native_object_reloc_.end()) << PrettyClass(orig); - copy->SetDirectMethodsPtrUnchecked( - reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset)); - } - auto* virtual_methods = orig->GetVirtualMethodsPtr(); - if (virtual_methods != nullptr) { - auto it = native_object_reloc_.find(virtual_methods); - CHECK(it != native_object_reloc_.end()) << PrettyClass(orig); - copy->SetVirtualMethodsPtr( - reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset)); - } + copy->SetSFieldsPtrUnchecked(reinterpret_cast<LengthPrefixedArray<ArtField>*>( + NativeLocationInImage(orig->GetSFieldsPtr()))); + copy->SetIFieldsPtrUnchecked(reinterpret_cast<LengthPrefixedArray<ArtField>*>( + NativeLocationInImage(orig->GetIFieldsPtr()))); + // Update direct and virtual method arrays. + copy->SetDirectMethodsPtrUnchecked(reinterpret_cast<LengthPrefixedArray<ArtMethod>*>( + NativeLocationInImage(orig->GetDirectMethodsPtr()))); + copy->SetVirtualMethodsPtr(reinterpret_cast<LengthPrefixedArray<ArtMethod>*>( + NativeLocationInImage(orig->GetVirtualMethodsPtr()))); // Fix up embedded tables. 
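NativeLocationInImage centralizes the pointer translation and propagates null, which is what lets FixupClass above collapse into one unchecked setter call per field or method array. A hedged sketch of the same helper shape (ImagePointerMapper and its members are illustrative names, not ART's):

    #include <cstdint>
    #include <unordered_map>

    class ImagePointerMapper {
     public:
      explicit ImagePointerMapper(uint8_t* image_begin) : image_begin_(image_begin) {}

      // Register where a native object will live inside the image.
      void Record(const void* obj, uintptr_t offset) { relocations_[obj] = offset; }

      // Null maps to null, so callers need no special casing; anything else
      // must have been registered beforehand.
      template <typename T>
      T* NativeLocationInImage(T* obj) const {
        if (obj == nullptr) {
          return nullptr;
        }
        return reinterpret_cast<T*>(image_begin_ + relocations_.at(obj));
      }

     private:
      uint8_t* image_begin_;
      std::unordered_map<const void*, uintptr_t> relocations_;
    };

With such a helper, rewriting a class's static-field, instance-field and method array pointers reduces to one call per pointer, which is exactly the shape the new FixupClass takes.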
if (orig->ShouldHaveEmbeddedImtAndVTable()) { for (int32_t i = 0; i < orig->GetEmbeddedVTableLength(); ++i) { - auto it = native_object_reloc_.find(orig->GetEmbeddedVTableEntry(i, target_ptr_size_)); - CHECK(it != native_object_reloc_.end()) << PrettyClass(orig); + auto it = native_object_relocations_.find(orig->GetEmbeddedVTableEntry(i, target_ptr_size_)); + CHECK(it != native_object_relocations_.end()) << PrettyClass(orig); copy->SetEmbeddedVTableEntryUnchecked( i, reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset), target_ptr_size_); } for (size_t i = 0; i < mirror::Class::kImtSize; ++i) { - auto it = native_object_reloc_.find(orig->GetEmbeddedImTableEntry(i, target_ptr_size_)); - CHECK(it != native_object_reloc_.end()) << PrettyClass(orig); + auto it = native_object_relocations_.find(orig->GetEmbeddedImTableEntry(i, target_ptr_size_)); + CHECK(it != native_object_relocations_.end()) << PrettyClass(orig); copy->SetEmbeddedImTableEntry( i, reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset), target_ptr_size_); } @@ -1313,11 +1369,16 @@ void ImageWriter::FixupObject(Object* orig, Object* copy) { auto* dest = down_cast<mirror::AbstractMethod*>(copy); auto* src = down_cast<mirror::AbstractMethod*>(orig); ArtMethod* src_method = src->GetArtMethod(); - auto it = native_object_reloc_.find(src_method); - CHECK(it != native_object_reloc_.end()) << "Missing relocation for AbstractMethod.artMethod " - << PrettyMethod(src_method); + auto it = native_object_relocations_.find(src_method); + CHECK(it != native_object_relocations_.end()) + << "Missing relocation for AbstractMethod.artMethod " << PrettyMethod(src_method); dest->SetArtMethod( reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset)); + } else if (!klass->IsArrayClass() && klass->IsSubClass(down_cast<mirror::Class*>( + Thread::Current()->DecodeJObject(WellKnownClasses::java_lang_ClassLoader)))) { + // If src is a ClassLoader, set the class table to null so that it gets recreated by the + // ClassLoader. 
+ down_cast<mirror::ClassLoader*>(copy)->SetClassTable(nullptr); } FixupVisitor visitor(this, copy); orig->VisitReferences<true /*visit class*/>(visitor, visitor); @@ -1495,4 +1556,19 @@ uint8_t* ImageWriter::GetOatFileBegin() const { bin_slot_sizes_[kBinArtMethodClean] + intern_table_bytes_, kPageSize); } +ImageWriter::Bin ImageWriter::BinTypeForNativeRelocationType(NativeObjectRelocationType type) { + switch (type) { + case kNativeObjectRelocationTypeArtField: + case kNativeObjectRelocationTypeArtFieldArray: + return kBinArtField; + case kNativeObjectRelocationTypeArtMethodClean: + case kNativeObjectRelocationTypeArtMethodArrayClean: + return kBinArtMethodClean; + case kNativeObjectRelocationTypeArtMethodDirty: + case kNativeObjectRelocationTypeArtMethodArrayDirty: + return kBinArtMethodDirty; + } + UNREACHABLE(); +} + } // namespace art diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 9d45ce2bd4..f4e10cc6ea 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -18,7 +18,7 @@ #define ART_COMPILER_IMAGE_WRITER_H_ #include <stdint.h> -#include <valgrind.h> +#include "base/memory_tool.h" #include <cstddef> #include <memory> @@ -30,6 +30,7 @@ #include "base/macros.h" #include "driver/compiler_driver.h" #include "gc/space/space.h" +#include "length_prefixed_array.h" #include "lock_word.h" #include "mem_map.h" #include "oat_file.h" @@ -54,7 +55,8 @@ class ImageWriter FINAL { quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic), target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), bin_slot_sizes_(), bin_slot_previous_sizes_(), bin_slot_count_(), - intern_table_bytes_(0u), dirty_methods_(0u), clean_methods_(0u) { + intern_table_bytes_(0u), image_method_array_(ImageHeader::kImageMethodsCount), + dirty_methods_(0u), clean_methods_(0u) { CHECK_NE(image_begin, 0U); std::fill(image_methods_, image_methods_ + arraysize(image_methods_), nullptr); } @@ -69,15 +71,15 @@ class ImageWriter FINAL { } template <typename T> - T* GetImageAddress(T* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + T* GetImageAddress(T* object) const SHARED_REQUIRES(Locks::mutator_lock_) { return object == nullptr ? nullptr : reinterpret_cast<T*>(image_begin_ + GetImageOffset(object)); } - ArtMethod* GetImageMethodAddress(ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + ArtMethod* GetImageMethodAddress(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_); mirror::HeapReference<mirror::Object>* GetDexCacheArrayElementImageAddress( - const DexFile* dex_file, uint32_t offset) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + const DexFile* dex_file, uint32_t offset) const SHARED_REQUIRES(Locks::mutator_lock_) { auto it = dex_cache_array_starts_.find(dex_file); DCHECK(it != dex_cache_array_starts_.end()); return reinterpret_cast<mirror::HeapReference<mirror::Object>*>( @@ -88,7 +90,7 @@ class ImageWriter FINAL { bool Write(const std::string& image_filename, const std::string& oat_filename, const std::string& oat_location) - LOCKS_EXCLUDED(Locks::mutator_lock_); + REQUIRES(!Locks::mutator_lock_); uintptr_t GetOatDataBegin() { return reinterpret_cast<uintptr_t>(oat_data_begin_); @@ -98,7 +100,7 @@ class ImageWriter FINAL { bool AllocMemory(); // Mark the objects defined in this space in the given live bitmap. 
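The header now pulls in length_prefixed_array.h, and the copy loop earlier moves arrays with ComputeSize(0), i.e. just the header. A simplified picture of that layout (not ART's real class, which also accounts for element alignment) is:

    #include <cstddef>
    #include <cstdint>

    // A length-prefixed array keeps its element count directly in front of the
    // elements, so the size of "zero elements" is exactly the header and copying
    // ComputeSize(0) bytes moves only the length prefix.
    template <typename T>
    class SimpleLengthPrefixedArray {
     public:
      static size_t ComputeSize(size_t num_elements) {
        return sizeof(SimpleLengthPrefixedArray<T>) + num_elements * sizeof(T);
      }

      explicit SimpleLengthPrefixedArray(uint32_t length) : length_(length) {}

      uint32_t size() const { return length_; }
      T* data() { return reinterpret_cast<T*>(this + 1); }  // Elements follow in memory.

     private:
      uint32_t length_;
    };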
- void RecordImageAllocations() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void RecordImageAllocations() SHARED_REQUIRES(Locks::mutator_lock_); // Classify different kinds of bins that objects end up getting packed into during image writing. enum Bin { @@ -129,9 +131,18 @@ class ImageWriter FINAL { // Number of bins which are for mirror objects. kBinMirrorCount = kBinArtField, }; - friend std::ostream& operator<<(std::ostream& stream, const Bin& bin); + enum NativeObjectRelocationType { + kNativeObjectRelocationTypeArtField, + kNativeObjectRelocationTypeArtFieldArray, + kNativeObjectRelocationTypeArtMethodClean, + kNativeObjectRelocationTypeArtMethodArrayClean, + kNativeObjectRelocationTypeArtMethodDirty, + kNativeObjectRelocationTypeArtMethodArrayDirty, + }; + friend std::ostream& operator<<(std::ostream& stream, const NativeObjectRelocationType& type); + static constexpr size_t kBinBits = MinimumBitsToStore<uint32_t>(kBinMirrorCount - 1); // uint32 = typeof(lockword_) // Subtract read barrier bits since we want these to remain 0, or else it may result in DCHECK @@ -165,32 +176,32 @@ class ImageWriter FINAL { // We use the lock word to store the offset of the object in the image. void AssignImageOffset(mirror::Object* object, BinSlot bin_slot) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void SetImageOffset(mirror::Object* object, size_t offset) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); bool IsImageOffsetAssigned(mirror::Object* object) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - size_t GetImageOffset(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); + size_t GetImageOffset(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_); void UpdateImageOffset(mirror::Object* obj, uintptr_t offset) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); - void PrepareDexCacheArraySlots() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void AssignImageBinSlot(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void PrepareDexCacheArraySlots() SHARED_REQUIRES(Locks::mutator_lock_); + void AssignImageBinSlot(mirror::Object* object) SHARED_REQUIRES(Locks::mutator_lock_); void SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); bool IsImageBinSlotAssigned(mirror::Object* object) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - BinSlot GetImageBinSlot(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); + BinSlot GetImageBinSlot(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_); - void AddMethodPointerArray(mirror::PointerArray* arr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void AddMethodPointerArray(mirror::PointerArray* arr) SHARED_REQUIRES(Locks::mutator_lock_); static void* GetImageAddressCallback(void* writer, mirror::Object* obj) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { return reinterpret_cast<ImageWriter*>(writer)->GetImageAddress(obj); } mirror::Object* GetLocalAddress(mirror::Object* object) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { size_t offset = GetImageOffset(object); uint8_t* dst = image_->Begin() + offset; return reinterpret_cast<mirror::Object*>(dst); @@ -199,84 +210,76 @@ 
class ImageWriter FINAL { const uint8_t* GetOatAddress(uint32_t offset) const { // With Quick, code is within the OatFile, as there are all in one // .o ELF object. - DCHECK_LT(offset, oat_file_->Size()); + DCHECK_LE(offset, oat_file_->Size()); DCHECK(oat_data_begin_ != nullptr); return offset == 0u ? nullptr : oat_data_begin_ + offset; } - static bool IsArtMethodBin(Bin bin) { - return bin == kBinArtMethodClean || bin == kBinArtMethodDirty; - } - // Returns true if the class was in the original requested image classes list. - bool IsImageClass(mirror::Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool IsImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_); // Debug aid that list of requested image classes. void DumpImageClasses(); // Preinitializes some otherwise lazy fields (such as Class name) to avoid runtime image dirtying. void ComputeLazyFieldsForImageClasses() - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static bool ComputeLazyFieldsForClassesVisitor(mirror::Class* klass, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Wire dex cache resolved strings to strings in the image to avoid runtime resolution. - void ComputeEagerResolvedStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void ComputeEagerResolvedStrings() SHARED_REQUIRES(Locks::mutator_lock_); static void ComputeEagerResolvedStringsCallback(mirror::Object* obj, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Remove unwanted classes from various roots. - void PruneNonImageClasses() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static bool NonImageClassesVisitor(mirror::Class* c, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void PruneNonImageClasses() SHARED_REQUIRES(Locks::mutator_lock_); // Verify unwanted classes removed. - void CheckNonImageClassesRemoved() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void CheckNonImageClassesRemoved() SHARED_REQUIRES(Locks::mutator_lock_); static void CheckNonImageClassesRemovedCallback(mirror::Object* obj, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Lays out where the image objects will be at runtime. 
void CalculateNewObjectOffsets() - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); mirror::ObjectArray<mirror::Object>* CreateImageRoots() const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void CalculateObjectBinSlots(mirror::Object* obj) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void UnbinObjectsIntoOffset(mirror::Object* obj) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void WalkInstanceFields(mirror::Object* obj, mirror::Class* klass) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void WalkFieldsInOrder(mirror::Object* obj) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); static void WalkFieldsCallback(mirror::Object* obj, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); static void UnbinObjectsIntoOffsetCallback(mirror::Object* obj, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Creates the contiguous image in memory and adjusts pointers. - void CopyAndFixupNativeData() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void CopyAndFixupObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void CopyAndFixupNativeData() SHARED_REQUIRES(Locks::mutator_lock_); + void CopyAndFixupObjects() SHARED_REQUIRES(Locks::mutator_lock_); static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void CopyAndFixupObject(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); + void CopyAndFixupObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_); void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void FixupClass(mirror::Class* orig, mirror::Class* copy) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void FixupObject(mirror::Object* orig, mirror::Object* copy) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); void FixupPointerArray(mirror::Object* dst, mirror::PointerArray* arr, mirror::Class* klass, - Bin array_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + Bin array_type) SHARED_REQUIRES(Locks::mutator_lock_); // Get quick code for non-resolution/imt_conflict/abstract method. const uint8_t* GetQuickCode(ArtMethod* method, bool* quick_is_interpreted) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); const uint8_t* GetQuickEntryPoint(ArtMethod* method) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // Patches references in OatFile to expect runtime addresses. void SetOatChecksumFromElfFile(File* elf_file); @@ -285,10 +288,15 @@ class ImageWriter FINAL { size_t GetBinSizeSum(Bin up_to = kBinSize) const; // Return true if a method is likely to be dirtied at runtime. - bool WillMethodBeDirty(ArtMethod* m) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool WillMethodBeDirty(ArtMethod* m) const SHARED_REQUIRES(Locks::mutator_lock_); // Assign the offset for an ArtMethod. 
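The annotation renames running through this header (SHARED_LOCKS_REQUIRED to SHARED_REQUIRES, EXCLUSIVE_LOCKS_REQUIRED to REQUIRES, LOCKS_EXCLUDED to REQUIRES(!...)) track Clang's capability-based thread-safety attribute names. The definitions below show roughly how such macros are commonly layered over those attributes; they are an assumption for illustration, not ART's actual base headers.

    // Illustrative only: typical wiring of these macros onto Clang's
    // -Wthread-safety attributes.
    #if defined(__clang__)
    #define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x))
    #else
    #define THREAD_ANNOTATION_ATTRIBUTE__(x)  // No-op on other compilers.
    #endif

    #define CAPABILITY(x)        THREAD_ANNOTATION_ATTRIBUTE__(capability(x))
    #define REQUIRES(...)        THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__))
    #define SHARED_REQUIRES(...) THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__))

    struct CAPABILITY("mutex") FakeMutex {};

    FakeMutex g_lock;

    // Reader: the lock must be held at least for shared access.
    int ReadValue() SHARED_REQUIRES(g_lock);
    // Writer: exclusive access required. REQUIRES(!g_lock) would instead assert
    // the caller must NOT hold it, which is what replaces LOCKS_EXCLUDED.
    void WriteValue(int v) REQUIRES(g_lock);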
- void AssignMethodOffset(ArtMethod* method, Bin bin) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type) + SHARED_REQUIRES(Locks::mutator_lock_); + + static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type); + + void* NativeLocationInImage(void* obj); const CompilerDriver& compiler_driver_; @@ -360,14 +368,22 @@ class ImageWriter FINAL { // ArtField, ArtMethod relocating map. These are allocated as array of structs but we want to // have one entry per art field for convenience. ArtFields are placed right after the end of the // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields. - struct NativeObjectReloc { + struct NativeObjectRelocation { uintptr_t offset; - Bin bin_type; + NativeObjectRelocationType type; + + bool IsArtMethodRelocation() const { + return type == kNativeObjectRelocationTypeArtMethodClean || + type == kNativeObjectRelocationTypeArtMethodDirty; + } }; - std::unordered_map<void*, NativeObjectReloc> native_object_reloc_; + std::unordered_map<void*, NativeObjectRelocation> native_object_relocations_; // Runtime ArtMethods which aren't reachable from any Class but need to be copied into the image. ArtMethod* image_methods_[ImageHeader::kImageMethodsCount]; + // Fake length prefixed array for image methods. This array does not contain the actual + // ArtMethods. We only use it for the header and relocation addresses. + LengthPrefixedArray<ArtMethod> image_method_array_; // Counters for measurements, used for logging only. uint64_t dirty_methods_; @@ -376,6 +392,7 @@ class ImageWriter FINAL { friend class FixupClassVisitor; friend class FixupRootVisitor; friend class FixupVisitor; + friend class NonImageClassesVisitor; DISALLOW_COPY_AND_ASSIGN(ImageWriter); }; diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index a1d8226f36..4215f3cdd3 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -55,7 +55,7 @@ extern "C" void jit_unload(void* handle) { } extern "C" bool jit_compile_method(void* handle, ArtMethod* method, Thread* self) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle); DCHECK(jit_compiler != nullptr); return jit_compiler->CompileMethod(self, method); @@ -71,18 +71,20 @@ JitCompiler::JitCompiler() : total_time_(0) { CompilerOptions::kDefaultSmallMethodThreshold, CompilerOptions::kDefaultTinyMethodThreshold, CompilerOptions::kDefaultNumDexMethodsThreshold, - false, + CompilerOptions::kDefaultInlineDepthLimit, + CompilerOptions::kDefaultInlineMaxCodeUnits, + /* include_patch_information */ false, CompilerOptions::kDefaultTopKProfileThreshold, - false, // TODO: Think about debuggability of JIT-compiled code. + Runtime::Current()->IsDebuggable(), CompilerOptions::kDefaultGenerateDebugInfo, - false, - false, - false, - false, // pic - nullptr, + /* implicit_null_checks */ true, + /* implicit_so_checks */ true, + /* implicit_suspend_checks */ false, + /* pic */ true, // TODO: Support non-PIC in optimizing. 
+ /* verbose_methods */ nullptr, pass_manager_options, - nullptr, - false)); + /* init_failure_output */ nullptr, + /* abort_on_hard_verifier_failure */ false)); const InstructionSet instruction_set = kRuntimeISA; instruction_set_features_.reset(InstructionSetFeatures::FromCppDefines()); cumulative_logger_.reset(new CumulativeLogger("jit times")); @@ -92,10 +94,23 @@ JitCompiler::JitCompiler() : total_time_(0) { method_inliner_map_.get(), CompilerCallbacks::CallbackMode::kCompileApp)); compiler_driver_.reset(new CompilerDriver( - compiler_options_.get(), verification_results_.get(), method_inliner_map_.get(), - Compiler::kQuick, instruction_set, instruction_set_features_.get(), false, - nullptr, nullptr, nullptr, 1, false, true, - std::string(), cumulative_logger_.get(), -1, std::string())); + compiler_options_.get(), + verification_results_.get(), + method_inliner_map_.get(), + Compiler::kOptimizing, + instruction_set, + instruction_set_features_.get(), + /* image */ false, + /* image_classes */ nullptr, + /* compiled_classes */ nullptr, + /* compiled_methods */ nullptr, + /* thread_count */ 1, + /* dump_stats */ false, + /* dump_passes */ false, + /* dump_cfg_file_name */ "", + cumulative_logger_.get(), + /* swap_fd */ -1, + /* profile_file */ "")); // Disable dedupe so we can remove compiled methods. compiler_driver_->SetDedupeEnabled(false); compiler_driver_->SetSupportBootImageFixup(false); @@ -138,7 +153,7 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { CompiledMethod* compiled_method = nullptr; { TimingLogger::ScopedTiming t2("Compiling", &logger); - compiled_method = compiler_driver_->CompileMethod(self, method); + compiled_method = compiler_driver_->CompileArtMethod(self, method); } { TimingLogger::ScopedTiming t2("TrimMaps", &logger); @@ -195,9 +210,14 @@ uint8_t* JitCompiler::WriteMethodHeaderAndCode(const CompiledMethod* compiled_me std::copy(quick_code->data(), quick_code->data() + code_size, code_ptr); // After we are done writing we need to update the method header. // Write out the method header last. - method_header = new(method_header)OatQuickMethodHeader( - code_ptr - mapping_table, code_ptr - vmap_table, code_ptr - gc_map, frame_size_in_bytes, - core_spill_mask, fp_spill_mask, code_size); + method_header = new(method_header) OatQuickMethodHeader( + (mapping_table == nullptr) ? 0 : code_ptr - mapping_table, + (vmap_table == nullptr) ? 0 : code_ptr - vmap_table, + (gc_map == nullptr) ? 0 : code_ptr - gc_map, + frame_size_in_bytes, + core_spill_mask, + fp_spill_mask, + code_size); // Return the code ptr. return code_ptr; } @@ -216,23 +236,35 @@ bool JitCompiler::AddToCodeCache(ArtMethod* method, const CompiledMethod* compil auto* const mapping_table = compiled_method->GetMappingTable(); auto* const vmap_table = compiled_method->GetVmapTable(); auto* const gc_map = compiled_method->GetGcMap(); - CHECK(gc_map != nullptr) << PrettyMethod(method); - // Write out pre-header stuff. - uint8_t* const mapping_table_ptr = code_cache->AddDataArray( - self, mapping_table->data(), mapping_table->data() + mapping_table->size()); - if (mapping_table_ptr == nullptr) { - return false; // Out of data cache. + uint8_t* mapping_table_ptr = nullptr; + uint8_t* vmap_table_ptr = nullptr; + uint8_t* gc_map_ptr = nullptr; + + if (mapping_table != nullptr) { + // Write out pre-header stuff. 
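The JIT path above no longer assumes every compiled method carries mapping, vmap and GC-map tables: when a table is missing, its offset in the method header is written as 0 and the corresponding data-cache write is skipped. A sketch of the header-offset guard (FakeMethodHeader and MakeHeader are hypothetical names, not ART's OatQuickMethodHeader):

    #include <cstdint>

    // Offsets in the method header are stored relative to the start of the code;
    // a table that does not exist is encoded as 0 instead of a bogus difference.
    struct FakeMethodHeader {
      uint32_t mapping_table_offset;
      uint32_t vmap_table_offset;
      uint32_t gc_map_offset;
    };

    FakeMethodHeader MakeHeader(const uint8_t* code_ptr,
                                const uint8_t* mapping_table,
                                const uint8_t* vmap_table,
                                const uint8_t* gc_map) {
      FakeMethodHeader header;
      header.mapping_table_offset =
          (mapping_table == nullptr) ? 0u : static_cast<uint32_t>(code_ptr - mapping_table);
      header.vmap_table_offset =
          (vmap_table == nullptr) ? 0u : static_cast<uint32_t>(code_ptr - vmap_table);
      header.gc_map_offset =
          (gc_map == nullptr) ? 0u : static_cast<uint32_t>(code_ptr - gc_map);
      return header;
    }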
+ mapping_table_ptr = code_cache->AddDataArray( + self, mapping_table->data(), mapping_table->data() + mapping_table->size()); + if (mapping_table_ptr == nullptr) { + return false; // Out of data cache. + } } - uint8_t* const vmap_table_ptr = code_cache->AddDataArray( - self, vmap_table->data(), vmap_table->data() + vmap_table->size()); - if (vmap_table_ptr == nullptr) { - return false; // Out of data cache. + + if (vmap_table != nullptr) { + vmap_table_ptr = code_cache->AddDataArray( + self, vmap_table->data(), vmap_table->data() + vmap_table->size()); + if (vmap_table_ptr == nullptr) { + return false; // Out of data cache. + } } - uint8_t* const gc_map_ptr = code_cache->AddDataArray( - self, gc_map->data(), gc_map->data() + gc_map->size()); - if (gc_map_ptr == nullptr) { - return false; // Out of data cache. + + if (gc_map != nullptr) { + gc_map_ptr = code_cache->AddDataArray( + self, gc_map->data(), gc_map->data() + gc_map->size()); + if (gc_map_ptr == nullptr) { + return false; // Out of data cache. + } } + // Don't touch this until you protect / unprotect the code. const size_t reserve_size = sizeof(OatQuickMethodHeader) + quick_code->size() + 32; uint8_t* const code_reserve = code_cache->ReserveCode(self, reserve_size); diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h index b0010e0eb2..ef68caa5fa 100644 --- a/compiler/jit/jit_compiler.h +++ b/compiler/jit/jit_compiler.h @@ -38,11 +38,11 @@ class JitCompiler { static JitCompiler* Create(); virtual ~JitCompiler(); bool CompileMethod(Thread* self, ArtMethod* method) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); // This is in the compiler since the runtime doesn't have access to the compiled method // structures. bool AddToCodeCache(ArtMethod* method, const CompiledMethod* compiled_method, - OatFile::OatMethod* out_method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + OatFile::OatMethod* out_method) SHARED_REQUIRES(Locks::mutator_lock_); CompilerCallbacks* GetCompilerCallbacks() const; size_t GetTotalCompileTime() const { return total_time_; @@ -63,7 +63,7 @@ class JitCompiler { const CompiledMethod* compiled_method, uint8_t* reserve_begin, uint8_t* reserve_end, const uint8_t* mapping_table, const uint8_t* vmap_table, const uint8_t* gc_map); bool MakeExecutable(CompiledMethod* compiled_method, ArtMethod* method) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); DISALLOW_COPY_AND_ASSIGN(JitCompiler); }; diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 016f28ef1e..0bfe8a276a 100644 --- a/compiler/jni/jni_cfi_test.cc +++ b/compiler/jni/jni_cfi_test.cc @@ -28,7 +28,7 @@ namespace art { // Run the tests only on host. 
-#ifndef HAVE_ANDROID_OS +#ifndef __ANDROID__ class JNICFITest : public CFITest { public: @@ -88,6 +88,6 @@ TEST_ISA(kX86_64) TEST_ISA(kMips) TEST_ISA(kMips64) -#endif // HAVE_ANDROID_OS +#endif // __ANDROID__ } // namespace art diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h index dbecb8eb95..35b50937e9 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.h +++ b/compiler/jni/quick/arm/calling_convention_arm.h @@ -48,7 +48,7 @@ class ArmManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingCon class ArmJniCallingConvention FINAL : public JniCallingConvention { public: - explicit ArmJniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); + ArmJniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); ~ArmJniCallingConvention() OVERRIDE {} // Calling convention ManagedRegister ReturnRegister() OVERRIDE; diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h index 9fd3265c86..37c92b2034 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.h +++ b/compiler/jni/quick/arm64/calling_convention_arm64.h @@ -48,7 +48,7 @@ class Arm64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingC class Arm64JniCallingConvention FINAL : public JniCallingConvention { public: - explicit Arm64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); + Arm64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); ~Arm64JniCallingConvention() OVERRIDE {} // Calling convention ManagedRegister ReturnRegister() OVERRIDE; diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h index c9b595aeea..243d124455 100644 --- a/compiler/jni/quick/calling_convention.h +++ b/compiler/jni/quick/calling_convention.h @@ -348,8 +348,8 @@ class JniCallingConvention : public CallingConvention { kObjectOrClass = 1 }; - explicit JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty, - size_t frame_pointer_size) + JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty, + size_t frame_pointer_size) : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size) {} // Number of stack slots for outgoing arguments, above which the handle scope is diff --git a/compiler/jni/quick/mips/calling_convention_mips.h b/compiler/jni/quick/mips/calling_convention_mips.h index 8d82dceef4..dc45432410 100644 --- a/compiler/jni/quick/mips/calling_convention_mips.h +++ b/compiler/jni/quick/mips/calling_convention_mips.h @@ -48,7 +48,7 @@ class MipsManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingCo class MipsJniCallingConvention FINAL : public JniCallingConvention { public: - explicit MipsJniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); + MipsJniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); ~MipsJniCallingConvention() OVERRIDE {} // Calling convention ManagedRegister ReturnRegister() OVERRIDE; diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.h b/compiler/jni/quick/mips64/calling_convention_mips64.h index dc9273b92a..3d6aab7399 100644 --- a/compiler/jni/quick/mips64/calling_convention_mips64.h +++ b/compiler/jni/quick/mips64/calling_convention_mips64.h @@ -48,7 +48,7 @@ class Mips64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCalling class Mips64JniCallingConvention FINAL : public 
JniCallingConvention { public: - explicit Mips64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); + Mips64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); ~Mips64JniCallingConvention() OVERRIDE {} // Calling convention ManagedRegister ReturnRegister() OVERRIDE; diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h index b1b3598a8e..cdf0956c9a 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.h +++ b/compiler/jni/quick/x86/calling_convention_x86.h @@ -26,8 +26,7 @@ constexpr size_t kFramePointerSize = 4; class X86ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention { public: - explicit X86ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, - const char* shorty) + X86ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize), gpr_arg_count_(0) {} ~X86ManagedRuntimeCallingConvention() OVERRIDE {} @@ -51,7 +50,7 @@ class X86ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingCon class X86JniCallingConvention FINAL : public JniCallingConvention { public: - explicit X86JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); + X86JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); ~X86JniCallingConvention() OVERRIDE {} // Calling convention ManagedRegister ReturnRegister() OVERRIDE; diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h index 7a90c6e94e..6e47c9fae3 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h @@ -26,8 +26,7 @@ constexpr size_t kFramePointerSize = 8; class X86_64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention { public: - explicit X86_64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, - const char* shorty) + X86_64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {} ~X86_64ManagedRuntimeCallingConvention() OVERRIDE {} // Calling convention @@ -47,7 +46,7 @@ class X86_64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCalling class X86_64JniCallingConvention FINAL : public JniCallingConvention { public: - explicit X86_64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); + X86_64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty); ~X86_64JniCallingConvention() OVERRIDE {} // Calling convention ManagedRegister ReturnRegister() OVERRIDE; diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index a057a4cf16..13f67e6fd4 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -50,7 +50,7 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { // We want to put the method3 at a very precise offset. const uint32_t method3_offset = method1_offset + distance_without_thunks; - CHECK(IsAligned<kArmAlignment>(method3_offset - sizeof(OatQuickMethodHeader))); + CHECK_ALIGNED(method3_offset - sizeof(OatQuickMethodHeader), kArmAlignment); // Calculate size of method2 so that we put method3 at the correct place. 
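The calling-convention constructors above drop the explicit keyword. For a constructor that takes two or more parameters, explicit only forbids copy-list-initialization and otherwise changes nothing, so on these declarations it was effectively noise; a small illustration:

    // `explicit` on a multi-argument constructor has no implicit single-argument
    // conversion to suppress; it would only reject forms such as
    //   Conv c = {true, false, "V"};
    class Conv {
     public:
      Conv(bool is_static, bool is_synchronized, const char* shorty)
          : is_static_(is_static), is_synchronized_(is_synchronized), shorty_(shorty) {}

      bool IsStatic() const { return is_static_; }
      bool IsSynchronized() const { return is_synchronized_; }
      const char* Shorty() const { return shorty_; }

     private:
      bool is_static_;
      bool is_synchronized_;
      const char* shorty_;
    };

    Conv MakeConv() {
      return Conv(true, false, "V");  // Direct initialization works either way.
    }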
const uint32_t method2_offset = @@ -242,8 +242,10 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) { }; constexpr uint32_t max_positive_disp = 16 * MB - 2u + 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches, - kNopCode, ArrayRef<const LinkerPatch>(), + bool thunk_in_gap = Create2MethodsWithGap(method1_code, + ArrayRef<const LinkerPatch>(method1_patches), + kNopCode, + ArrayRef<const LinkerPatch>(), bl_offset_in_method1 + max_positive_disp); ASSERT_FALSE(thunk_in_gap); // There should be no thunk. @@ -262,8 +264,10 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) { }; constexpr uint32_t just_over_max_negative_disp = 16 * MB - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), - method3_code, method3_patches, + bool thunk_in_gap = Create2MethodsWithGap(kNopCode, + ArrayRef<const LinkerPatch>(), + method3_code, + ArrayRef<const LinkerPatch>(method3_patches), just_over_max_negative_disp - bl_offset_in_method3); ASSERT_FALSE(thunk_in_gap); // There should be no thunk. @@ -282,8 +286,10 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { }; constexpr uint32_t just_over_max_positive_disp = 16 * MB + 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches, - kNopCode, ArrayRef<const LinkerPatch>(), + bool thunk_in_gap = Create2MethodsWithGap(method1_code, + ArrayRef<const LinkerPatch>(method1_patches), + kNopCode, + ArrayRef<const LinkerPatch>(), bl_offset_in_method1 + just_over_max_positive_disp); ASSERT_TRUE(thunk_in_gap); @@ -311,8 +317,10 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { }; constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), - method3_code, method3_patches, + bool thunk_in_gap = Create2MethodsWithGap(kNopCode, + ArrayRef<const LinkerPatch>(), + method3_code, + ArrayRef<const LinkerPatch>(method3_patches), just_over_max_negative_disp - bl_offset_in_method3); ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2. 
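The test changes in this file wrap each patch array in ArrayRef<const LinkerPatch>(...) at the call site instead of passing the raw array, consistent with the view type's converting constructor no longer being implicit. A minimal stand-in (SpanOf is a hypothetical name, not ART's ArrayRef) shows the effect on call sites:

    #include <cstddef>

    // Minimal non-owning array view with an explicit array constructor, so
    // callers must spell out the wrapper at the call site.
    template <typename T>
    class SpanOf {
     public:
      SpanOf() : data_(nullptr), size_(0u) {}
      template <size_t N>
      explicit SpanOf(T (&array)[N]) : data_(array), size_(N) {}
      SpanOf(T* data, size_t size) : data_(data), size_(size) {}

      T* data() const { return data_; }
      size_t size() const { return size_; }

     private:
      T* data_;
      size_t size_;
    };

    int Sum(SpanOf<const int> values) {
      int total = 0;
      for (size_t i = 0; i != values.size(); ++i) {
        total += values.data()[i];
      }
      return total;
    }

    int Example() {
      static const int kPatches[] = {1, 2, 3};
      // Sum(kPatches) would not compile: the array constructor is explicit.
      return Sum(SpanOf<const int>(kPatches));
    }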
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 29355d6968..6b9c530d7a 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -108,7 +108,7 @@ uint32_t Arm64RelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { if (!current_method_thunks_.empty()) { uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kArm64); if (kIsDebugBuild) { - CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size())); + CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize); size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize; CHECK_LE(num_thunks, processed_adrp_thunks_); for (size_t i = 0u; i != num_thunks; ++i) { @@ -203,7 +203,7 @@ void Arm64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, if ((adrp & 0x9f000000u) != 0x90000000u) { CHECK(fix_cortex_a53_843419_); CHECK_EQ(adrp & 0xfc000000u, 0x14000000u); // B <thunk> - CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size())); + CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize); size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize; CHECK_LE(num_thunks, processed_adrp_thunks_); uint32_t b_offset = patch_offset - literal_offset + pc_insn_offset; diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc index 21f93672ad..b3af4c6a05 100644 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -66,7 +66,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { // We want to put the method3 at a very precise offset. const uint32_t last_method_offset = method1_offset + distance_without_thunks; const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader); - CHECK(IsAligned<kArm64Alignment>(gap_end)); + CHECK_ALIGNED(gap_end, kArm64Alignment); // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB). 
// (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB @@ -396,8 +396,10 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) { }; constexpr uint32_t max_positive_disp = 128 * MB - 4u; - uint32_t last_method_idx = Create2MethodsWithGap(method1_code, method1_patches, - kNopCode, ArrayRef<const LinkerPatch>(), + uint32_t last_method_idx = Create2MethodsWithGap(method1_code, + ArrayRef<const LinkerPatch>(method1_patches), + kNopCode, + ArrayRef<const LinkerPatch>(), bl_offset_in_method1 + max_positive_disp); ASSERT_EQ(expected_last_method_idx, last_method_idx); @@ -420,8 +422,10 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarBefore) { }; constexpr uint32_t max_negative_disp = 128 * MB; - uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), - last_method_code, last_method_patches, + uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, + ArrayRef<const LinkerPatch>(), + last_method_code, + ArrayRef<const LinkerPatch>(last_method_patches), max_negative_disp - bl_offset_in_last_method); uint32_t method1_offset = GetMethodOffset(1u); uint32_t last_method_offset = GetMethodOffset(last_method_idx); @@ -445,7 +449,10 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) { constexpr uint32_t just_over_max_positive_disp = 128 * MB; uint32_t last_method_idx = Create2MethodsWithGap( - method1_code, method1_patches, kNopCode, ArrayRef<const LinkerPatch>(), + method1_code, + ArrayRef<const LinkerPatch>(method1_patches), + kNopCode, + ArrayRef<const LinkerPatch>(), bl_offset_in_method1 + just_over_max_positive_disp); ASSERT_EQ(expected_last_method_idx, last_method_idx); @@ -474,7 +481,8 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) { constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4; uint32_t last_method_idx = Create2MethodsWithGap( - kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, last_method_patches, + kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, + ArrayRef<const LinkerPatch>(last_method_patches), just_over_max_negative_disp - bl_offset_in_last_method); uint32_t method1_offset = GetMethodOffset(1u); uint32_t last_method_offset = GetMethodOffset(last_method_idx); diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 074775633f..88dc29e6ab 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -44,7 +44,7 @@ class OatTest : public CommonCompilerTest { void CheckMethod(ArtMethod* method, const OatFile::OatMethod& oat_method, const DexFile& dex_file) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { const CompiledMethod* compiled_method = compiler_driver_->GetCompiledMethod(MethodReference(&dex_file, method->GetDexMethodIndex())); @@ -183,7 +183,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(72U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(28U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(112 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); + EXPECT_EQ(113 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); } TEST_F(OatTest, OatHeaderIsValid) { diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index a98a3046e5..64e748776d 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -365,7 +365,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { } bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) - 
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { OatClass* oat_class = writer_->oat_classes_[oat_class_index_]; CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); @@ -374,9 +374,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { uint32_t quick_code_offset = 0; const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode(); - CHECK(quick_code != nullptr); uint32_t code_size = quick_code->size() * sizeof(uint8_t); - CHECK_NE(code_size, 0U); uint32_t thumb_offset = compiled_method->CodeDelta(); // Deduplicate code arrays if we are not producing debuggable code. @@ -394,16 +392,18 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { } } - MethodReference method_ref(dex_file_, it.GetMemberIndex()); - auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref); - if (method_lb != writer_->method_offset_map_.map.end() && - !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) { - // TODO: Should this be a hard failure? - LOG(WARNING) << "Multiple definitions of " - << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file) - << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : ""); - } else { - writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset); + if (code_size != 0) { + MethodReference method_ref(dex_file_, it.GetMemberIndex()); + auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref); + if (method_lb != writer_->method_offset_map_.map.end() && + !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) { + // TODO: Should this be a hard failure? + LOG(WARNING) << "Multiple definitions of " + << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file) + << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : ""); + } else { + writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset); + } } // Update quick method header. @@ -411,21 +411,24 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_]; uint32_t mapping_table_offset = method_header->mapping_table_offset_; uint32_t vmap_table_offset = method_header->vmap_table_offset_; + // If we don't have quick code, then we must have a vmap, as that is how the dex2dex + // compiler records its transformations. + DCHECK(quick_code != nullptr || vmap_table_offset != 0); uint32_t gc_map_offset = method_header->gc_map_offset_; // The code offset was 0 when the mapping/vmap table offset was set, so it's set // to 0-offset and we need to adjust it by code_offset. 
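Methods that were only dex-to-dex compiled now reach this visitor with an empty quick-code array, so the writer records an entry in its method-to-offset map only when there is real code, and the DCHECK insists that such methods at least carry a vmap table describing the quickening. The duplicate check and insertion reuse a single lower_bound probe; with standard containers the idiom looks roughly like this (MethodRef is a stand-in for ART's MethodReference):

    #include <cstdint>
    #include <map>
    #include <string>

    using MethodRef = std::string;

    // Record a method's code offset only when it actually has code, using one
    // lower_bound search both to detect duplicates and as the insertion hint.
    bool RecordCodeOffset(std::map<MethodRef, uint32_t>& offsets,
                          const MethodRef& method,
                          uint32_t code_size,
                          uint32_t code_offset) {
      if (code_size == 0u) {
        return false;  // Dex-to-dex only: nothing to point the oat file at.
      }
      auto lb = offsets.lower_bound(method);
      if (lb != offsets.end() && !offsets.key_comp()(method, lb->first)) {
        return false;  // Already present: a duplicate definition.
      }
      offsets.emplace_hint(lb, method, code_offset);
      return true;
    }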
uint32_t code_offset = quick_code_offset - thumb_offset; - if (mapping_table_offset != 0u) { + if (mapping_table_offset != 0u && code_offset != 0u) { mapping_table_offset += code_offset; - DCHECK_LT(mapping_table_offset, code_offset); + DCHECK_LT(mapping_table_offset, code_offset) << "Overflow in oat offsets"; } - if (vmap_table_offset != 0u) { + if (vmap_table_offset != 0u && code_offset != 0u) { vmap_table_offset += code_offset; - DCHECK_LT(vmap_table_offset, code_offset); + DCHECK_LT(vmap_table_offset, code_offset) << "Overflow in oat offsets"; } - if (gc_map_offset != 0u) { + if (gc_map_offset != 0u && code_offset != 0u) { gc_map_offset += code_offset; - DCHECK_LT(gc_map_offset, code_offset); + DCHECK_LT(gc_map_offset, code_offset) << "Overflow in oat offsets"; } uint32_t frame_size_in_bytes = compiled_method->GetFrameSizeInBytes(); uint32_t core_spill_mask = compiled_method->GetCoreSpillMask(); @@ -534,7 +537,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { const ClassDataItemIterator& it, uint32_t thumb_offset) { offset_ = writer_->relative_patcher_->ReserveSpace( - offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex())); + offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex())); offset_ = compiled_method->AlignCode(offset_); DCHECK_ALIGNED_PARAM(offset_, GetInstructionSetAlignment(compiled_method->GetInstructionSet())); @@ -557,7 +560,7 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { } bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it ATTRIBUTE_UNUSED) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { OatClass* oat_class = writer_->oat_classes_[oat_class_index_]; CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); @@ -598,7 +601,7 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { } bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { OatClass* oat_class = writer_->oat_classes_[oat_class_index_]; CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); @@ -619,15 +622,19 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { *dex_file_, it.GetMemberIndex(), dex_cache, NullHandle<mirror::ClassLoader>(), nullptr, invoke_type); if (method == nullptr) { - LOG(ERROR) << "Unexpected failure to resolve a method: " - << PrettyMethod(it.GetMemberIndex(), *dex_file_, true); + LOG(INTERNAL_FATAL) << "Unexpected failure to resolve a method: " + << PrettyMethod(it.GetMemberIndex(), *dex_file_, true); soa.Self()->AssertPendingException(); mirror::Throwable* exc = soa.Self()->GetException(); std::string dump = exc->Dump(); LOG(FATAL) << dump; + UNREACHABLE(); + } + + if (compiled_method != nullptr && compiled_method->GetQuickCode()->size() != 0) { + method->SetEntryPointFromQuickCompiledCodePtrSize( + reinterpret_cast<void*>(offsets.code_offset_), pointer_size_); } - method->SetEntryPointFromQuickCompiledCodePtrSize(reinterpret_cast<void*>(offsets.code_offset_), - pointer_size_); return true; } @@ -658,7 +665,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } bool StartClass(const DexFile* dex_file, size_t class_def_index) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { OatDexMethodVisitor::StartClass(dex_file, class_def_index); if (dex_cache_ == 
nullptr || dex_cache_->GetDexFile() != dex_file) { dex_cache_ = class_linker_->FindDexCache(*dex_file); @@ -666,7 +673,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { return true; } - bool EndClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + bool EndClass() SHARED_REQUIRES(Locks::mutator_lock_) { bool result = OatDexMethodVisitor::EndClass(); if (oat_class_index_ == writer_->oat_classes_.size()) { DCHECK(result); // OatDexMethodVisitor::EndClass() never fails. @@ -680,7 +687,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { OatClass* oat_class = writer_->oat_classes_[oat_class_index_]; const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); @@ -689,85 +696,82 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { OutputStream* out = out_; const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode(); - if (quick_code != nullptr) { - // Need a wrapper if we create a copy for patching. - ArrayRef<const uint8_t> wrapped(*quick_code); - uint32_t code_size = quick_code->size() * sizeof(uint8_t); - CHECK_NE(code_size, 0U); - - // Deduplicate code arrays. - const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_]; - if (method_offsets.code_offset_ >= offset_) { - offset_ = writer_->relative_patcher_->WriteThunks(out, offset_); - if (offset_ == 0u) { - ReportWriteFailure("relative call thunk", it); - return false; - } - uint32_t aligned_offset = compiled_method->AlignCode(offset_); - uint32_t aligned_code_delta = aligned_offset - offset_; - if (aligned_code_delta != 0) { - if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { - ReportWriteFailure("code alignment padding", it); - return false; - } - offset_ += aligned_code_delta; - DCHECK_OFFSET_(); - } - DCHECK_ALIGNED_PARAM(offset_, - GetInstructionSetAlignment(compiled_method->GetInstructionSet())); - DCHECK_EQ(method_offsets.code_offset_, - offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) - << PrettyMethod(it.GetMemberIndex(), *dex_file_); - const OatQuickMethodHeader& method_header = - oat_class->method_headers_[method_offsets_index_]; - writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header)); - if (!out->WriteFully(&method_header, sizeof(method_header))) { - ReportWriteFailure("method header", it); + // Need a wrapper if we create a copy for patching. + ArrayRef<const uint8_t> wrapped(*quick_code); + uint32_t code_size = quick_code->size() * sizeof(uint8_t); + + // Deduplicate code arrays. 
+ const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_]; + if (method_offsets.code_offset_ > offset_) { + offset_ = writer_->relative_patcher_->WriteThunks(out, offset_); + if (offset_ == 0u) { + ReportWriteFailure("relative call thunk", it); + return false; + } + uint32_t aligned_offset = compiled_method->AlignCode(offset_); + uint32_t aligned_code_delta = aligned_offset - offset_; + if (aligned_code_delta != 0) { + if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { + ReportWriteFailure("code alignment padding", it); return false; } - writer_->size_method_header_ += sizeof(method_header); - offset_ += sizeof(method_header); + offset_ += aligned_code_delta; DCHECK_OFFSET_(); + } + DCHECK_ALIGNED_PARAM(offset_, + GetInstructionSetAlignment(compiled_method->GetInstructionSet())); + DCHECK_EQ(method_offsets.code_offset_, + offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) + << PrettyMethod(it.GetMemberIndex(), *dex_file_); + const OatQuickMethodHeader& method_header = + oat_class->method_headers_[method_offsets_index_]; + writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header)); + if (!out->WriteFully(&method_header, sizeof(method_header))) { + ReportWriteFailure("method header", it); + return false; + } + writer_->size_method_header_ += sizeof(method_header); + offset_ += sizeof(method_header); + DCHECK_OFFSET_(); - if (!compiled_method->GetPatches().empty()) { - patched_code_.assign(quick_code->begin(), quick_code->end()); - wrapped = ArrayRef<const uint8_t>(patched_code_); - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.Type() == kLinkerPatchCallRelative) { - // NOTE: Relative calls across oat files are not supported. - uint32_t target_offset = GetTargetOffset(patch); - uint32_t literal_offset = patch.LiteralOffset(); - writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset, - offset_ + literal_offset, target_offset); - } else if (patch.Type() == kLinkerPatchDexCacheArray) { - uint32_t target_offset = GetDexCacheOffset(patch); - uint32_t literal_offset = patch.LiteralOffset(); - writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch, - offset_ + literal_offset, - target_offset); - } else if (patch.Type() == kLinkerPatchCall) { - uint32_t target_offset = GetTargetOffset(patch); - PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset); - } else if (patch.Type() == kLinkerPatchMethod) { - ArtMethod* method = GetTargetMethod(patch); - PatchMethodAddress(&patched_code_, patch.LiteralOffset(), method); - } else if (patch.Type() == kLinkerPatchType) { - mirror::Class* type = GetTargetType(patch); - PatchObjectAddress(&patched_code_, patch.LiteralOffset(), type); - } + if (!compiled_method->GetPatches().empty()) { + patched_code_.assign(quick_code->begin(), quick_code->end()); + wrapped = ArrayRef<const uint8_t>(patched_code_); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchCallRelative) { + // NOTE: Relative calls across oat files are not supported. 
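Once the running offset has been aligned to the instruction-set alignment and the method header written, the writer applies each LinkerPatch to a private copy of the code before emitting it: every patch names a kind and a literal offset, and the dispatch writes either a PC-relative displacement or a resolved address at that spot. A schematic version (simplified payloads and plain little-endian stores, not ART's per-architecture patchers):

    #include <cstdint>
    #include <vector>

    enum class PatchKind { kCallRelative, kDexCacheArray, kCall, kMethod, kType };

    struct Patch {
      PatchKind kind;
      uint32_t literal_offset;  // Where in the method's code to write.
      uint32_t target_value;    // Pre-resolved value for this sketch.
    };

    // Write a 32-bit little-endian value into the code copy.
    static void Store32(std::vector<uint8_t>& code, uint32_t offset, uint32_t value) {
      code[offset + 0] = static_cast<uint8_t>(value);
      code[offset + 1] = static_cast<uint8_t>(value >> 8);
      code[offset + 2] = static_cast<uint8_t>(value >> 16);
      code[offset + 3] = static_cast<uint8_t>(value >> 24);
    }

    void ApplyPatches(std::vector<uint8_t>& patched_code,
                      const std::vector<Patch>& patches,
                      uint32_t code_offset /* where this code lands in the oat file */) {
      for (const Patch& patch : patches) {
        switch (patch.kind) {
          case PatchKind::kCallRelative:
            // Relative calls encode a displacement from the patched location.
            Store32(patched_code, patch.literal_offset,
                    patch.target_value - (code_offset + patch.literal_offset));
            break;
          case PatchKind::kDexCacheArray:
          case PatchKind::kCall:
          case PatchKind::kMethod:
          case PatchKind::kType:
            // Address-style patches write the resolved value directly.
            Store32(patched_code, patch.literal_offset, patch.target_value);
            break;
        }
      }
    }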
+ uint32_t target_offset = GetTargetOffset(patch); + uint32_t literal_offset = patch.LiteralOffset(); + writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset, + offset_ + literal_offset, target_offset); + } else if (patch.Type() == kLinkerPatchDexCacheArray) { + uint32_t target_offset = GetDexCacheOffset(patch); + uint32_t literal_offset = patch.LiteralOffset(); + writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch, + offset_ + literal_offset, + target_offset); + } else if (patch.Type() == kLinkerPatchCall) { + uint32_t target_offset = GetTargetOffset(patch); + PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset); + } else if (patch.Type() == kLinkerPatchMethod) { + ArtMethod* method = GetTargetMethod(patch); + PatchMethodAddress(&patched_code_, patch.LiteralOffset(), method); + } else if (patch.Type() == kLinkerPatchType) { + mirror::Class* type = GetTargetType(patch); + PatchObjectAddress(&patched_code_, patch.LiteralOffset(), type); } } + } - writer_->oat_header_->UpdateChecksum(wrapped.data(), code_size); - if (!out->WriteFully(wrapped.data(), code_size)) { - ReportWriteFailure("method code", it); - return false; - } - writer_->size_code_ += code_size; - offset_ += code_size; + writer_->oat_header_->UpdateChecksum(wrapped.data(), code_size); + if (!out->WriteFully(wrapped.data(), code_size)) { + ReportWriteFailure("method code", it); + return false; } - DCHECK_OFFSET_(); + writer_->size_code_ += code_size; + offset_ += code_size; } + DCHECK_OFFSET_(); ++method_offsets_index_; } @@ -789,7 +793,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } ArtMethod* GetTargetMethod(const LinkerPatch& patch) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { MethodReference ref = patch.TargetMethod(); mirror::DexCache* dex_cache = (dex_file_ == ref.dex_file) ? dex_cache_ : class_linker_->FindDexCache(*ref.dex_file); @@ -799,7 +803,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { return method; } - uint32_t GetTargetOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + uint32_t GetTargetOffset(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) { auto target_it = writer_->method_offset_map_.map.find(patch.TargetMethod()); uint32_t target_offset = (target_it != writer_->method_offset_map_.map.end()) ? target_it->second : 0u; @@ -824,7 +828,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } mirror::Class* GetTargetType(const LinkerPatch& patch) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { mirror::DexCache* dex_cache = (dex_file_ == patch.TargetTypeDexFile()) ? 
dex_cache_ : class_linker_->FindDexCache(*patch.TargetTypeDexFile()); mirror::Class* type = dex_cache->GetResolvedType(patch.TargetTypeIndex()); @@ -832,7 +836,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { return type; } - uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) { if (writer_->image_writer_ != nullptr) { auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress( patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset()); @@ -845,7 +849,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { // NOTE: Direct method pointers across oat files don't use linker patches. However, direct // type pointers across oat files do. (TODO: Investigate why.) if (writer_->image_writer_ != nullptr) { @@ -861,7 +865,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } void PatchMethodAddress(std::vector<uint8_t>* code, uint32_t offset, ArtMethod* method) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { // NOTE: Direct method pointers across oat files don't use linker patches. However, direct // type pointers across oat files do. (TODO: Investigate why.) if (writer_->image_writer_ != nullptr) { @@ -878,7 +882,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { uint32_t address = writer_->image_writer_ == nullptr ? 
target_offset : PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() + writer_->oat_data_offset_ + target_offset); diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index 82b9377c07..760fb7c12c 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -165,9 +165,9 @@ class OatWriter { size_t InitOatClasses(size_t offset); size_t InitOatMaps(size_t offset); size_t InitOatCode(size_t offset) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); size_t InitOatCodeDexFiles(size_t offset) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); bool WriteTables(OutputStream* out, const size_t file_offset); size_t WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset); @@ -178,7 +178,7 @@ class OatWriter { class OatDexFile { public: - explicit OatDexFile(size_t offset, const DexFile& dex_file); + OatDexFile(size_t offset, const DexFile& dex_file); size_t SizeOf() const; void UpdateChecksum(OatHeader* oat_header) const; bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const; @@ -200,10 +200,10 @@ class OatWriter { class OatClass { public: - explicit OatClass(size_t offset, - const std::vector<CompiledMethod*>& compiled_methods, - uint32_t num_non_null_compiled_methods, - mirror::Class::Status status); + OatClass(size_t offset, + const std::vector<CompiledMethod*>& compiled_methods, + uint32_t num_non_null_compiled_methods, + mirror::Class::Status status); ~OatClass(); size_t GetOatMethodOffsetsOffsetFromOatHeader(size_t class_def_method_index_) const; size_t GetOatMethodOffsetsOffsetFromOatClass(size_t class_def_method_index_) const; diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index 329112a377..84201c39a7 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -154,11 +154,6 @@ void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { // entry block. Any following blocks would have had the join block // as a dominator, and `MergeWith` handles changing that to the // entry block. - - // Remove the original condition if it is now unused. - if (!if_condition->HasUses()) { - if_condition->GetBlock()->RemoveInstructionOrPhi(if_condition); - } } void HBooleanSimplifier::Run() { diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index bb1b40a1b5..88414980b8 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -333,18 +333,42 @@ void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item) return; } - const size_t num_blocks = graph_->GetBlocks().Size(); - ArenaBitVector can_block_throw(arena_, num_blocks, false); + // Bit vector stores information on which blocks contain throwing instructions. + // Must be expandable because catch blocks may be split into two. + ArenaBitVector can_block_throw(arena_, graph_->GetBlocks().Size(), /* expandable */ true); // Scan blocks and mark those which contain throwing instructions. 
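The bit vector above has to grow on demand because splitting a catch block introduces block ids that did not exist when the vector was sized. A minimal stand-in for that behaviour with plain STL types (illustration only, not the ArenaBitVector API):

#include <cstddef>
#include <vector>

class ExpandableBitVectorSketch {
 public:
  void SetBit(size_t index) {
    if (index >= bits_.size()) {
      bits_.resize(index + 1, false);  // Grow on demand, like an expandable bit vector.
    }
    bits_[index] = true;
  }

  bool IsBitSet(size_t index) const {
    return index < bits_.size() && bits_[index];
  }

 private:
  std::vector<bool> bits_;
};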
- for (size_t block_id = 0; block_id < num_blocks; ++block_id) { + for (size_t block_id = 0, e = graph_->GetBlocks().Size(); block_id < e; ++block_id) { HBasicBlock* block = graph_->GetBlocks().Get(block_id); + bool can_throw = false; for (HInstructionIterator insn(block->GetInstructions()); !insn.Done(); insn.Advance()) { if (insn.Current()->CanThrow()) { - can_block_throw.SetBit(block_id); + can_throw = true; break; } } + + if (can_throw) { + if (block->IsCatchBlock()) { + // Catch blocks are always considered an entry point into the TryItem in + // order to avoid splitting exceptional edges. We split the block after + // the move-exception (if present) and mark the first part non-throwing. + // Later on, a TryBoundary will be inserted between the two blocks. + HInstruction* first_insn = block->GetFirstInstruction(); + if (first_insn->IsLoadException()) { + // Catch block starts with a LoadException. Split the block after the + // StoreLocal and ClearException which must come after the load. + DCHECK(first_insn->GetNext()->IsStoreLocal()); + DCHECK(first_insn->GetNext()->GetNext()->IsClearException()); + block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext()); + } else { + // Catch block does not load the exception. Split at the beginning to + // create an empty catch block. + block = block->SplitBefore(first_insn); + } + } + can_block_throw.SetBit(block->GetBlockId()); + } } // Iterate over all blocks, find those covered by some TryItem and: @@ -353,7 +377,7 @@ void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item) // (c) link the new blocks to corresponding exception handlers. // We cannot iterate only over blocks in `branch_targets_` because switch-case // blocks share the same dex_pc. - for (size_t block_id = 0; block_id < num_blocks; ++block_id) { + for (size_t block_id = 0, e = graph_->GetBlocks().Size(); block_id < e; ++block_id) { HBasicBlock* try_block = graph_->GetBlocks().Get(block_id); // TryBoundary blocks are added at the end of the list and not iterated over. @@ -365,57 +389,35 @@ void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item) continue; } - if (try_block->IsCatchBlock()) { - // Catch blocks are always considered an entry point into the TryItem in - // order to avoid splitting exceptional edges (they might not have been - // created yet). We separate the move-exception (if present) from the - // rest of the block and insert a TryBoundary after it, creating a - // landing pad for the exceptional edges. - HInstruction* first_insn = try_block->GetFirstInstruction(); - HInstruction* split_position = nullptr; - if (first_insn->IsLoadException()) { - // Catch block starts with a LoadException. Split the block after the - // StoreLocal that must come after the load. - DCHECK(first_insn->GetNext()->IsStoreLocal()); - split_position = first_insn->GetNext()->GetNext(); - } else { - // Catch block does not obtain the exception. Split at the beginning - // to create an empty catch block. - split_position = first_insn; - } - DCHECK(split_position != nullptr); - HBasicBlock* catch_block = try_block; - try_block = catch_block->SplitBefore(split_position); - SplitTryBoundaryEdge(catch_block, try_block, HTryBoundary::kEntry, code_item, *try_item); - } else { - // For non-catch blocks, find predecessors which are not covered by the - // same TryItem range. Such edges enter the try block and will have - // a TryBoundary inserted. 
- for (size_t i = 0; i < try_block->GetPredecessors().Size(); ++i) { - HBasicBlock* predecessor = try_block->GetPredecessors().Get(i); - if (predecessor->IsSingleTryBoundary()) { - // The edge was already split because of an exit from a neighbouring - // TryItem. We split it again and insert an entry point. - if (kIsDebugBuild) { - HTryBoundary* last_insn = predecessor->GetLastInstruction()->AsTryBoundary(); - const DexFile::TryItem* predecessor_try_item = - GetTryItem(predecessor->GetSinglePredecessor(), code_item, can_block_throw); - DCHECK(!last_insn->IsEntry()); - DCHECK_EQ(last_insn->GetNormalFlowSuccessor(), try_block); - DCHECK(try_block->IsFirstIndexOfPredecessor(predecessor, i)); - DCHECK_NE(try_item, predecessor_try_item); - } - } else if (GetTryItem(predecessor, code_item, can_block_throw) != try_item) { - // This is an entry point into the TryItem and the edge has not been - // split yet. That means that `predecessor` is not in a TryItem, or - // it is in a different TryItem and we happened to iterate over this - // block first. We split the edge and insert an entry point. - } else { - // Not an edge on the boundary of the try block. - continue; + // Catch blocks were split earlier and cannot throw. + DCHECK(!try_block->IsCatchBlock()); + + // Find predecessors which are not covered by the same TryItem range. Such + // edges enter the try block and will have a TryBoundary inserted. + for (size_t i = 0; i < try_block->GetPredecessors().Size(); ++i) { + HBasicBlock* predecessor = try_block->GetPredecessors().Get(i); + if (predecessor->IsSingleTryBoundary()) { + // The edge was already split because of an exit from a neighbouring + // TryItem. We split it again and insert an entry point. + if (kIsDebugBuild) { + HTryBoundary* last_insn = predecessor->GetLastInstruction()->AsTryBoundary(); + const DexFile::TryItem* predecessor_try_item = + GetTryItem(predecessor->GetSinglePredecessor(), code_item, can_block_throw); + DCHECK(!last_insn->IsEntry()); + DCHECK_EQ(last_insn->GetNormalFlowSuccessor(), try_block); + DCHECK(try_block->IsFirstIndexOfPredecessor(predecessor, i)); + DCHECK_NE(try_item, predecessor_try_item); } - SplitTryBoundaryEdge(predecessor, try_block, HTryBoundary::kEntry, code_item, *try_item); + } else if (GetTryItem(predecessor, code_item, can_block_throw) != try_item) { + // This is an entry point into the TryItem and the edge has not been + // split yet. That means that `predecessor` is not in a TryItem, or + // it is in a different TryItem and we happened to iterate over this + // block first. We split the edge and insert an entry point. + } else { + // Not an edge on the boundary of the try block. + continue; } + SplitTryBoundaryEdge(predecessor, try_block, HTryBoundary::kEntry, code_item, *try_item); } // Find successors which are not covered by the same TryItem range. Such @@ -755,6 +757,35 @@ void HGraphBuilder::BuildReturn(const Instruction& instruction, Primitive::Type current_block_ = nullptr; } +void HGraphBuilder::PotentiallySimplifyFakeString(uint16_t original_dex_register, + uint32_t dex_pc, + HInvoke* actual_string) { + if (!graph_->IsDebuggable()) { + // Notify that we cannot compile with baseline. The dex registers aliasing + // with `original_dex_register` will be handled when we optimize + // (see HInstructionSimplifer::VisitFakeString). 
+ can_use_baseline_for_string_init_ = false; + return; + } + const VerifiedMethod* verified_method = + compiler_driver_->GetVerifiedMethod(dex_file_, dex_compilation_unit_->GetDexMethodIndex()); + if (verified_method != nullptr) { + UpdateLocal(original_dex_register, actual_string); + const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map = + verified_method->GetStringInitPcRegMap(); + auto map_it = string_init_map.find(dex_pc); + if (map_it != string_init_map.end()) { + std::set<uint32_t> reg_set = map_it->second; + for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) { + HInstruction* load_local = LoadLocal(original_dex_register, Primitive::kPrimNot); + UpdateLocal(*set_it, load_local); + } + } + } else { + can_use_baseline_for_string_init_ = false; + } +} + bool HGraphBuilder::BuildInvoke(const Instruction& instruction, uint32_t dex_pc, uint32_t method_idx, @@ -774,7 +805,9 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, invoke_type = kDirect; break; case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_VIRTUAL_QUICK: case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: invoke_type = kVirtual; break; case Instruction::INVOKE_INTERFACE: @@ -996,34 +1029,23 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, if (clinit_check_requirement == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit) { // Add the class initialization check as last input of `invoke`. DCHECK(clinit_check != nullptr); + DCHECK(!is_string_init); invoke->SetArgumentAt(argument_index, clinit_check); + argument_index++; } - current_block_->AddInstruction(invoke); - latest_result_ = invoke; - // Add move-result for StringFactory method. if (is_string_init) { uint32_t orig_this_reg = is_range ? 
register_index : args[0]; - UpdateLocal(orig_this_reg, invoke); - const VerifiedMethod* verified_method = - compiler_driver_->GetVerifiedMethod(dex_file_, dex_compilation_unit_->GetDexMethodIndex()); - if (verified_method == nullptr) { - LOG(WARNING) << "No verified method for method calling String.<init>: " - << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_); - return false; - } - const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map = - verified_method->GetStringInitPcRegMap(); - auto map_it = string_init_map.find(dex_pc); - if (map_it != string_init_map.end()) { - std::set<uint32_t> reg_set = map_it->second; - for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) { - HInstruction* load_local = LoadLocal(orig_this_reg, Primitive::kPrimNot); - UpdateLocal(*set_it, load_local); - } - } + HInstruction* fake_string = LoadLocal(orig_this_reg, Primitive::kPrimNot); + invoke->SetArgumentAt(argument_index, fake_string); + current_block_->AddInstruction(invoke); + PotentiallySimplifyFakeString(orig_this_reg, dex_pc, invoke); + } else { + current_block_->AddInstruction(invoke); } + latest_result_ = invoke; + return true; } @@ -1032,7 +1054,15 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, bool is_put) { uint32_t source_or_dest_reg = instruction.VRegA_22c(); uint32_t obj_reg = instruction.VRegB_22c(); - uint16_t field_index = instruction.VRegC_22c(); + uint16_t field_index; + if (instruction.IsQuickened()) { + if (!CanDecodeQuickenedInfo()) { + return false; + } + field_index = LookupQuickenedInfo(dex_pc); + } else { + field_index = instruction.VRegC_22c(); + } ScopedObjectAccess soa(Thread::Current()); ArtField* resolved_field = @@ -1541,6 +1571,17 @@ void HGraphBuilder::PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex } } +bool HGraphBuilder::CanDecodeQuickenedInfo() const { + return interpreter_metadata_ != nullptr; +} + +uint16_t HGraphBuilder::LookupQuickenedInfo(uint32_t dex_pc) { + DCHECK(interpreter_metadata_ != nullptr); + uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_); + DCHECK_EQ(dex_pc, dex_pc_in_map); + return DecodeUnsignedLeb128(&interpreter_metadata_); +} + bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc) { if (current_block_ == nullptr) { return true; // Dead code @@ -1638,6 +1679,7 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::RETURN_VOID_NO_BARRIER: case Instruction::RETURN_VOID: { BuildReturn(instruction, Primitive::kPrimVoid); break; @@ -1686,8 +1728,17 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::INVOKE_INTERFACE: case Instruction::INVOKE_STATIC: case Instruction::INVOKE_SUPER: - case Instruction::INVOKE_VIRTUAL: { - uint32_t method_idx = instruction.VRegB_35c(); + case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_VIRTUAL_QUICK: { + uint16_t method_idx; + if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) { + if (!CanDecodeQuickenedInfo()) { + return false; + } + method_idx = LookupQuickenedInfo(dex_pc); + } else { + method_idx = instruction.VRegB_35c(); + } uint32_t number_of_vreg_arguments = instruction.VRegA_35c(); uint32_t args[5]; instruction.GetVarArgs(args); @@ -1702,8 +1753,17 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::INVOKE_INTERFACE_RANGE: case Instruction::INVOKE_STATIC_RANGE: case 
Instruction::INVOKE_SUPER_RANGE: - case Instruction::INVOKE_VIRTUAL_RANGE: { - uint32_t method_idx = instruction.VRegB_3rc(); + case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: { + uint16_t method_idx; + if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) { + if (!CanDecodeQuickenedInfo()) { + return false; + } + method_idx = LookupQuickenedInfo(dex_pc); + } else { + method_idx = instruction.VRegB_3rc(); + } uint32_t number_of_vreg_arguments = instruction.VRegA_3rc(); uint32_t register_index = instruction.VRegC(); if (!BuildInvoke(instruction, dex_pc, method_idx, @@ -2238,10 +2298,10 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::NEW_INSTANCE: { uint16_t type_index = instruction.VRegB_21c(); if (compiler_driver_->IsStringTypeIndex(type_index, dex_file_)) { - // Turn new-instance of string into a const 0. int32_t register_index = instruction.VRegA(); - HNullConstant* constant = graph_->GetNullConstant(); - UpdateLocal(register_index, constant); + HFakeString* fake_string = new (arena_) HFakeString(); + current_block_->AddInstruction(fake_string); + UpdateLocal(register_index, fake_string); } else { QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) ? kQuickAllocObjectWithAccessCheck @@ -2328,27 +2388,27 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 } case Instruction::CMP_LONG: { - Binop_23x_cmp(instruction, Primitive::kPrimLong, kNoBias, dex_pc); + Binop_23x_cmp(instruction, Primitive::kPrimLong, ComparisonBias::kNoBias, dex_pc); break; } case Instruction::CMPG_FLOAT: { - Binop_23x_cmp(instruction, Primitive::kPrimFloat, kGtBias, dex_pc); + Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kGtBias, dex_pc); break; } case Instruction::CMPG_DOUBLE: { - Binop_23x_cmp(instruction, Primitive::kPrimDouble, kGtBias, dex_pc); + Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kGtBias, dex_pc); break; } case Instruction::CMPL_FLOAT: { - Binop_23x_cmp(instruction, Primitive::kPrimFloat, kLtBias, dex_pc); + Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kLtBias, dex_pc); break; } case Instruction::CMPL_DOUBLE: { - Binop_23x_cmp(instruction, Primitive::kPrimDouble, kLtBias, dex_pc); + Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kLtBias, dex_pc); break; } @@ -2356,12 +2416,19 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; case Instruction::IGET: + case Instruction::IGET_QUICK: case Instruction::IGET_WIDE: + case Instruction::IGET_WIDE_QUICK: case Instruction::IGET_OBJECT: + case Instruction::IGET_OBJECT_QUICK: case Instruction::IGET_BOOLEAN: + case Instruction::IGET_BOOLEAN_QUICK: case Instruction::IGET_BYTE: + case Instruction::IGET_BYTE_QUICK: case Instruction::IGET_CHAR: - case Instruction::IGET_SHORT: { + case Instruction::IGET_CHAR_QUICK: + case Instruction::IGET_SHORT: + case Instruction::IGET_SHORT_QUICK: { if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) { return false; } @@ -2369,12 +2436,19 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 } case Instruction::IPUT: + case Instruction::IPUT_QUICK: case Instruction::IPUT_WIDE: + case Instruction::IPUT_WIDE_QUICK: case Instruction::IPUT_OBJECT: + case Instruction::IPUT_OBJECT_QUICK: case Instruction::IPUT_BOOLEAN: + case Instruction::IPUT_BOOLEAN_QUICK: case Instruction::IPUT_BYTE: + case Instruction::IPUT_BYTE_QUICK: case 
Instruction::IPUT_CHAR: - case Instruction::IPUT_SHORT: { + case Instruction::IPUT_CHAR_QUICK: + case Instruction::IPUT_SHORT: + case Instruction::IPUT_SHORT_QUICK: { if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) { return false; } @@ -2479,6 +2553,7 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::MOVE_EXCEPTION: { current_block_->AddInstruction(new (arena_) HLoadException()); UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction()); + current_block_->AddInstruction(new (arena_) HClearException()); break; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 7098eb86a5..ad5d92345b 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -39,7 +39,8 @@ class HGraphBuilder : public ValueObject { const DexCompilationUnit* const outer_compilation_unit, const DexFile* dex_file, CompilerDriver* driver, - OptimizingCompilerStats* compiler_stats) + OptimizingCompilerStats* compiler_stats, + const uint8_t* interpreter_metadata) : arena_(graph->GetArena()), branch_targets_(graph->GetArena(), 0), locals_(graph->GetArena(), 0), @@ -54,7 +55,9 @@ class HGraphBuilder : public ValueObject { return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])), code_start_(nullptr), latest_result_(nullptr), - compilation_stats_(compiler_stats) {} + can_use_baseline_for_string_init_(true), + compilation_stats_(compiler_stats), + interpreter_metadata_(interpreter_metadata) {} // Only for unit testing. HGraphBuilder(HGraph* graph, Primitive::Type return_type = Primitive::kPrimInt) @@ -72,10 +75,15 @@ class HGraphBuilder : public ValueObject { return_type_(return_type), code_start_(nullptr), latest_result_(nullptr), + can_use_baseline_for_string_init_(true), compilation_stats_(nullptr) {} bool BuildGraph(const DexFile::CodeItem& code); + bool CanUseBaselineForStringInit() const { + return can_use_baseline_for_string_init_; + } + static constexpr const char* kBuilderPassName = "builder"; private: @@ -114,6 +122,9 @@ class HGraphBuilder : public ValueObject { const DexFile::CodeItem& code_item, const DexFile::TryItem& try_item); + bool CanDecodeQuickenedInfo() const; + uint16_t LookupQuickenedInfo(uint32_t dex_pc); + void InitializeLocals(uint16_t count); HLocal* GetLocalAt(int register_index) const; void UpdateLocal(int register_index, HInstruction* instruction) const; @@ -251,6 +262,10 @@ class HGraphBuilder : public ValueObject { // Returns whether `type_index` points to the outer-most compiling method's class. bool IsOutermostCompilingClass(uint16_t type_index) const; + void PotentiallySimplifyFakeString(uint16_t original_dex_register, + uint32_t dex_pc, + HInvoke* invoke); + ArenaAllocator* const arena_; // A list of the size of the dex code holding block information for @@ -290,8 +305,15 @@ class HGraphBuilder : public ValueObject { // used by move-result instructions. HInstruction* latest_result_; + // We need to know whether we have built a graph that has calls to StringFactory + // and hasn't gone through the verifier. If the following flag is `false`, then + // we cannot compile with baseline. 
+ bool can_use_baseline_for_string_init_; + OptimizingCompilerStats* compilation_stats_; + const uint8_t* interpreter_metadata_; + DISALLOW_COPY_AND_ASSIGN(HGraphBuilder); }; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 4607ebe548..d0b5ffd255 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -1005,7 +1005,39 @@ void CodeGenerator::EmitParallelMoves(Location from1, GetMoveResolver()->EmitNativeCode(¶llel_move); } -void SlowPathCode::RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc) { +void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path) { + // Ensure that the call kind indication given to the register allocator is + // coherent with the runtime call generated, and that the GC side effect is + // set when required. + if (slow_path == nullptr) { + DCHECK(instruction->GetLocations()->WillCall()) << instruction->DebugName(); + DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) + << instruction->DebugName() << instruction->GetSideEffects().ToString(); + } else { + DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal()) + << instruction->DebugName() << slow_path->GetDescription(); + DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || + // Control flow would not come back into the code if a fatal slow + // path is taken, so we do not care if it triggers GC. + slow_path->IsFatal() || + // HDeoptimize is a special case: we know we are not coming back from + // it into the code. + instruction->IsDeoptimize()) + << instruction->DebugName() << instruction->GetSideEffects().ToString() + << slow_path->GetDescription(); + } + + // Check the coherency of leaf information. + DCHECK(instruction->IsSuspendCheck() + || ((slow_path != nullptr) && slow_path->IsFatal()) + || instruction->GetLocations()->CanCall() + || !IsLeafMethod()) + << instruction->DebugName() << ((slow_path != nullptr) ? 
slow_path->GetDescription() : ""); +} + +void SlowPathCode::RecordPcInfo(CodeGenerator* codegen, + HInstruction* instruction, + uint32_t dex_pc) { codegen->RecordPcInfo(instruction, dex_pc, this); } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index eb63b49884..25824448c5 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -98,6 +98,8 @@ class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { return saved_fpu_stack_offsets_[reg]; } + virtual bool IsFatal() const { return false; } + virtual const char* GetDescription() const = 0; protected: @@ -290,6 +292,8 @@ class CodeGenerator { return type == Primitive::kPrimNot && !value->IsNullConstant(); } + void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path); + void AddAllocatedRegister(Location location) { allocated_registers_.Add(location); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index e3683ef0dd..1bd42160d7 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -69,6 +69,8 @@ class NullCheckSlowPathARM : public SlowPathCodeARM { QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM"; } private: @@ -87,6 +89,8 @@ class DivZeroCheckSlowPathARM : public SlowPathCodeARM { QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM"; } private: @@ -161,6 +165,8 @@ class BoundsCheckSlowPathARM : public SlowPathCodeARM { QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM"; } private: @@ -334,7 +340,7 @@ class DeoptimizationSlowPathARM : public SlowPathCodeARM { #undef __ #define __ down_cast<ArmAssembler*>(GetAssembler())-> -inline Condition ARMCondition(IfCondition cond) { +inline Condition ARMSignedOrFPCondition(IfCondition cond) { switch (cond) { case kCondEQ: return EQ; case kCondNE: return NE; @@ -342,24 +348,22 @@ inline Condition ARMCondition(IfCondition cond) { case kCondLE: return LE; case kCondGT: return GT; case kCondGE: return GE; - default: - LOG(FATAL) << "Unknown if condition"; } - return EQ; // Unreachable. + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); } -inline Condition ARMOppositeCondition(IfCondition cond) { +inline Condition ARMUnsignedCondition(IfCondition cond) { switch (cond) { - case kCondEQ: return NE; - case kCondNE: return EQ; - case kCondLT: return GE; - case kCondLE: return GT; - case kCondGT: return LE; - case kCondGE: return LT; - default: - LOG(FATAL) << "Unknown if condition"; + case kCondEQ: return EQ; + case kCondNE: return NE; + case kCondLT: return LO; + case kCondLE: return LS; + case kCondGT: return HI; + case kCondGE: return HS; } - return EQ; // Unreachable. 
+ LOG(FATAL) << "Unreachable"; + UNREACHABLE(); } void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const { @@ -949,15 +953,10 @@ void CodeGeneratorARM::InvokeRuntime(int32_t entry_point_offset, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { + ValidateInvokeRuntime(instruction, slow_path); __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset); __ blx(LR); RecordPcInfo(instruction, dex_pc, slow_path); - DCHECK(instruction->IsSuspendCheck() - || instruction->IsBoundsCheck() - || instruction->IsNullCheck() - || instruction->IsDivZeroCheck() - || instruction->GetLocations()->CanCall() - || !IsLeafMethod()); } void InstructionCodeGeneratorARM::HandleGoto(HInstruction* got, HBasicBlock* successor) { @@ -1008,6 +1007,142 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit) { UNUSED(exit); } +void InstructionCodeGeneratorARM::GenerateCompareWithImmediate(Register left, int32_t right) { + ShifterOperand operand; + if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, right, &operand)) { + __ cmp(left, operand); + } else { + Register temp = IP; + __ LoadImmediate(temp, right); + __ cmp(left, ShifterOperand(temp)); + } +} + +void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond, + Label* true_label, + Label* false_label) { + __ vmstat(); // transfer FP status register to ARM APSR. + if (cond->IsFPConditionTrueIfNaN()) { + __ b(true_label, VS); // VS for unordered. + } else if (cond->IsFPConditionFalseIfNaN()) { + __ b(false_label, VS); // VS for unordered. + } + __ b(true_label, ARMSignedOrFPCondition(cond->GetCondition())); +} + +void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, + Label* true_label, + Label* false_label) { + LocationSummary* locations = cond->GetLocations(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + IfCondition if_cond = cond->GetCondition(); + + Register left_high = left.AsRegisterPairHigh<Register>(); + Register left_low = left.AsRegisterPairLow<Register>(); + IfCondition true_high_cond = if_cond; + IfCondition false_high_cond = cond->GetOppositeCondition(); + Condition final_condition = ARMUnsignedCondition(if_cond); + + // Set the conditions for the test, remembering that == needs to be + // decided using the low words. + switch (if_cond) { + case kCondEQ: + case kCondNE: + // Nothing to do. + break; + case kCondLT: + false_high_cond = kCondGT; + break; + case kCondLE: + true_high_cond = kCondLT; + break; + case kCondGT: + false_high_cond = kCondLT; + break; + case kCondGE: + true_high_cond = kCondGT; + break; + } + if (right.IsConstant()) { + int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); + int32_t val_low = Low32Bits(value); + int32_t val_high = High32Bits(value); + + GenerateCompareWithImmediate(left_high, val_high); + if (if_cond == kCondNE) { + __ b(true_label, ARMSignedOrFPCondition(true_high_cond)); + } else if (if_cond == kCondEQ) { + __ b(false_label, ARMSignedOrFPCondition(false_high_cond)); + } else { + __ b(true_label, ARMSignedOrFPCondition(true_high_cond)); + __ b(false_label, ARMSignedOrFPCondition(false_high_cond)); + } + // Must be equal high, so compare the lows. 
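The word-wise long comparison emitted above can be restated on the host: the high words are compared as signed values and, only when they are equal, the low words decide via an unsigned comparison. A self-contained sketch of that equivalence (illustrative helper, not ART code); the other orderings follow the same structure, with only the final unsigned condition changing:

#include <cstdint>

static bool LessThanViaWords(int64_t a, int64_t b) {
  const int32_t a_hi = static_cast<int32_t>(a >> 32);
  const int32_t b_hi = static_cast<int32_t>(b >> 32);
  if (a_hi != b_hi) {
    return a_hi < b_hi;  // The signed compare of the high words decides.
  }
  const uint32_t a_lo = static_cast<uint32_t>(a);
  const uint32_t b_lo = static_cast<uint32_t>(b);
  return a_lo < b_lo;    // Equal high words: unsigned compare of the low words.
}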
+ GenerateCompareWithImmediate(left_low, val_low); + } else { + Register right_high = right.AsRegisterPairHigh<Register>(); + Register right_low = right.AsRegisterPairLow<Register>(); + + __ cmp(left_high, ShifterOperand(right_high)); + if (if_cond == kCondNE) { + __ b(true_label, ARMSignedOrFPCondition(true_high_cond)); + } else if (if_cond == kCondEQ) { + __ b(false_label, ARMSignedOrFPCondition(false_high_cond)); + } else { + __ b(true_label, ARMSignedOrFPCondition(true_high_cond)); + __ b(false_label, ARMSignedOrFPCondition(false_high_cond)); + } + // Must be equal high, so compare the lows. + __ cmp(left_low, ShifterOperand(right_low)); + } + // The last comparison might be unsigned. + __ b(true_label, final_condition); +} + +void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HIf* if_instr, + HCondition* condition, + Label* true_target, + Label* false_target, + Label* always_true_target) { + LocationSummary* locations = condition->GetLocations(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + + // We don't want true_target as a nullptr. + if (true_target == nullptr) { + true_target = always_true_target; + } + bool falls_through = (false_target == nullptr); + + // FP compares don't like null false_targets. + if (false_target == nullptr) { + false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + } + + Primitive::Type type = condition->InputAt(0)->GetType(); + switch (type) { + case Primitive::kPrimLong: + GenerateLongComparesAndJumps(condition, true_target, false_target); + break; + case Primitive::kPrimFloat: + __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>()); + GenerateFPJumps(condition, true_target, false_target); + break; + case Primitive::kPrimDouble: + __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(right.AsFpuRegisterPairLow<SRegister>())); + GenerateFPJumps(condition, true_target, false_target); + break; + default: + LOG(FATAL) << "Unexpected compare type " << type; + } + + if (!falls_through) { + __ b(false_target); + } +} + void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction, Label* true_target, Label* false_target, @@ -1033,25 +1168,27 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio } else { // Condition has not been materialized, use its inputs as the // comparison and its condition as the branch condition. + Primitive::Type type = + cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; + // Is this a long or FP comparison that has been folded into the HCondition? + if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { + // Generate the comparison directly. 
+ GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(), + true_target, false_target, always_true_target); + return; + } + LocationSummary* locations = cond->GetLocations(); DCHECK(locations->InAt(0).IsRegister()) << locations->InAt(0); Register left = locations->InAt(0).AsRegister<Register>(); - if (locations->InAt(1).IsRegister()) { - __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>())); + Location right = locations->InAt(1); + if (right.IsRegister()) { + __ cmp(left, ShifterOperand(right.AsRegister<Register>())); } else { - DCHECK(locations->InAt(1).IsConstant()); - HConstant* constant = locations->InAt(1).GetConstant(); - int32_t value = CodeGenerator::GetInt32ValueOf(constant); - ShifterOperand operand; - if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) { - __ cmp(left, operand); - } else { - Register temp = IP; - __ LoadImmediate(temp, value); - __ cmp(left, ShifterOperand(temp)); - } + DCHECK(right.IsConstant()); + GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); } - __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition())); + __ b(true_target, ARMSignedOrFPCondition(cond->AsCondition()->GetCondition())); } } if (false_target != nullptr) { @@ -1104,37 +1241,88 @@ void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { void LocationsBuilderARM::VisitCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); - if (cond->NeedsMaterialization()) { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // Handle the long/FP comparisons made in instruction simplification. 
+ switch (cond->InputAt(0)->GetType()) { + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); + if (cond->NeedsMaterialization()) { + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + if (cond->NeedsMaterialization()) { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } + break; + + default: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); + if (cond->NeedsMaterialization()) { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } } } void InstructionCodeGeneratorARM::VisitCondition(HCondition* cond) { - if (!cond->NeedsMaterialization()) return; + if (!cond->NeedsMaterialization()) { + return; + } + LocationSummary* locations = cond->GetLocations(); - Register left = locations->InAt(0).AsRegister<Register>(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + Register out = locations->Out().AsRegister<Register>(); + Label true_label, false_label; - if (locations->InAt(1).IsRegister()) { - __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>())); - } else { - DCHECK(locations->InAt(1).IsConstant()); - int32_t value = CodeGenerator::GetInt32ValueOf(locations->InAt(1).GetConstant()); - ShifterOperand operand; - if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) { - __ cmp(left, operand); - } else { - Register temp = IP; - __ LoadImmediate(temp, value); - __ cmp(left, ShifterOperand(temp)); + switch (cond->InputAt(0)->GetType()) { + default: { + // Integer case. + if (right.IsRegister()) { + __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>())); + } else { + DCHECK(right.IsConstant()); + GenerateCompareWithImmediate(left.AsRegister<Register>(), + CodeGenerator::GetInt32ValueOf(right.GetConstant())); + } + __ it(ARMSignedOrFPCondition(cond->GetCondition()), kItElse); + __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1), + ARMSignedOrFPCondition(cond->GetCondition())); + __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(0), + ARMSignedOrFPCondition(cond->GetOppositeCondition())); + return; } + case Primitive::kPrimLong: + GenerateLongComparesAndJumps(cond, &true_label, &false_label); + break; + case Primitive::kPrimFloat: + __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>()); + GenerateFPJumps(cond, &true_label, &false_label); + break; + case Primitive::kPrimDouble: + __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(right.AsFpuRegisterPairLow<SRegister>())); + GenerateFPJumps(cond, &true_label, &false_label); + break; } - __ it(ARMCondition(cond->GetCondition()), kItElse); - __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1), - ARMCondition(cond->GetCondition())); - __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(0), - ARMOppositeCondition(cond->GetCondition())); + + // Convert the jumps into the result. + Label done_label; + + // False case: result = 0. + __ Bind(&false_label); + __ LoadImmediate(out, 0); + __ b(&done_label); + + // True case: result = 1. 
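For the floating-point paths, the jump sequence below materializes the condition to 0 or 1 while letting an unordered (NaN) comparison short-circuit to whichever value the condition defines for NaN. Restated on the host for the unambiguous "not equal" case (illustration only):

#include <cmath>

static int MaterializeNotEqual(float lhs, float rhs) {
  const bool unordered = std::isnan(lhs) || std::isnan(rhs);
  if (unordered) {
    return 1;  // "!=" is a true-if-NaN condition, so unordered inputs yield 1.
  }
  return (lhs != rhs) ? 1 : 0;
}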
+ __ Bind(&true_label); + __ LoadImmediate(out, 1); + __ Bind(&done_label); } void LocationsBuilderARM::VisitEqual(HEqual* comp) { @@ -2588,6 +2776,9 @@ void InstructionCodeGeneratorARM::VisitDivZeroCheck(HDivZeroCheck* instruction) Location value = locations->InAt(0); switch (instruction->GetType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: case Primitive::kPrimInt: { if (value.IsRegister()) { __ CompareAndBranchIfZero(value.AsRegister<Register>(), slow_path->GetEntryLabel()); @@ -2913,7 +3104,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { ShifterOperand(right.AsRegisterPairHigh<Register>())); // Signed compare. __ b(&less, LT); __ b(&greater, GT); - // Do LoadImmediate before any `cmp`, as LoadImmediate might affect the status flags. + // Do LoadImmediate before the last `cmp`, as LoadImmediate might affect the status flags. __ LoadImmediate(out, 0); __ cmp(left.AsRegisterPairLow<Register>(), ShifterOperand(right.AsRegisterPairLow<Register>())); // Unsigned compare. @@ -2936,7 +3127,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { LOG(FATAL) << "Unexpected compare type " << type; } __ b(&done, EQ); - __ b(&less, CC); // CC is for both: unsigned compare for longs and 'less than' for floats. + __ b(&less, LO); // LO is for both: unsigned compare for longs and 'less than' for floats. __ Bind(&greater); __ LoadImmediate(out, 1); @@ -3710,7 +3901,7 @@ void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) { Register length = locations->InAt(1).AsRegister<Register>(); __ cmp(index, ShifterOperand(length)); - __ b(slow_path->GetEntryLabel(), CS); + __ b(slow_path->GetEntryLabel(), HS); } void CodeGeneratorARM::MarkGCCard(Register temp, @@ -4103,6 +4294,10 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { __ Bind(slow_path->GetExitLabel()); } +static int32_t GetExceptionTlsOffset() { + return Thread::ExceptionOffset<kArmWordSize>().Int32Value(); +} + void LocationsBuilderARM::VisitLoadException(HLoadException* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); @@ -4111,10 +4306,16 @@ void LocationsBuilderARM::VisitLoadException(HLoadException* load) { void InstructionCodeGeneratorARM::VisitLoadException(HLoadException* load) { Register out = load->GetLocations()->Out().AsRegister<Register>(); - int32_t offset = Thread::ExceptionOffset<kArmWordSize>().Int32Value(); - __ LoadFromOffset(kLoadWord, out, TR, offset); + __ LoadFromOffset(kLoadWord, out, TR, GetExceptionTlsOffset()); +} + +void LocationsBuilderARM::VisitClearException(HClearException* clear) { + new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); +} + +void InstructionCodeGeneratorARM::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { __ LoadImmediate(IP, 0); - __ StoreToOffset(kStoreWord, IP, TR, offset); + __ StoreToOffset(kStoreWord, IP, TR, GetExceptionTlsOffset()); } void LocationsBuilderARM::VisitThrow(HThrow* instruction) { @@ -4365,6 +4566,18 @@ void InstructionCodeGeneratorARM::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void LocationsBuilderARM::VisitFakeString(HFakeString* instruction) { + DCHECK(codegen_->IsBaseline()); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant())); +} + +void 
InstructionCodeGeneratorARM::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) { + DCHECK(codegen_->IsBaseline()); + // Will be generated at use site. +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 1d10293b58..53bd766dd4 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -207,6 +207,14 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { Label* true_target, Label* false_target, Label* always_true_target); + void GenerateCompareWithImmediate(Register left, int32_t right); + void GenerateCompareTestAndBranch(HIf* if_instr, + HCondition* condition, + Label* true_target, + Label* false_target, + Label* always_true_target); + void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); + void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index f64e801285..b8ac421935 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -77,10 +77,9 @@ inline Condition ARM64Condition(IfCondition cond) { case kCondLE: return le; case kCondGT: return gt; case kCondGE: return ge; - default: - LOG(FATAL) << "Unknown if condition"; } - return nv; // Unreachable. + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); } Location ARM64ReturnLocation(Primitive::Type return_type) { @@ -213,6 +212,8 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; } private: @@ -235,6 +236,8 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; } private: @@ -345,6 +348,8 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; } private: @@ -355,8 +360,7 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit SuspendCheckSlowPathARM64(HSuspendCheck* instruction, - HBasicBlock* successor) + SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor) : instruction_(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { @@ -657,6 +661,13 @@ void CodeGeneratorARM64::Move(HInstruction* instruction, Primitive::Type type = instruction->GetType(); DCHECK_NE(type, Primitive::kPrimVoid); + if (instruction->IsFakeString()) { + // The fake string is an alias for null. 
+ DCHECK(IsBaseline()); + instruction = locations->Out().GetConstant(); + DCHECK(instruction->IsNullConstant()) << instruction->DebugName(); + } + if (instruction->IsCurrentMethod()) { MoveLocation(location, Location::DoubleStackSlot(kCurrentMethodStackOffset)); } else if (locations != nullptr && locations->Out().Equals(location)) { @@ -905,7 +916,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri (source.IsFpuRegister() == Primitive::IsFloatingPointType(type))); __ Str(CPURegisterFrom(source, type), StackOperandFrom(destination)); } else if (source.IsConstant()) { - DCHECK(unspecified_type || CoherentConstantAndType(source, type)); + DCHECK(unspecified_type || CoherentConstantAndType(source, type)) << source << " " << type; UseScratchRegisterScope temps(GetVIXLAssembler()); HConstant* src_cst = source.GetConstant(); CPURegister temp; @@ -1091,15 +1102,11 @@ void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { + ValidateInvokeRuntime(instruction, slow_path); BlockPoolsScope block_pools(GetVIXLAssembler()); __ Ldr(lr, MemOperand(tr, entry_point_offset)); __ Blr(lr); RecordPcInfo(instruction, dex_pc, slow_path); - DCHECK(instruction->IsSuspendCheck() - || instruction->IsBoundsCheck() - || instruction->IsNullCheck() - || instruction->IsDivZeroCheck() - || !IsLeafMethod()); } void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, @@ -1646,6 +1653,11 @@ void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) { GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); } +static bool IsFloatingPointZeroConstant(HInstruction* instruction) { + return (instruction->IsFloatConstant() && (instruction->AsFloatConstant()->GetValue() == 0.0f)) + || (instruction->IsDoubleConstant() && (instruction->AsDoubleConstant()->GetValue() == 0.0)); +} + void LocationsBuilderARM64::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); @@ -1660,13 +1672,10 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - HInstruction* right = compare->InputAt(1); - if ((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) || - (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0))) { - locations->SetInAt(1, Location::ConstantLocation(right->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresFpuRegister()); - } + locations->SetInAt(1, + IsFloatingPointZeroConstant(compare->InputAt(1)) + ? Location::ConstantLocation(compare->InputAt(1)->AsConstant()) + : Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); break; } @@ -1697,12 +1706,8 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { Register result = OutputRegister(compare); FPRegister left = InputFPRegisterAt(compare, 0); if (compare->GetLocations()->InAt(1).IsConstant()) { - if (kIsDebugBuild) { - HInstruction* right = compare->GetLocations()->InAt(1).GetConstant(); - DCHECK((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) || - (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0))); - } - // 0.0 is the only immediate that can be encoded directly in a FCMP instruction. 
+ DCHECK(IsFloatingPointZeroConstant(compare->GetLocations()->InAt(1).GetConstant())); + // 0.0 is the only immediate that can be encoded directly in an FCMP instruction. __ Fcmp(left, 0.0); } else { __ Fcmp(left, InputFPRegisterAt(compare, 1)); @@ -1722,8 +1727,19 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { void LocationsBuilderARM64::VisitCondition(HCondition* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); + + if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, + IsFloatingPointZeroConstant(instruction->InputAt(1)) + ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant()) + : Location::RequiresFpuRegister()); + } else { + // Integer cases. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); + } + if (instruction->NeedsMaterialization()) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -1735,13 +1751,34 @@ void InstructionCodeGeneratorARM64::VisitCondition(HCondition* instruction) { } LocationSummary* locations = instruction->GetLocations(); - Register lhs = InputRegisterAt(instruction, 0); - Operand rhs = InputOperandAt(instruction, 1); Register res = RegisterFrom(locations->Out(), instruction->GetType()); - Condition cond = ARM64Condition(instruction->GetCondition()); - - __ Cmp(lhs, rhs); - __ Cset(res, cond); + IfCondition if_cond = instruction->GetCondition(); + Condition arm64_cond = ARM64Condition(if_cond); + + if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) { + FPRegister lhs = InputFPRegisterAt(instruction, 0); + if (locations->InAt(1).IsConstant()) { + DCHECK(IsFloatingPointZeroConstant(locations->InAt(1).GetConstant())); + // 0.0 is the only immediate that can be encoded directly in an FCMP instruction. + __ Fcmp(lhs, 0.0); + } else { + __ Fcmp(lhs, InputFPRegisterAt(instruction, 1)); + } + __ Cset(res, arm64_cond); + if (instruction->IsFPConditionTrueIfNaN()) { + // res = IsUnordered(arm64_cond) ? 1 : res <=> res = IsNotUnordered(arm64_cond) ? res : 1 + __ Csel(res, res, Operand(1), vc); // VC for "not unordered". + } else if (instruction->IsFPConditionFalseIfNaN()) { + // res = IsUnordered(arm64_cond) ? 0 : res <=> res = IsNotUnordered(arm64_cond) ? res : 0 + __ Csel(res, res, Operand(0), vc); // VC for "not unordered". + } + } else { + // Integer cases. 
+ Register lhs = InputRegisterAt(instruction, 0); + Operand rhs = InputOperandAt(instruction, 1); + __ Cmp(lhs, rhs); + __ Cset(res, arm64_cond); + } } #define FOR_EACH_CONDITION_INSTRUCTION(M) \ @@ -1962,8 +1999,8 @@ void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction Primitive::Type type = instruction->GetType(); - if ((type != Primitive::kPrimInt) && (type != Primitive::kPrimLong)) { - LOG(FATAL) << "Unexpected type " << type << "for DivZeroCheck."; + if ((type == Primitive::kPrimBoolean) || !Primitive::IsIntegralType(type)) { + LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; return; } @@ -2073,33 +2110,58 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. - Register lhs = InputRegisterAt(condition, 0); - Operand rhs = InputOperandAt(condition, 1); - Condition arm64_cond = ARM64Condition(condition->GetCondition()); - if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) { - switch (arm64_cond) { - case eq: - __ Cbz(lhs, true_target); - break; - case ne: - __ Cbnz(lhs, true_target); - break; - case lt: - // Test the sign bit and branch accordingly. - __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); - break; - case ge: - // Test the sign bit and branch accordingly. - __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); - break; - default: - // Without the `static_cast` the compiler throws an error for - // `-Werror=sign-promo`. - LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); + Primitive::Type type = + cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; + + if (Primitive::IsFloatingPointType(type)) { + // FP compares don't like null false_targets. + if (false_target == nullptr) { + false_target = codegen_->GetLabelOf(instruction->AsIf()->IfFalseSuccessor()); + } + FPRegister lhs = InputFPRegisterAt(condition, 0); + if (condition->GetLocations()->InAt(1).IsConstant()) { + DCHECK(IsFloatingPointZeroConstant(condition->GetLocations()->InAt(1).GetConstant())); + // 0.0 is the only immediate that can be encoded directly in an FCMP instruction. + __ Fcmp(lhs, 0.0); + } else { + __ Fcmp(lhs, InputFPRegisterAt(condition, 1)); } + if (condition->IsFPConditionTrueIfNaN()) { + __ B(vs, true_target); // VS for unordered. + } else if (condition->IsFPConditionFalseIfNaN()) { + __ B(vs, false_target); // VS for unordered. + } + __ B(ARM64Condition(condition->GetCondition()), true_target); } else { - __ Cmp(lhs, rhs); - __ B(arm64_cond, true_target); + // Integer cases. + Register lhs = InputRegisterAt(condition, 0); + Operand rhs = InputOperandAt(condition, 1); + Condition arm64_cond = ARM64Condition(condition->GetCondition()); + if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) { + switch (arm64_cond) { + case eq: + __ Cbz(lhs, true_target); + break; + case ne: + __ Cbnz(lhs, true_target); + break; + case lt: + // Test the sign bit and branch accordingly. + __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + case ge: + // Test the sign bit and branch accordingly. + __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + default: + // Without the `static_cast` the compiler throws an error for + // `-Werror=sign-promo`. 
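The Tbnz/Tbz special case above relies on the fact that, for a signed integer compared against zero, "less than zero" is exactly the sign bit, which is why the generated code can branch on that bit instead of emitting a full compare. A host-side check of the equivalence (illustration only):

#include <cstdint>

static bool IsNegativeViaSignBit(int32_t x) {
  // Testing the top bit gives the same answer as (x < 0) for two's-complement values.
  return ((static_cast<uint32_t>(x) >> 31) & 1u) != 0;
}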
+ LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); + } + } else { + __ Cmp(lhs, rhs); + __ B(arm64_cond, true_target); + } } } if (false_target != nullptr) { @@ -2435,6 +2497,10 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { } } +static MemOperand GetExceptionTlsAddress() { + return MemOperand(tr, Thread::ExceptionOffset<kArm64WordSize>().Int32Value()); +} + void LocationsBuilderARM64::VisitLoadException(HLoadException* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); @@ -2442,9 +2508,15 @@ void LocationsBuilderARM64::VisitLoadException(HLoadException* load) { } void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) { - MemOperand exception = MemOperand(tr, Thread::ExceptionOffset<kArm64WordSize>().Int32Value()); - __ Ldr(OutputRegister(instruction), exception); - __ Str(wzr, exception); + __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress()); +} + +void LocationsBuilderARM64::VisitClearException(HClearException* clear) { + new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); +} + +void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { + __ Str(wzr, GetExceptionTlsAddress()); } void LocationsBuilderARM64::VisitLoadLocal(HLoadLocal* load) { @@ -3039,6 +3111,18 @@ void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void LocationsBuilderARM64::VisitFakeString(HFakeString* instruction) { + DCHECK(codegen_->IsBaseline()); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant())); +} + +void InstructionCodeGeneratorARM64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) { + DCHECK(codegen_->IsBaseline()); + // Will be generated at use site. 
+} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 2c610380ed..ac7ee10a5b 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -191,7 +191,7 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { class LocationsBuilderARM64 : public HGraphVisitor { public: - explicit LocationsBuilderARM64(HGraph* graph, CodeGeneratorARM64* codegen) + LocationsBuilderARM64(HGraph* graph, CodeGeneratorARM64* codegen) : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index aa4fd26590..167e025383 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -138,6 +138,8 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS64"; } private: @@ -162,6 +164,8 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS64"; } private: @@ -278,6 +282,8 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS64"; } private: @@ -288,8 +294,7 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit SuspendCheckSlowPathMIPS64(HSuspendCheck* instruction, - HBasicBlock* successor) + SuspendCheckSlowPathMIPS64(HSuspendCheck* instruction, HBasicBlock* successor) : instruction_(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { @@ -971,15 +976,11 @@ void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { + ValidateInvokeRuntime(instruction, slow_path); // TODO: anything related to T9/GP/GOT/PIC/.so's? 
__ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); __ Jalr(T9); RecordPcInfo(instruction, dex_pc, slow_path); - DCHECK(instruction->IsSuspendCheck() - || instruction->IsBoundsCheck() - || instruction->IsNullCheck() - || instruction->IsDivZeroCheck() - || !IsLeafMethod()); } void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, @@ -1894,8 +1895,9 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio Primitive::Type type = instruction->GetType(); - if ((type != Primitive::kPrimInt) && (type != Primitive::kPrimLong)) { + if ((type == Primitive::kPrimBoolean) || !Primitive::IsIntegralType(type)) { LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; + return; } if (value.IsConstant()) { @@ -2544,6 +2546,10 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { } } +static int32_t GetExceptionTlsOffset() { + return Thread::ExceptionOffset<kMips64WordSize>().Int32Value(); +} + void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); @@ -2552,8 +2558,15 @@ void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) { void InstructionCodeGeneratorMIPS64::VisitLoadException(HLoadException* load) { GpuRegister out = load->GetLocations()->Out().AsRegister<GpuRegister>(); - __ LoadFromOffset(kLoadUnsignedWord, out, TR, Thread::ExceptionOffset<kMips64WordSize>().Int32Value()); - __ StoreToOffset(kStoreWord, ZERO, TR, Thread::ExceptionOffset<kMips64WordSize>().Int32Value()); + __ LoadFromOffset(kLoadUnsignedWord, out, TR, GetExceptionTlsOffset()); +} + +void LocationsBuilderMIPS64::VisitClearException(HClearException* clear) { + new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); +} + +void InstructionCodeGeneratorMIPS64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { + __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset()); } void LocationsBuilderMIPS64::VisitLoadLocal(HLoadLocal* load) { @@ -3292,5 +3305,17 @@ void InstructionCodeGeneratorMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual VisitCondition(comp); } +void LocationsBuilderMIPS64::VisitFakeString(HFakeString* instruction) { + DCHECK(codegen_->IsBaseline()); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant())); +} + +void InstructionCodeGeneratorMIPS64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) { + DCHECK(codegen_->IsBaseline()); + // Will be generated at use site. 
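Both the arm64 and mips64 changes above split exception handling into two instructions: HLoadException now only reads the thread-local exception slot (via the new GetExceptionTlsAddress()/GetExceptionTlsOffset() helpers), while a separate HClearException stores zero into it. A rough sketch of that contract follows, using a stand-in struct rather than the real art::Thread, so only the shape of the split is being illustrated.

struct FakeThread {            // stand-in for the thread's TLS exception slot
  void* exception = nullptr;
};

void* LoadException(FakeThread* self) {   // what VisitLoadException now emits: a plain load
  return self->exception;
}

void ClearException(FakeThread* self) {   // what VisitClearException emits: a store of zero
  self->exception = nullptr;
}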
+} + } // namespace mips64 } // namespace art diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index be71443b15..091a3e5d92 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -45,17 +45,23 @@ static constexpr int kC2ConditionMask = 0x400; static constexpr int kFakeReturnRegister = Register(8); #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> +#define QUICK_ENTRY_POINT(x) Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, x)) class NullCheckSlowPathX86 : public SlowPathCodeX86 { public: explicit NullCheckSlowPathX86(HNullCheck* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowNullPointer))); - RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), + instruction_, + instruction_->GetDexPc(), + this); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; } private: @@ -68,11 +74,16 @@ class DivZeroCheckSlowPathX86 : public SlowPathCodeX86 { explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowDivZero))); - RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), + instruction_, + instruction_->GetDexPc(), + this); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; } private: @@ -82,7 +93,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCodeX86 { class DivRemMinusOneSlowPathX86 : public SlowPathCodeX86 { public: - explicit DivRemMinusOneSlowPathX86(Register reg, bool is_div) : reg_(reg), is_div_(is_div) {} + DivRemMinusOneSlowPathX86(Register reg, bool is_div) : reg_(reg), is_div_(is_div) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); @@ -124,10 +135,14 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 { length_location_, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowArrayBounds))); - RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), + instruction_, + instruction_->GetDexPc(), + this); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; } private: @@ -147,8 +162,10 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend))); - RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), + instruction_, + instruction_->GetDexPc(), + this); 
RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -190,8 +207,10 @@ class LoadStringSlowPathX86 : public SlowPathCodeX86 { InvokeRuntimeCallingConvention calling_convention; __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex())); - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pResolveString))); - RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), + instruction_, + instruction_->GetDexPc(), + this); x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); RestoreLiveRegisters(codegen, locations); @@ -224,10 +243,9 @@ class LoadClassSlowPathX86 : public SlowPathCodeX86 { InvokeRuntimeCallingConvention calling_convention; __ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex())); - __ fs()->call(Address::Absolute(do_clinit_ - ? QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeStaticStorage) - : QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeType))); - RecordPcInfo(codegen, at_, dex_pc_); + x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) + : QUICK_ENTRY_POINT(pInitializeType), + at_, dex_pc_, this); // Move the class to the desired location. Location out = locations->Out(); @@ -291,11 +309,16 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 { Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, - pInstanceofNonTrivial))); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), + instruction_, + instruction_->GetDexPc(), + this); } else { DCHECK(instruction_->IsCheckCast()); - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pCheckCast))); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), + instruction_, + instruction_->GetDexPc(), + this); } RecordPcInfo(codegen, instruction_, dex_pc_); @@ -324,9 +347,13 @@ class DeoptimizationSlowPathX86 : public SlowPathCodeX86 { : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pDeoptimize))); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + instruction_, + instruction_->GetDexPc(), + this); // No need to restore live registers. 
DCHECK(instruction_->IsDeoptimize()); HDeoptimize* deoptimize = instruction_->AsDeoptimize(); @@ -344,7 +371,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCodeX86 { #undef __ #define __ down_cast<X86Assembler*>(GetAssembler())-> -inline Condition X86Condition(IfCondition cond) { +inline Condition X86SignedCondition(IfCondition cond) { switch (cond) { case kCondEQ: return kEqual; case kCondNE: return kNotEqual; @@ -352,10 +379,22 @@ inline Condition X86Condition(IfCondition cond) { case kCondLE: return kLessEqual; case kCondGT: return kGreater; case kCondGE: return kGreaterEqual; - default: - LOG(FATAL) << "Unknown if condition"; } - return kEqual; + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + +inline Condition X86UnsignedOrFPCondition(IfCondition cond) { + switch (cond) { + case kCondEQ: return kEqual; + case kCondNE: return kNotEqual; + case kCondLT: return kBelow; + case kCondLE: return kBelowEqual; + case kCondGT: return kAbove; + case kCondGE: return kAboveEqual; + } + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); } void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const { @@ -386,6 +425,15 @@ size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32 return GetFloatingPointSpillSlotSize(); } +void CodeGeneratorX86::InvokeRuntime(Address entry_point, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path) { + ValidateInvokeRuntime(instruction, slow_path); + __ fs()->call(entry_point); + RecordPcInfo(instruction, dex_pc, slow_path); +} + CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const X86InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options) @@ -892,46 +940,12 @@ void InstructionCodeGeneratorX86::VisitExit(HExit* exit) { void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label) { - bool gt_bias = cond->IsGtBias(); - IfCondition if_cond = cond->GetCondition(); - Condition ccode = X86Condition(if_cond); - switch (if_cond) { - case kCondEQ: - if (!gt_bias) { - __ j(kParityEven, false_label); - } - break; - case kCondNE: - if (!gt_bias) { - __ j(kParityEven, true_label); - } - break; - case kCondLT: - if (gt_bias) { - __ j(kParityEven, false_label); - } - ccode = kBelow; - break; - case kCondLE: - if (gt_bias) { - __ j(kParityEven, false_label); - } - ccode = kBelowEqual; - break; - case kCondGT: - if (gt_bias) { - __ j(kParityEven, true_label); - } - ccode = kAbove; - break; - case kCondGE: - if (gt_bias) { - __ j(kParityEven, true_label); - } - ccode = kAboveEqual; - break; + if (cond->IsFPConditionTrueIfNaN()) { + __ j(kUnordered, true_label); + } else if (cond->IsFPConditionFalseIfNaN()) { + __ j(kUnordered, false_label); } - __ j(ccode, true_label); + __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label); } void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, @@ -942,43 +956,37 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, Location right = locations->InAt(1); IfCondition if_cond = cond->GetCondition(); - Register left_low = left.AsRegisterPairLow<Register>(); Register left_high = left.AsRegisterPairHigh<Register>(); + Register left_low = left.AsRegisterPairLow<Register>(); IfCondition true_high_cond = if_cond; IfCondition false_high_cond = cond->GetOppositeCondition(); - Condition final_condition = X86Condition(if_cond); + Condition final_condition = X86UnsignedOrFPCondition(if_cond); // Set the conditions for the test, remembering that == needs to be // 
decided using the low words. switch (if_cond) { case kCondEQ: - false_high_cond = kCondNE; - break; case kCondNE: - false_high_cond = kCondEQ; + // Nothing to do. break; case kCondLT: false_high_cond = kCondGT; - final_condition = kBelow; break; case kCondLE: true_high_cond = kCondLT; - final_condition = kBelowEqual; break; case kCondGT: false_high_cond = kCondLT; - final_condition = kAbove; break; case kCondGE: true_high_cond = kCondGT; - final_condition = kAboveEqual; break; } if (right.IsConstant()) { int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); - int32_t val_low = Low32Bits(value); int32_t val_high = High32Bits(value); + int32_t val_low = Low32Bits(value); if (val_high == 0) { __ testl(left_high, left_high); @@ -986,12 +994,12 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, __ cmpl(left_high, Immediate(val_high)); } if (if_cond == kCondNE) { - __ j(X86Condition(true_high_cond), true_label); + __ j(X86SignedCondition(true_high_cond), true_label); } else if (if_cond == kCondEQ) { - __ j(X86Condition(false_high_cond), false_label); + __ j(X86SignedCondition(false_high_cond), false_label); } else { - __ j(X86Condition(true_high_cond), true_label); - __ j(X86Condition(false_high_cond), false_label); + __ j(X86SignedCondition(true_high_cond), true_label); + __ j(X86SignedCondition(false_high_cond), false_label); } // Must be equal high, so compare the lows. if (val_low == 0) { @@ -1000,17 +1008,17 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, __ cmpl(left_low, Immediate(val_low)); } } else { - Register right_low = right.AsRegisterPairLow<Register>(); Register right_high = right.AsRegisterPairHigh<Register>(); + Register right_low = right.AsRegisterPairLow<Register>(); __ cmpl(left_high, right_high); if (if_cond == kCondNE) { - __ j(X86Condition(true_high_cond), true_label); + __ j(X86SignedCondition(true_high_cond), true_label); } else if (if_cond == kCondEQ) { - __ j(X86Condition(false_high_cond), false_label); + __ j(X86SignedCondition(false_high_cond), false_label); } else { - __ j(X86Condition(true_high_cond), true_label); - __ j(X86Condition(false_high_cond), false_label); + __ j(X86SignedCondition(true_high_cond), true_label); + __ j(X86SignedCondition(false_high_cond), false_label); } // Must be equal high, so compare the lows. __ cmpl(left_low, right_low); @@ -1045,12 +1053,10 @@ void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HIf* if_instr, GenerateLongComparesAndJumps(condition, true_target, false_target); break; case Primitive::kPrimFloat: - DCHECK(right.IsFpuRegister()); __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); GenerateFPJumps(condition, true_target, false_target); break; case Primitive::kPrimDouble: - DCHECK(right.IsFpuRegister()); __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); GenerateFPJumps(condition, true_target, false_target); break; @@ -1080,7 +1086,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio DCHECK_EQ(cond_value, 0); } } else { - bool materialized = + bool is_materialized = !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization(); // Moves do not affect the eflags register, so if the condition is // evaluated just before the if, we don't need to evaluate it @@ -1089,8 +1095,8 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio Primitive::Type type = cond->IsCondition() ? 
cond->InputAt(0)->GetType() : Primitive::kPrimInt; bool eflags_set = cond->IsCondition() && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction) - && type == Primitive::kPrimInt; - if (materialized) { + && (type != Primitive::kPrimLong && !Primitive::IsFloatingPointType(type)); + if (is_materialized) { if (!eflags_set) { // Materialized condition, compare against 0. Location lhs = instruction->GetLocations()->InAt(0); @@ -1101,9 +1107,12 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio } __ j(kNotEqual, true_target); } else { - __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); + __ j(X86SignedCondition(cond->AsCondition()->GetCondition()), true_target); } } else { + // Condition has not been materialized, use its inputs as the + // comparison and its condition as the branch condition. + // Is this a long or FP comparison that has been folded into the HCondition? if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { // Generate the comparison directly. @@ -1114,6 +1123,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio always_true_target); return; } + Location lhs = cond->GetLocations()->InAt(0); Location rhs = cond->GetLocations()->InAt(1); // LHS is guaranteed to be in a register (see @@ -1130,7 +1140,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio } else { __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); } - __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); + __ j(X86SignedCondition(cond->AsCondition()->GetCondition()), true_target); } } if (false_target != nullptr) { @@ -1288,7 +1298,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) { } else { __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); } - __ setb(X86Condition(cond->GetCondition()), reg); + __ setb(X86SignedCondition(cond->GetCondition()), reg); return; } case Primitive::kPrimLong: @@ -1307,12 +1317,12 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) { // Convert the jumps into the result. Label done_label; - // false case: result = 0; + // False case: result = 0. __ Bind(&false_label); __ xorl(reg, reg); __ jmp(&done_label); - // True case: result = 1 + // True case: result = 1. __ Bind(&true_label); __ movl(reg, Immediate(1)); __ Bind(&done_label); @@ -2041,14 +2051,18 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimFloat: // Processing a Dex `float-to-long' instruction. - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pF2l))); - codegen_->RecordPcInfo(conversion, conversion->GetDexPc()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pF2l), + conversion, + conversion->GetDexPc(), + nullptr); break; case Primitive::kPrimDouble: // Processing a Dex `double-to-long' instruction. - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pD2l))); - codegen_->RecordPcInfo(conversion, conversion->GetDexPc()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pD2l), + conversion, + conversion->GetDexPc(), + nullptr); break; default: @@ -2374,7 +2388,12 @@ void LocationsBuilderX86::VisitMul(HMul* mul) { case Primitive::kPrimInt: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); - locations->SetOut(Location::SameAsFirstInput()); + if (mul->InputAt(1)->IsIntConstant()) { + // Can use 3 operand multiply. 
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::SameAsFirstInput()); + } break; case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); @@ -2402,21 +2421,24 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { LocationSummary* locations = mul->GetLocations(); Location first = locations->InAt(0); Location second = locations->InAt(1); - DCHECK(first.Equals(locations->Out())); + Location out = locations->Out(); switch (mul->GetResultType()) { - case Primitive::kPrimInt: { - if (second.IsRegister()) { + case Primitive::kPrimInt: + // The constant may have ended up in a register, so test explicitly to avoid + // problems where the output may not be the same as the first operand. + if (mul->InputAt(1)->IsIntConstant()) { + Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue()); + __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm); + } else if (second.IsRegister()) { + DCHECK(first.Equals(out)); __ imull(first.AsRegister<Register>(), second.AsRegister<Register>()); - } else if (second.IsConstant()) { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); - __ imull(first.AsRegister<Register>(), imm); } else { DCHECK(second.IsStackSlot()); + DCHECK(first.Equals(out)); __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); } break; - } case Primitive::kPrimLong: { Register in1_hi = first.AsRegisterPairHigh<Register>(); @@ -2797,9 +2819,15 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>()); if (is_div) { - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pLdiv))); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), + instruction, + instruction->GetDexPc(), + nullptr); } else { - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pLmod))); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), + instruction, + instruction->GetDexPc(), + nullptr); } uint32_t dex_pc = is_div ? instruction->AsDiv()->GetDexPc() @@ -2955,6 +2983,9 @@ void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); switch (instruction->GetType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: case Primitive::kPrimInt: { locations->SetInAt(0, Location::Any()); break; @@ -2982,6 +3013,9 @@ void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) Location value = locations->InAt(0); switch (instruction->GetType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: case Primitive::kPrimInt: { if (value.IsRegister()) { __ testl(value.AsRegister<Register>(), value.AsRegister<Register>()); @@ -3251,9 +3285,11 @@ void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. 
- __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(instruction->GetEntrypoint()))); - - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + codegen_->InvokeRuntime( + Address::Absolute(GetThreadOffset<kX86WordSize>(instruction->GetEntrypoint())), + instruction, + instruction->GetDexPc(), + nullptr); DCHECK(!codegen_->IsLeafMethod()); } @@ -3273,9 +3309,11 @@ void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. - __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(instruction->GetEntrypoint()))); - - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + codegen_->InvokeRuntime( + Address::Absolute(GetThreadOffset<kX86WordSize>(instruction->GetEntrypoint())), + instruction, + instruction->GetDexPc(), + nullptr); DCHECK(!codegen_->IsLeafMethod()); } @@ -4178,8 +4216,10 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { DCHECK(!codegen_->IsLeafMethod()); // Note: if heap poisoning is enabled, pAputObject takes cares // of poisoning the reference. - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAputObject))); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), + instruction, + instruction->GetDexPc(), + nullptr); } break; } @@ -4561,7 +4601,11 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>()); + // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary. + DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>()); + __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>()); + __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>()); + __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange(source.AsRegister<Register>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { @@ -4707,6 +4751,10 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { __ Bind(slow_path->GetExitLabel()); } +static Address GetExceptionTlsAddress() { + return Address::Absolute(Thread::ExceptionOffset<kX86WordSize>().Int32Value()); +} + void LocationsBuilderX86::VisitLoadException(HLoadException* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); @@ -4714,9 +4762,15 @@ void LocationsBuilderX86::VisitLoadException(HLoadException* load) { } void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) { - Address address = Address::Absolute(Thread::ExceptionOffset<kX86WordSize>().Int32Value()); - __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), address); - __ fs()->movl(address, Immediate(0)); + __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress()); +} + +void LocationsBuilderX86::VisitClearException(HClearException* clear) { + new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); +} + +void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { + __ fs()->movl(GetExceptionTlsAddress(), Immediate(0)); } void 
LocationsBuilderX86::VisitThrow(HThrow* instruction) { @@ -4727,8 +4781,10 @@ void LocationsBuilderX86::VisitThrow(HThrow* instruction) { } void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pDeliverException))); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException), + instruction, + instruction->GetDexPc(), + nullptr); } void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { @@ -4839,10 +4895,11 @@ void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) } void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) { - __ fs()->call(Address::Absolute(instruction->IsEnter() - ? QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pLockObject) - : QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pUnlockObject))); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject) + : QUICK_ENTRY_POINT(pUnlockObject), + instruction, + instruction->GetDexPc(), + nullptr); } void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } @@ -4990,6 +5047,18 @@ void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void LocationsBuilderX86::VisitFakeString(HFakeString* instruction) { + DCHECK(codegen_->IsBaseline()); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant())); +} + +void InstructionCodeGeneratorX86::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) { + DCHECK(codegen_->IsBaseline()); + // Will be generated at use site. +} + #undef __ } // namespace x86 diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 65d6e0a6c4..2e3d4d4bf7 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -232,6 +232,12 @@ class CodeGeneratorX86 : public CodeGenerator { size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + // Generate code to invoke a runtime entry point. 
+ void InvokeRuntime(Address entry_point, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path); + size_t GetWordSize() const OVERRIDE { return kX86WordSize; } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ddaa60db5e..2c5cef3822 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -47,18 +47,23 @@ static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 static constexpr int kC2ConditionMask = 0x400; #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> +#define QUICK_ENTRY_POINT(x) Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x), true) class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 { public: explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - __ gs()->call( - Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true)); - RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); + x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), + instruction_, + instruction_->GetDexPc(), + this); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; } private: @@ -71,12 +76,16 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCodeX86_64 { explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - __ gs()->call( - Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowDivZero), true)); - RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); + x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), + instruction_, + instruction_->GetDexPc(), + this); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; } private: @@ -86,7 +95,7 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCodeX86_64 { class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 { public: - explicit DivRemMinusOneSlowPathX86_64(Register reg, Primitive::Type type, bool is_div) + DivRemMinusOneSlowPathX86_64(Register reg, Primitive::Type type, bool is_div) : cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { @@ -95,7 +104,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 { if (is_div_) { __ negl(cpu_reg_); } else { - __ movl(cpu_reg_, Immediate(0)); + __ xorl(cpu_reg_, cpu_reg_); } } else { @@ -120,15 +129,17 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 { class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 { public: - explicit SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor) + SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor) : instruction_(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - __ 
gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pTestSuspend), true)); - RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); + x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), + instruction_, + instruction_->GetDexPc(), + this); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -166,6 +177,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCodeX86_64 { length_location_(length_location) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. @@ -177,11 +189,12 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCodeX86_64 { length_location_, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - __ gs()->call(Address::Absolute( - QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowArrayBounds), true)); - RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); + x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), + instruction_, instruction_->GetDexPc(), this); } + bool IsFatal() const OVERRIDE { return true; } + const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; } private: @@ -211,10 +224,9 @@ class LoadClassSlowPathX86_64 : public SlowPathCodeX86_64 { InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex())); - __ gs()->call(Address::Absolute((do_clinit_ - ? QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeStaticStorage) - : QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeType)), true)); - RecordPcInfo(codegen, at_, dex_pc_); + x64_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) + : QUICK_ENTRY_POINT(pInitializeType), + at_, dex_pc_, this); Location out = locations->Out(); // Move the class to the desired location. 
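Earlier in this diff the x86 GenerateFPJumps() was simplified, and the x86-64 version receives the same treatment further down: ucomiss/ucomisd report a NaN operand as "unordered" via the parity flag, so a single j(kUnordered, ...) to whichever target the HCondition designates for NaN replaces the old per-condition gt_bias switch, and the ordered case then branches on the unsigned (below/above) condition codes. A small illustrative model of that control flow, not taken from the patch, with booleans standing in for the flag state:

#include <functional>

void GenerateFpJumpsModel(bool cond_holds, bool unordered,
                          bool true_if_nan, bool false_if_nan,
                          const std::function<void()>& goto_true,
                          const std::function<void()>& goto_false) {
  if (unordered) {                                // parity flag set by ucomiss/ucomisd
    if (true_if_nan) { goto_true(); return; }     // __ j(kUnordered, true_label)
    if (false_if_nan) { goto_false(); return; }   // __ j(kUnordered, false_label)
  }
  if (cond_holds) {
    goto_true();            // __ j(X86UnsignedOrFPCondition(cond), true_label)
  }
  // Otherwise fall through toward the false block, as the generated code does.
}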
@@ -261,9 +273,10 @@ class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 { InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction_->GetStringIndex())); - __ gs()->call(Address::Absolute( - QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pResolveString), true)); - RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); + x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), + instruction_, + instruction_->GetDexPc(), + this); x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); @@ -309,14 +322,17 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 { Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { - __ gs()->call( - Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInstanceofNonTrivial), true)); + x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), + instruction_, + dex_pc_, + this); } else { DCHECK(instruction_->IsCheckCast()); - __ gs()->call( - Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pCheckCast), true)); + x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), + instruction_, + dex_pc_, + this); } - RecordPcInfo(codegen, instruction_, dex_pc_); if (instruction_->IsInstanceOf()) { x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); @@ -343,14 +359,15 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCodeX86_64 { : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - __ gs()->call( - Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pDeoptimize), true)); DCHECK(instruction_->IsDeoptimize()); HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - codegen->RecordPcInfo(instruction_, dex_pc, this); + x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + deoptimize, + deoptimize->GetDexPc(), + this); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } @@ -363,7 +380,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCodeX86_64 { #undef __ #define __ down_cast<X86_64Assembler*>(GetAssembler())-> -inline Condition X86_64Condition(IfCondition cond) { +inline Condition X86_64IntegerCondition(IfCondition cond) { switch (cond) { case kCondEQ: return kEqual; case kCondNE: return kNotEqual; @@ -371,10 +388,22 @@ inline Condition X86_64Condition(IfCondition cond) { case kCondLE: return kLessEqual; case kCondGT: return kGreater; case kCondGE: return kGreaterEqual; - default: - LOG(FATAL) << "Unknown if condition"; } - return kEqual; + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + +inline Condition X86_64FPCondition(IfCondition cond) { + switch (cond) { + case kCondEQ: return kEqual; + case kCondNE: return kNotEqual; + case kCondLT: return kBelow; + case kCondLE: return kBelowEqual; + case kCondGT: return kAbove; + case kCondGE: return kAboveEqual; + }; + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); } void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, @@ -451,6 +480,15 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin return kX86_64WordSize; } +void CodeGeneratorX86_64::InvokeRuntime(Address entry_point, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path) 
{ + ValidateInvokeRuntime(instruction, slow_path); + __ gs()->call(entry_point); + RecordPcInfo(instruction, dex_pc, slow_path); +} + static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); @@ -699,8 +737,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { DCHECK(constant->IsLongConstant()); value = constant->AsLongConstant()->GetValue(); } - Load64BitValue(CpuRegister(TMP), value); - __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + Store64BitValueToStack(destination, value); } else { DCHECK(source.IsDoubleStackSlot()); __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); @@ -734,8 +771,7 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, if (location.IsRegister()) { Load64BitValue(location.AsRegister<CpuRegister>(), value); } else if (location.IsDoubleStackSlot()) { - Load64BitValue(CpuRegister(TMP), value); - __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP)); + Store64BitValueToStack(location, value); } else { DCHECK(location.IsConstant()); DCHECK_EQ(location.GetConstant(), const_to_move); @@ -836,46 +872,12 @@ void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) { void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label) { - bool gt_bias = cond->IsGtBias(); - IfCondition if_cond = cond->GetCondition(); - Condition ccode = X86_64Condition(if_cond); - switch (if_cond) { - case kCondEQ: - if (!gt_bias) { - __ j(kParityEven, false_label); - } - break; - case kCondNE: - if (!gt_bias) { - __ j(kParityEven, true_label); - } - break; - case kCondLT: - if (gt_bias) { - __ j(kParityEven, false_label); - } - ccode = kBelow; - break; - case kCondLE: - if (gt_bias) { - __ j(kParityEven, false_label); - } - ccode = kBelowEqual; - break; - case kCondGT: - if (gt_bias) { - __ j(kParityEven, true_label); - } - ccode = kAbove; - break; - case kCondGE: - if (gt_bias) { - __ j(kParityEven, true_label); - } - ccode = kAboveEqual; - break; + if (cond->IsFPConditionTrueIfNaN()) { + __ j(kUnordered, true_label); + } else if (cond->IsFPConditionFalseIfNaN()) { + __ j(kUnordered, false_label); } - __ j(ccode, true_label); + __ j(X86_64FPCondition(cond->GetCondition()), true_label); } void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr, @@ -911,7 +913,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr, __ cmpq(left_reg, Immediate(static_cast<int32_t>(value))); } } else { - // Value won't fit in an 32-bit integer. + // Value won't fit in a 32-bit integer. 
__ cmpq(left_reg, codegen_->LiteralInt64Address(value)); } } else if (right.IsDoubleStackSlot()) { @@ -919,7 +921,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr, } else { __ cmpq(left_reg, right.AsRegister<CpuRegister>()); } - __ j(X86_64Condition(condition->GetCondition()), true_target); + __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); break; } case Primitive::kPrimFloat: { @@ -978,7 +980,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc DCHECK_EQ(cond_value, 0); } } else { - bool materialized = + bool is_materialized = !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization(); // Moves do not affect the eflags register, so if the condition is // evaluated just before the if, we don't need to evaluate it @@ -989,7 +991,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction) && !Primitive::IsFloatingPointType(type); - if (materialized) { + if (is_materialized) { if (!eflags_set) { // Materialized condition, compare against 0. Location lhs = instruction->GetLocations()->InAt(0); @@ -1001,16 +1003,20 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc } __ j(kNotEqual, true_target); } else { - __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target); + __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); } } else { + // Condition has not been materialized, use its inputs as the + // comparison and its condition as the branch condition. + // Is this a long or FP comparison that has been folded into the HCondition? if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { - // Generate the comparison directly + // Generate the comparison directly. GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(), true_target, false_target, always_true_target); return; } + Location lhs = cond->GetLocations()->InAt(0); Location rhs = cond->GetLocations()->InAt(1); if (rhs.IsRegister()) { @@ -1026,7 +1032,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } - __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target); + __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); } } if (false_target != nullptr) { @@ -1175,7 +1181,7 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) { } else { __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } - __ setcc(X86_64Condition(cond->GetCondition()), reg); + __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); return; case Primitive::kPrimLong: // Clear output register: setcc only sets the low byte. @@ -1198,7 +1204,7 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) { } else { __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } - __ setcc(X86_64Condition(cond->GetCondition()), reg); + __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); return; case Primitive::kPrimFloat: { XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>(); @@ -1231,12 +1237,12 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) { // Convert the jumps into the result. Label done_label; - // false case: result = 0; + // False case: result = 0. 
__ Bind(&false_label); __ xorl(reg, reg); __ jmp(&done_label); - // True case: result = 1 + // True case: result = 1. __ Bind(&true_label); __ movl(reg, Immediate(1)); __ Bind(&done_label); @@ -1829,14 +1835,12 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { // Processing a Dex `float-to-int' instruction. locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); break; case Primitive::kPrimDouble: // Processing a Dex `double-to-int' instruction. locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); break; default: @@ -1864,14 +1868,12 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { // Processing a Dex `float-to-long' instruction. locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); break; case Primitive::kPrimDouble: // Processing a Dex `double-to-long' instruction. locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); break; default: @@ -2047,14 +2049,11 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver // Processing a Dex `float-to-int' instruction. XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; __ movl(output, Immediate(kPrimIntMax)); - // temp = int-to-float(output) - __ cvtsi2ss(temp, output, false); - // if input >= temp goto done - __ comiss(input, temp); + // if input >= (float)INT_MAX goto done + __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax)); __ j(kAboveEqual, &done); // if input == NaN goto nan __ j(kUnordered, &nan); @@ -2072,14 +2071,11 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver // Processing a Dex `double-to-int' instruction. XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; __ movl(output, Immediate(kPrimIntMax)); - // temp = int-to-double(output) - __ cvtsi2sd(temp, output); - // if input >= temp goto done - __ comisd(input, temp); + // if input >= (double)INT_MAX goto done + __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax)); __ j(kAboveEqual, &done); // if input == NaN goto nan __ j(kUnordered, &nan); @@ -2117,14 +2113,11 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver // Processing a Dex `float-to-long' instruction. 
XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; codegen_->Load64BitValue(output, kPrimLongMax); - // temp = long-to-float(output) - __ cvtsi2ss(temp, output, true); - // if input >= temp goto done - __ comiss(input, temp); + // if input >= (float)LONG_MAX goto done + __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax)); __ j(kAboveEqual, &done); // if input == NaN goto nan __ j(kUnordered, &nan); @@ -2142,14 +2135,11 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver // Processing a Dex `double-to-long' instruction. XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; codegen_->Load64BitValue(output, kPrimLongMax); - // temp = long-to-double(output) - __ cvtsi2sd(temp, output, true); - // if input >= temp goto done - __ comisd(input, temp); + // if input >= (double)LONG_MAX goto done + __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax)); __ j(kAboveEqual, &done); // if input == NaN goto nan __ j(kUnordered, &nan); @@ -2553,13 +2543,19 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); - locations->SetOut(Location::SameAsFirstInput()); + if (mul->InputAt(1)->IsIntConstant()) { + // Can use 3 operand multiply. + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::SameAsFirstInput()); + } break; } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1))); - if (locations->InAt(1).IsConstant()) { + locations->SetInAt(1, Location::Any()); + if (mul->InputAt(1)->IsLongConstant() && + IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) { // Can use 3 operand multiply. locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } else { @@ -2584,37 +2580,51 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { LocationSummary* locations = mul->GetLocations(); Location first = locations->InAt(0); Location second = locations->InAt(1); + Location out = locations->Out(); switch (mul->GetResultType()) { - case Primitive::kPrimInt: { - DCHECK(first.Equals(locations->Out())); - if (second.IsRegister()) { + case Primitive::kPrimInt: + // The constant may have ended up in a register, so test explicitly to avoid + // problems where the output may not be the same as the first operand. 
+ if (mul->InputAt(1)->IsIntConstant()) { + Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue()); + __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm); + } else if (second.IsRegister()) { + DCHECK(first.Equals(out)); __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); - } else if (second.IsConstant()) { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); - __ imull(first.AsRegister<CpuRegister>(), imm); } else { + DCHECK(first.Equals(out)); DCHECK(second.IsStackSlot()); __ imull(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); } break; - } case Primitive::kPrimLong: { - if (second.IsConstant()) { - int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(value)); - __ imulq(locations->Out().AsRegister<CpuRegister>(), - first.AsRegister<CpuRegister>(), - Immediate(static_cast<int32_t>(value))); - } else { - DCHECK(first.Equals(locations->Out())); + // The constant may have ended up in a register, so test explicitly to avoid + // problems where the output may not be the same as the first operand. + if (mul->InputAt(1)->IsLongConstant()) { + int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue(); + if (IsInt<32>(value)) { + __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), + Immediate(static_cast<int32_t>(value))); + } else { + // Have to use the constant area. + DCHECK(first.Equals(out)); + __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value)); + } + } else if (second.IsRegister()) { + DCHECK(first.Equals(out)); __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + } else { + DCHECK(second.IsDoubleStackSlot()); + DCHECK(first.Equals(out)); + __ imulq(first.AsRegister<CpuRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); } break; } case Primitive::kPrimFloat: { - DCHECK(first.Equals(locations->Out())); + DCHECK(first.Equals(out)); if (second.IsFpuRegister()) { __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { @@ -2629,7 +2639,7 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { } case Primitive::kPrimDouble: { - DCHECK(first.Equals(locations->Out())); + DCHECK(first.Equals(out)); if (second.IsFpuRegister()) { __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { @@ -3139,6 +3149,9 @@ void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instructio Location value = locations->InAt(0); switch (instruction->GetType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: case Primitive::kPrimInt: { if (value.IsRegister()) { __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>()); @@ -3290,11 +3303,14 @@ void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. 
- __ gs()->call( - Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true)); + + codegen_->InvokeRuntime( + Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true), + instruction, + instruction->GetDexPc(), + nullptr); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { @@ -3314,11 +3330,13 @@ void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. - __ gs()->call( - Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true)); + codegen_->InvokeRuntime( + Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true), + instruction, + instruction->GetDexPc(), + nullptr); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) { @@ -4005,10 +4023,11 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { DCHECK_EQ(value_type, Primitive::kPrimNot); // Note: if heap poisoning is enabled, pAputObject takes cares // of poisoning the reference. - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAputObject), - true)); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), + instruction, + instruction->GetDexPc(), + nullptr); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } break; } @@ -4290,8 +4309,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; - codegen_->Load64BitValue(CpuRegister(TMP), value); - __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + codegen_->Store64BitValueToStack(destination, value); } } else if (constant->IsFloatConstant()) { float fp_value = constant->AsFloatConstant()->GetValue(); @@ -4322,8 +4340,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } } else { DCHECK(destination.IsDoubleStackSlot()) << destination; - codegen_->Load64BitValue(CpuRegister(TMP), value); - __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + codegen_->Store64BitValueToStack(destination, value); } } } else if (source.IsFpuRegister()) { @@ -4525,6 +4542,10 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { __ Bind(slow_path->GetExitLabel()); } +static Address GetExceptionTlsAddress() { + return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true); +} + void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); @@ -4532,10 +4553,15 @@ void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) { } void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) { - Address address = Address::Absolute( - Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true); - __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), address); - __ gs()->movl(address, Immediate(0)); + __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress()); +} + +void 
LocationsBuilderX86_64::VisitClearException(HClearException* clear) { + new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); +} + +void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { + __ gs()->movl(GetExceptionTlsAddress(), Immediate(0)); } void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) { @@ -4546,9 +4572,10 @@ void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) { } void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { - __ gs()->call( - Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pDeliverException), true)); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException), + instruction, + instruction->GetDexPc(), + nullptr); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -4658,11 +4685,11 @@ void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instructio } void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { - __ gs()->call(Address::Absolute(instruction->IsEnter() - ? QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pLockObject) - : QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pUnlockObject), - true)); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject) + : QUICK_ENTRY_POINT(pUnlockObject), + instruction, + instruction->GetDexPc(), + nullptr); } void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } @@ -4792,6 +4819,18 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void LocationsBuilderX86_64::VisitFakeString(HFakeString* instruction) { + DCHECK(codegen_->IsBaseline()); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(GetGraph()->GetNullConstant())); +} + +void InstructionCodeGeneratorX86_64::VisitFakeString(HFakeString* instruction ATTRIBUTE_UNUSED) { + DCHECK(codegen_->IsBaseline()); + // Will be generated at use site. +} + void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { if (value == 0) { __ xorl(dest, dest); @@ -4803,6 +4842,18 @@ void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { } } +void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) { + DCHECK(dest.IsDoubleStackSlot()); + if (IsInt<32>(value)) { + // Can move directly as an int32 constant. + __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), + Immediate(static_cast<int32_t>(value))); + } else { + Load64BitValue(CpuRegister(TMP), value); + __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP)); + } +} + void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { // Generate the constant area if needed. 
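The exception-handling change above splits the old load-and-clear sequence into two instructions: `HLoadException` now only reads the per-thread exception slot (via `GetExceptionTlsAddress()`), and the new `HClearException` only zeroes it. A host-side sketch of that split, using an ordinary `thread_local` pointer as a stand-in for the `gs:`-relative Thread slot:

    #include <cassert>

    // Illustrative model only: the pending exception lives in a thread-local slot.
    thread_local void* tls_exception = nullptr;

    void* LoadException()  { return tls_exception; }     // movl out, gs:[exception_offset]
    void  ClearException() { tls_exception = nullptr; }  // movl gs:[exception_offset], 0

    int main() {
      int dummy;
      tls_exception = &dummy;          // pretend the runtime stored a thrown exception
      void* caught = LoadException();  // catch handler reads it...
      ClearException();                // ...and clears it as a separate step
      assert(caught == &dummy && LoadException() == nullptr);
    }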
X86_64Assembler* assembler = GetAssembler(); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 4b90381f00..41bebac240 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -232,6 +232,12 @@ class CodeGeneratorX86_64 : public CodeGenerator { size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + // Generate code to invoke a runtime entry point. + void InvokeRuntime(Address entry_point, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path); + size_t GetWordSize() const OVERRIDE { return kX86_64WordSize; } @@ -316,6 +322,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Load a 64 bit value into a register in the most efficient manner. void Load64BitValue(CpuRegister dest, int64_t value); + // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. + void Store64BitValueToStack(Location dest, int64_t value); + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 11f6362294..10e4bc98a6 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -81,7 +81,7 @@ static void TestCode(const uint16_t* data, * offset * ------ * v0 <- 1 0. const/4 v0, #+1 - * v1 <- -v0 1. neg-int v0, v1 + * v1 <- -v0 1. neg-int v1, v0 * return v1 2. return v1 */ TEST(ConstantFolding, IntConstantFoldingNegation) { @@ -132,6 +132,69 @@ TEST(ConstantFolding, IntConstantFoldingNegation) { } /** + * Tiny three-register program exercising long constant folding on negation. + * + * 16-bit + * offset + * ------ + * (v0, v1) <- 4294967296 0. const-wide v0 #+4294967296 + * (v2, v3) <- -(v0, v1) 1. neg-long v2, v0 + * return (v2, v3) 2. return-wide v2 + */ +TEST(ConstantFolding, LongConstantFoldingNegation) { + const int64_t input = INT64_C(4294967296); // 2^32 + const uint16_t word0 = Low16Bits(Low32Bits(input)); // LSW. + const uint16_t word1 = High16Bits(Low32Bits(input)); + const uint16_t word2 = Low16Bits(High32Bits(input)); + const uint16_t word3 = High16Bits(High32Bits(input)); // MSW. + const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( + Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3, + Instruction::NEG_LONG | 2 << 8 | 0 << 12, + Instruction::RETURN_WIDE | 2 << 8); + + std::string expected_before = + "BasicBlock 0, succ: 1\n" + " 4: LongConstant [7]\n" + " 12: SuspendCheck\n" + " 13: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 2\n" + " 7: Neg(4) [10]\n" + " 10: Return(7)\n" + "BasicBlock 2, pred: 1\n" + " 11: Exit\n"; + + // Expected difference after constant folding. + diff_t expected_cf_diff = { + { " 4: LongConstant [7]\n", " 4: LongConstant\n" }, + { " 12: SuspendCheck\n", " 12: SuspendCheck\n" + " 14: LongConstant [10]\n" }, + { " 7: Neg(4) [10]\n", removed }, + { " 10: Return(7)\n", " 10: Return(14)\n" } + }; + std::string expected_after_cf = Patch(expected_before, expected_cf_diff); + + // Check the value of the computed constant. + auto check_after_cf = [](HGraph* graph) { + HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0); + ASSERT_TRUE(inst->IsLongConstant()); + ASSERT_EQ(inst->AsLongConstant()->GetValue(), INT64_C(-4294967296)); + }; + + // Expected difference after dead code elimination. 
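The new long constant-folding test above encodes its `const-wide` literal as four 16-bit code units, least-significant word first, via `Low16Bits`/`High16Bits`/`Low32Bits`/`High32Bits`. A small round-trip of that slicing with plain stand-in helpers (assumed to behave like the ART utilities of the same name):

    #include <cassert>
    #include <cstdint>

    uint16_t Low16(uint32_t v)  { return static_cast<uint16_t>(v); }
    uint16_t High16(uint32_t v) { return static_cast<uint16_t>(v >> 16); }
    uint32_t Low32(uint64_t v)  { return static_cast<uint32_t>(v); }
    uint32_t High32(uint64_t v) { return static_cast<uint32_t>(v >> 32); }

    int main() {
      const uint64_t input = UINT64_C(4294967296);  // 2^32, as in the test above
      // const-wide stores its 64-bit literal as four 16-bit code units, LSW first.
      uint16_t word0 = Low16(Low32(input));
      uint16_t word1 = High16(Low32(input));
      uint16_t word2 = Low16(High32(input));
      uint16_t word3 = High16(High32(input));
      uint64_t rebuilt = static_cast<uint64_t>(word0)
                       | (static_cast<uint64_t>(word1) << 16)
                       | (static_cast<uint64_t>(word2) << 32)
                       | (static_cast<uint64_t>(word3) << 48);
      assert(rebuilt == input);
      assert(word0 == 0 && word1 == 0 && word2 == 1 && word3 == 0);
    }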
+ diff_t expected_dce_diff = { + { " 4: LongConstant\n", removed }, + }; + std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); + + TestCode(data, + expected_before, + expected_after_cf, + expected_after_dce, + check_after_cf, + Primitive::kPrimLong); +} + +/** * Tiny three-register program exercising int constant folding on addition. * * 16-bit diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 9679d0ab70..e4bc9e68e1 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -89,6 +89,33 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { block->GetBlockId())); } + // Ensure that the only Return(Void) and Throw jump to Exit. An exiting + // TryBoundary may be between a Throw and the Exit if the Throw is in a try. + if (block->IsExitBlock()) { + for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) { + HBasicBlock* predecessor = block->GetPredecessors().Get(i); + if (predecessor->IsSingleTryBoundary() + && !predecessor->GetLastInstruction()->AsTryBoundary()->IsEntry()) { + HBasicBlock* real_predecessor = predecessor->GetSinglePredecessor(); + HInstruction* last_instruction = real_predecessor->GetLastInstruction(); + if (!last_instruction->IsThrow()) { + AddError(StringPrintf("Unexpected TryBoundary between %s:%d and Exit.", + last_instruction->DebugName(), + last_instruction->GetId())); + } + } else { + HInstruction* last_instruction = predecessor->GetLastInstruction(); + if (!last_instruction->IsReturn() + && !last_instruction->IsReturnVoid() + && !last_instruction->IsThrow()) { + AddError(StringPrintf("Unexpected instruction %s:%d jumps into the exit block.", + last_instruction->DebugName(), + last_instruction->GetId())); + } + } + } + } + // Visit this block's list of phis. for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); @@ -136,6 +163,33 @@ void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) { VisitInstruction(check); } +void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) { + // Ensure that all exception handlers are catch blocks and that handlers + // are not listed multiple times. + // Note that a normal-flow successor may be a catch block before CFG + // simplification. We only test normal-flow successors in SsaChecker. 
+ for (HExceptionHandlerIterator it(*try_boundary); !it.Done(); it.Advance()) { + HBasicBlock* handler = it.Current(); + if (!handler->IsCatchBlock()) { + AddError(StringPrintf("Block %d with %s:%d has exceptional successor %d which " + "is not a catch block.", + current_block_->GetBlockId(), + try_boundary->DebugName(), + try_boundary->GetId(), + handler->GetBlockId())); + } + if (current_block_->GetSuccessors().Contains( + handler, /* start_from */ it.CurrentSuccessorIndex() + 1)) { + AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.", + handler->GetBlockId(), + try_boundary->DebugName(), + try_boundary->GetId())); + } + } + + VisitInstruction(try_boundary); +} + void GraphChecker::VisitInstruction(HInstruction* instruction) { if (seen_ids_.IsBitSet(instruction->GetId())) { AddError(StringPrintf("Instruction id %d is duplicate in graph.", @@ -301,11 +355,65 @@ void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { void SSAChecker::VisitBasicBlock(HBasicBlock* block) { super_type::VisitBasicBlock(block); + // Ensure that only catch blocks have exceptional predecessors, and if they do + // these are instructions which throw into them. + if (block->IsCatchBlock()) { + for (size_t i = 0, e = block->GetExceptionalPredecessors().Size(); i < e; ++i) { + HInstruction* thrower = block->GetExceptionalPredecessors().Get(i); + HBasicBlock* try_block = thrower->GetBlock(); + if (!thrower->CanThrow()) { + AddError(StringPrintf("Exceptional predecessor %s:%d of catch block %d does not throw.", + thrower->DebugName(), + thrower->GetId(), + block->GetBlockId())); + } else if (!try_block->IsInTry()) { + AddError(StringPrintf("Exceptional predecessor %s:%d of catch block %d " + "is not in a try block.", + thrower->DebugName(), + thrower->GetId(), + block->GetBlockId())); + } else if (!try_block->GetTryEntry()->HasExceptionHandler(*block)) { + AddError(StringPrintf("Catch block %d is not an exception handler of " + "its exceptional predecessor %s:%d.", + block->GetBlockId(), + thrower->DebugName(), + thrower->GetId())); + } + } + } else { + if (!block->GetExceptionalPredecessors().IsEmpty()) { + AddError(StringPrintf("Normal block %d has %zu exceptional predecessors.", + block->GetBlockId(), + block->GetExceptionalPredecessors().Size())); + } + } + + // Ensure that catch blocks are not normal successors, and normal blocks are + // never exceptional successors. + const size_t num_normal_successors = block->NumberOfNormalSuccessors(); + for (size_t j = 0; j < num_normal_successors; ++j) { + HBasicBlock* successor = block->GetSuccessors().Get(j); + if (successor->IsCatchBlock()) { + AddError(StringPrintf("Catch block %d is a normal successor of block %d.", + successor->GetBlockId(), + block->GetBlockId())); + } + } + for (size_t j = num_normal_successors, e = block->GetSuccessors().Size(); j < e; ++j) { + HBasicBlock* successor = block->GetSuccessors().Get(j); + if (!successor->IsCatchBlock()) { + AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.", + successor->GetBlockId(), + block->GetBlockId())); + } + } + // Ensure there is no critical edge (i.e., an edge connecting a // block with multiple successors to a block with multiple - // predecessors). - if (block->GetSuccessors().Size() > 1) { - for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { + // predecessors). Exceptional edges are synthesized and hence + // not accounted for. 
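The critical-edge check that follows this hunk deliberately looks only at normal successors, since exceptional edges are synthesized. As a reminder of the invariant being enforced, here is a tiny detector over a hypothetical adjacency-list CFG (the `Block` struct is illustrative, not ART's `HBasicBlock`):

    #include <cstdio>
    #include <vector>

    struct Block {
      std::vector<int> normal_successors;  // exceptional successors are ignored here
      int num_predecessors = 0;
    };

    // An edge A->B is critical when A has several (normal) successors and B has
    // several predecessors; such an edge has no block of its own to host moves.
    void ReportCriticalEdges(const std::vector<Block>& cfg) {
      for (size_t a = 0; a < cfg.size(); ++a) {
        if (cfg[a].normal_successors.size() <= 1) continue;
        for (int b : cfg[a].normal_successors) {
          if (cfg[b].num_predecessors > 1) {
            std::printf("Critical edge between blocks %zu and %d.\n", a, b);
          }
        }
      }
    }

    int main() {
      // B0 branches to B1 and B2; B1 also falls through to B2.
      std::vector<Block> cfg(3);
      cfg[0].normal_successors = {1, 2};
      cfg[1].normal_successors = {2};
      cfg[1].num_predecessors = 1;
      cfg[2].num_predecessors = 2;
      ReportCriticalEdges(cfg);  // reports the edge B0 -> B2
    }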
+ if (block->NumberOfNormalSuccessors() > 1) { + for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) { HBasicBlock* successor = block->GetSuccessors().Get(j); if (successor->GetPredecessors().Size() > 1) { AddError(StringPrintf("Critical edge between blocks %d and %d.", @@ -326,6 +434,54 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) { } } + // Ensure try membership information is consistent. + HTryBoundary* try_entry = block->GetTryEntry(); + if (block->IsCatchBlock()) { + if (try_entry != nullptr) { + AddError(StringPrintf("Catch blocks should not be try blocks but catch block %d " + "has try entry %s:%d.", + block->GetBlockId(), + try_entry->DebugName(), + try_entry->GetId())); + } + + if (block->IsLoopHeader()) { + AddError(StringPrintf("Catch blocks should not be loop headers but catch block %d is.", + block->GetBlockId())); + } + } else { + for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) { + HBasicBlock* predecessor = block->GetPredecessors().Get(i); + HTryBoundary* incoming_try_entry = predecessor->ComputeTryEntryOfSuccessors(); + if (try_entry == nullptr) { + if (incoming_try_entry != nullptr) { + AddError(StringPrintf("Block %d has no try entry but try entry %s:%d follows " + "from predecessor %d.", + block->GetBlockId(), + incoming_try_entry->DebugName(), + incoming_try_entry->GetId(), + predecessor->GetBlockId())); + } + } else if (incoming_try_entry == nullptr) { + AddError(StringPrintf("Block %d has try entry %s:%d but no try entry follows " + "from predecessor %d.", + block->GetBlockId(), + try_entry->DebugName(), + try_entry->GetId(), + predecessor->GetBlockId())); + } else if (!incoming_try_entry->HasSameExceptionHandlersAs(*try_entry)) { + AddError(StringPrintf("Block %d has try entry %s:%d which is not consistent " + "with %s:%d that follows from predecessor %d.", + block->GetBlockId(), + try_entry->DebugName(), + try_entry->GetId(), + incoming_try_entry->DebugName(), + incoming_try_entry->GetId(), + predecessor->GetBlockId())); + } + } + } + if (block->IsLoopHeader()) { CheckLoop(block); } @@ -416,6 +572,7 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { void SSAChecker::VisitInstruction(HInstruction* instruction) { super_type::VisitInstruction(instruction); + HBasicBlock* block = instruction->GetBlock(); // Ensure an instruction dominates all its uses. for (HUseIterator<HInstruction*> use_it(instruction->GetUses()); @@ -447,6 +604,24 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { } } } + + // Ensure that throwing instructions in try blocks are listed as exceptional + // predecessors in their exception handlers. + if (instruction->CanThrow() && block->IsInTry()) { + for (HExceptionHandlerIterator handler_it(*block->GetTryEntry()); + !handler_it.Done(); + handler_it.Advance()) { + if (!handler_it.Current()->GetExceptionalPredecessors().Contains(instruction)) { + AddError(StringPrintf("Instruction %s:%d is in try block %d and can throw " + "but its exception handler %d does not list it in " + "its exceptional predecessors.", + instruction->DebugName(), + instruction->GetId(), + block->GetBlockId(), + handler_it.Current()->GetBlockId())); + } + } + } } static Primitive::Type PrimitiveKind(Primitive::Type type) { @@ -472,32 +647,6 @@ void SSAChecker::VisitPhi(HPhi* phi) { phi->GetBlock()->GetBlockId())); } - // Ensure the number of inputs of a phi is the same as the number of - // its predecessors. 
- const GrowableArray<HBasicBlock*>& predecessors = - phi->GetBlock()->GetPredecessors(); - if (phi->InputCount() != predecessors.Size()) { - AddError(StringPrintf( - "Phi %d in block %d has %zu inputs, " - "but block %d has %zu predecessors.", - phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(), - phi->GetBlock()->GetBlockId(), predecessors.Size())); - } else { - // Ensure phi input at index I either comes from the Ith - // predecessor or from a block that dominates this predecessor. - for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { - HInstruction* input = phi->InputAt(i); - HBasicBlock* predecessor = predecessors.Get(i); - if (!(input->GetBlock() == predecessor - || input->GetBlock()->Dominates(predecessor))) { - AddError(StringPrintf( - "Input %d at index %zu of phi %d from block %d is not defined in " - "predecessor number %zu nor in a block dominating it.", - input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(), - i)); - } - } - } // Ensure that the inputs have the same primitive kind as the phi. for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { HInstruction* input = phi->InputAt(i); @@ -516,6 +665,59 @@ void SSAChecker::VisitPhi(HPhi* phi) { phi->GetBlock()->GetBlockId(), Primitive::PrettyDescriptor(phi->GetType()))); } + + if (phi->IsCatchPhi()) { + // The number of inputs of a catch phi corresponds to the total number of + // throwing instructions caught by this catch block. + const GrowableArray<HInstruction*>& predecessors = + phi->GetBlock()->GetExceptionalPredecessors(); + if (phi->InputCount() != predecessors.Size()) { + AddError(StringPrintf( + "Phi %d in catch block %d has %zu inputs, " + "but catch block %d has %zu exceptional predecessors.", + phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(), + phi->GetBlock()->GetBlockId(), predecessors.Size())); + } else { + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + HInstruction* thrower = predecessors.Get(i); + if (!input->StrictlyDominates(thrower)) { + AddError(StringPrintf( + "Input %d at index %zu of phi %d from catch block %d does not " + "dominate the throwing instruction %s:%d.", + input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(), + thrower->DebugName(), thrower->GetId())); + } + } + } + } else { + // Ensure the number of inputs of a non-catch phi is the same as the number + // of its predecessors. + const GrowableArray<HBasicBlock*>& predecessors = + phi->GetBlock()->GetPredecessors(); + if (phi->InputCount() != predecessors.Size()) { + AddError(StringPrintf( + "Phi %d in block %d has %zu inputs, " + "but block %d has %zu predecessors.", + phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(), + phi->GetBlock()->GetBlockId(), predecessors.Size())); + } else { + // Ensure phi input at index I either comes from the Ith + // predecessor or from a block that dominates this predecessor. 
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + HBasicBlock* predecessor = predecessors.Get(i); + if (!(input->GetBlock() == predecessor + || input->GetBlock()->Dominates(predecessor))) { + AddError(StringPrintf( + "Input %d at index %zu of phi %d from block %d is not defined in " + "predecessor number %zu nor in a block dominating it.", + input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(), + i)); + } + } + } + } } void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_index) { diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 7c72e23e2d..0e270dbe18 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -48,6 +48,9 @@ class GraphChecker : public HGraphDelegateVisitor { // Check that the HasBoundsChecks() flag is set for bounds checks. void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; + // Check successors of blocks ending in TryBoundary. + void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE; + // Check that HCheckCast and HInstanceOf have HLoadClass as second input. void VisitCheckCast(HCheckCast* check) OVERRIDE; void VisitInstanceOf(HInstanceOf* check) OVERRIDE; diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index eca0d9344f..0f6677519e 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -25,14 +25,14 @@ namespace art { * Create a simple control-flow graph composed of two blocks: * * BasicBlock 0, succ: 1 - * 0: Goto 1 + * 0: ReturnVoid 1 * BasicBlock 1, pred: 0 * 1: Exit */ HGraph* CreateSimpleCFG(ArenaAllocator* allocator) { HGraph* graph = CreateGraph(allocator); HBasicBlock* entry_block = new (allocator) HBasicBlock(graph); - entry_block->AddInstruction(new (allocator) HGoto()); + entry_block->AddInstruction(new (allocator) HReturnVoid()); graph->AddBlock(entry_block); graph->SetEntryBlock(entry_block); HBasicBlock* exit_block = new (allocator) HBasicBlock(graph); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 37c060c1b1..069a7a460b 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -158,12 +158,14 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { std::ostream& output, const char* pass_name, bool is_after_pass, + bool graph_in_bad_state, const CodeGenerator& codegen, const DisassemblyInformation* disasm_info = nullptr) : HGraphDelegateVisitor(graph), output_(output), pass_name_(pass_name), is_after_pass_(is_after_pass), + graph_in_bad_state_(graph_in_bad_state), codegen_(codegen), disasm_info_(disasm_info), disassembler_(disasm_info_ != nullptr @@ -251,11 +253,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void PrintSuccessors(HBasicBlock* block) { AddIndent(); output_ << "successors"; - for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) { - if (!block->IsExceptionalSuccessor(i)) { - HBasicBlock* successor = block->GetSuccessors().Get(i); - output_ << " \"B" << successor->GetBlockId() << "\" "; - } + for (size_t i = 0; i < block->NumberOfNormalSuccessors(); ++i) { + HBasicBlock* successor = block->GetSuccessors().Get(i); + output_ << " \"B" << successor->GetBlockId() << "\" "; } output_<< std::endl; } @@ -263,11 +263,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void PrintExceptionHandlers(HBasicBlock* block) { AddIndent(); 
output_ << "xhandlers"; - for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) { - if (block->IsExceptionalSuccessor(i)) { - HBasicBlock* handler = block->GetSuccessors().Get(i); - output_ << " \"B" << handler->GetBlockId() << "\" "; - } + for (size_t i = block->NumberOfNormalSuccessors(); i < block->GetSuccessors().Size(); ++i) { + HBasicBlock* handler = block->GetSuccessors().Get(i); + output_ << " \"B" << handler->GetBlockId() << "\" "; } if (block->IsExitBlock() && (disasm_info_ != nullptr) && @@ -351,6 +349,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void VisitPhi(HPhi* phi) OVERRIDE { StartAttributeStream("reg") << phi->GetRegNumber(); + StartAttributeStream("is_catch_phi") << std::boolalpha << phi->IsCatchPhi() << std::noboolalpha; } void VisitMemoryBarrier(HMemoryBarrier* barrier) OVERRIDE { @@ -387,6 +386,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("recursive") << std::boolalpha << invoke->IsRecursive() << std::noboolalpha; + StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); } void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE { @@ -397,6 +397,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { return strcmp(pass_name_, name) == 0; } + bool IsReferenceTypePropagationPass() { + return strstr(pass_name_, ReferenceTypePropagation::kReferenceTypePropagationPassName) + != nullptr; + } + void PrintInstruction(HInstruction* instruction) { output_ << instruction->DebugName(); if (instruction->InputCount() > 0) { @@ -460,27 +465,19 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } else { StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId(); } - } else if (IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName) - && is_after_pass_) { - if (instruction->GetType() == Primitive::kPrimNot) { - if (instruction->IsLoadClass()) { - ReferenceTypeInfo info = instruction->AsLoadClass()->GetLoadedClassRTI(); - ScopedObjectAccess soa(Thread::Current()); - if (info.GetTypeHandle().GetReference() != nullptr) { - StartAttributeStream("klass") << PrettyClass(info.GetTypeHandle().Get()); - } else { - StartAttributeStream("klass") << "unresolved"; - } - } else { - ReferenceTypeInfo info = instruction->GetReferenceTypeInfo(); - if (info.IsTop()) { - StartAttributeStream("klass") << "java.lang.Object"; - } else { - ScopedObjectAccess soa(Thread::Current()); - StartAttributeStream("klass") << PrettyClass(info.GetTypeHandle().Get()); - } - StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; - } + } else if (IsReferenceTypePropagationPass() + && (instruction->GetType() == Primitive::kPrimNot)) { + ReferenceTypeInfo info = instruction->IsLoadClass() + ? instruction->AsLoadClass()->GetLoadedClassRTI() + : instruction->GetReferenceTypeInfo(); + ScopedObjectAccess soa(Thread::Current()); + if (info.IsValid()) { + StartAttributeStream("klass") << PrettyDescriptor(info.GetTypeHandle().Get()); + StartAttributeStream("can_be_null") + << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; + StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; + } else { + DCHECK(!is_after_pass_) << "Type info should be valid after reference type propagation"; } } if (disasm_info_ != nullptr) { @@ -582,7 +579,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void Run() { StartTag("cfg"); - std::string pass_desc = std::string(pass_name_) + (is_after_pass_ ? 
" (after)" : " (before)"); + std::string pass_desc = std::string(pass_name_) + + " (" + + (is_after_pass_ ? "after" : "before") + + (graph_in_bad_state_ ? ", bad_state" : "") + + ")"; PrintProperty("name", pass_desc.c_str()); if (disasm_info_ != nullptr) { DumpDisassemblyBlockForFrameEntry(); @@ -651,6 +652,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { std::ostream& output_; const char* pass_name_; const bool is_after_pass_; + const bool graph_in_bad_state_; const CodeGenerator& codegen_; const DisassemblyInformation* disasm_info_; std::unique_ptr<HGraphVisualizerDisassembler> disassembler_; @@ -666,7 +668,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, void HGraphVisualizer::PrintHeader(const char* method_name) const { DCHECK(output_ != nullptr); - HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_); + HGraphVisualizerPrinter printer(graph_, *output_, "", true, false, codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", method_name); printer.PrintProperty("method", method_name); @@ -674,10 +676,17 @@ void HGraphVisualizer::PrintHeader(const char* method_name) const { printer.EndTag("compilation"); } -void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) const { +void HGraphVisualizer::DumpGraph(const char* pass_name, + bool is_after_pass, + bool graph_in_bad_state) const { DCHECK(output_ != nullptr); if (!graph_->GetBlocks().IsEmpty()) { - HGraphVisualizerPrinter printer(graph_, *output_, pass_name, is_after_pass, codegen_); + HGraphVisualizerPrinter printer(graph_, + *output_, + pass_name, + is_after_pass, + graph_in_bad_state, + codegen_); printer.Run(); } } @@ -685,8 +694,13 @@ void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) cons void HGraphVisualizer::DumpGraphWithDisassembly() const { DCHECK(output_ != nullptr); if (!graph_->GetBlocks().IsEmpty()) { - HGraphVisualizerPrinter printer( - graph_, *output_, "disassembly", true, codegen_, codegen_.GetDisassemblyInformation()); + HGraphVisualizerPrinter printer(graph_, + *output_, + "disassembly", + /* is_after_pass */ true, + /* graph_in_bad_state */ false, + codegen_, + codegen_.GetDisassemblyInformation()); printer.Run(); } } diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index b6b66df601..66588f6e36 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -104,7 +104,7 @@ class HGraphVisualizer : public ValueObject { const CodeGenerator& codegen); void PrintHeader(const char* method_name) const; - void DumpGraph(const char* pass_name, bool is_after_pass = true) const; + void DumpGraph(const char* pass_name, bool is_after_pass, bool graph_in_bad_state) const; void DumpGraphWithDisassembly() const; private: diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 708733e28c..833dfb00a1 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -120,7 +120,7 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { // Removes all instructions in the set affected by the given side effects. 
void Kill(SideEffects side_effects) { DeleteAllImpureWhich([side_effects](Node* node) { - return node->GetInstruction()->GetSideEffects().DependsOn(side_effects); + return node->GetInstruction()->GetSideEffects().MayDependOn(side_effects); }); } diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index d8a09ffc38..42ef3ff4a5 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -206,7 +206,7 @@ TEST(GVNTest, LoopFieldElimination) { // and the body to be GVN'ed. loop_body->AddInstruction(new (&allocator) HInstanceFieldSet(parameter, parameter, - Primitive::kPrimNot, + Primitive::kPrimBoolean, MemberOffset(42), false, kUnknownFieldIndex, @@ -266,6 +266,8 @@ TEST(GVNTest, LoopSideEffects) { ArenaPool pool; ArenaAllocator allocator(&pool); + static const SideEffects kCanTriggerGC = SideEffects::CanTriggerGC(); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -309,7 +311,7 @@ TEST(GVNTest, LoopSideEffects) { ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn( *outer_loop_header->GetLoopInformation())); - // Check that the loops don't have side effects. + // Check that the only side effect of loops is to potentially trigger GC. { // Make one block with a side effect. entry->AddInstruction(new (&allocator) HInstanceFieldSet(parameter, @@ -323,9 +325,12 @@ TEST(GVNTest, LoopSideEffects) { SideEffectsAnalysis side_effects(graph); side_effects.Run(); - ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects()); - ASSERT_FALSE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects()); - ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects()); + ASSERT_TRUE(side_effects.GetBlockEffects(entry).DoesAnyWrite()); + ASSERT_FALSE(side_effects.GetBlockEffects(outer_loop_body).DoesAnyWrite()); + ASSERT_FALSE(side_effects.GetLoopEffects(outer_loop_header).DoesAnyWrite()); + ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).DoesAnyWrite()); + ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).Equals(kCanTriggerGC)); + ASSERT_TRUE(side_effects.GetLoopEffects(inner_loop_header).Equals(kCanTriggerGC)); } // Check that the side effects of the outer loop does not affect the inner loop. @@ -343,10 +348,11 @@ TEST(GVNTest, LoopSideEffects) { SideEffectsAnalysis side_effects(graph); side_effects.Run(); - ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects()); - ASSERT_TRUE(side_effects.GetBlockEffects(outer_loop_body).HasSideEffects()); - ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects()); - ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects()); + ASSERT_TRUE(side_effects.GetBlockEffects(entry).DoesAnyWrite()); + ASSERT_TRUE(side_effects.GetBlockEffects(outer_loop_body).DoesAnyWrite()); + ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).DoesAnyWrite()); + ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).DoesAnyWrite()); + ASSERT_TRUE(side_effects.GetLoopEffects(inner_loop_header).Equals(kCanTriggerGC)); } // Check that the side effects of the inner loop affects the outer loop. 
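The GVN and side-effects changes above replace the single `HasSideEffects()` test with a write/read dependency query (`MayDependOn`) plus a separate can-trigger-GC flag, which is why a loop that only allocates now reports no writes yet is still not effect-free. A toy bitmask model of that idea — deliberately much coarser than ART's real `SideEffects` encoding:

    #include <cassert>
    #include <cstdint>

    // Toy model: one write bit and one read bit per heap location, plus a GC bit.
    struct SideFx {
      uint32_t writes;
      uint32_t reads;
      bool can_trigger_gc;

      bool DoesAnyWrite() const { return writes != 0; }

      // `this` (reads) may depend on `other` (earlier writes) when some location
      // it reads can be written by `other`.
      bool MayDependOn(const SideFx& other) const {
        return (reads & other.writes) != 0;
      }

      SideFx Union(const SideFx& other) const {
        return SideFx{writes | other.writes, reads | other.reads,
                      can_trigger_gc || other.can_trigger_gc};
      }
    };

    int main() {
      const uint32_t kFieldA = 1u << 0;
      const uint32_t kFieldB = 1u << 1;

      SideFx field_set{kFieldA, 0, false};
      SideFx field_get_a{0, kFieldA, false};
      SideFx field_get_b{0, kFieldB, false};
      SideFx allocation{0, 0, true};

      // A loop body that only allocates "does no write", yet is not effect-free.
      SideFx loop = allocation.Union(field_get_b);
      assert(!loop.DoesAnyWrite() && loop.can_trigger_gc);

      // Only values that actually read the written location get invalidated.
      assert(field_get_a.MayDependOn(field_set));
      assert(!field_get_b.MayDependOn(field_set));
    }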
@@ -365,10 +371,10 @@ TEST(GVNTest, LoopSideEffects) { SideEffectsAnalysis side_effects(graph); side_effects.Run(); - ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects()); - ASSERT_FALSE(side_effects.GetBlockEffects(outer_loop_body).HasSideEffects()); - ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects()); - ASSERT_TRUE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects()); + ASSERT_TRUE(side_effects.GetBlockEffects(entry).DoesAnyWrite()); + ASSERT_FALSE(side_effects.GetBlockEffects(outer_loop_body).DoesAnyWrite()); + ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).DoesAnyWrite()); + ASSERT_TRUE(side_effects.GetLoopEffects(inner_loop_header).DoesAnyWrite()); } } } // namespace art diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 3efe7c77fa..4c746798be 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -22,8 +22,10 @@ #include "constant_folding.h" #include "dead_code_elimination.h" #include "driver/compiler_driver-inl.h" +#include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "instruction_simplifier.h" +#include "intrinsics.h" #include "mirror/class_loader.h" #include "mirror/dex_cache.h" #include "nodes.h" @@ -38,9 +40,6 @@ namespace art { -static constexpr int kMaxInlineCodeUnits = 18; -static constexpr int kDepthLimit = 3; - void HInliner::Run() { if (graph_->IsDebuggable()) { // For simplicity, we currently never inline when the graph is debuggable. This avoids @@ -86,7 +85,7 @@ void HInliner::Run() { } static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { return method->IsFinal() || method->GetDeclaringClass()->IsFinal(); } @@ -96,7 +95,7 @@ static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) * Return nullptr if the runtime target cannot be proven. */ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resolved_method) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { if (IsMethodOrDeclaringClassFinal(resolved_method)) { // No need to lookup further, the resolved method will be the target. return resolved_method; @@ -109,10 +108,8 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol receiver = receiver->InputAt(0); } ReferenceTypeInfo info = receiver->GetReferenceTypeInfo(); - if (info.IsTop()) { - // We have no information on the receiver. - return nullptr; - } else if (!info.IsExact()) { + DCHECK(info.IsValid()) << "Invalid RTI for " << receiver->DebugName(); + if (!info.IsExact()) { // We currently only support inlining with known receivers. // TODO: Remove this check, we should be able to inline final methods // on unknown receivers. 
@@ -164,7 +161,7 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol static uint32_t FindMethodIndexIn(ArtMethod* method, const DexFile& dex_file, uint32_t referrer_index) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) { return method->GetDexMethodIndex(); } else { @@ -221,7 +218,8 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con return false; } - if (code_item->insns_size_in_code_units_ > kMaxInlineCodeUnits) { + size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); + if (code_item->insns_size_in_code_units_ > inline_max_code_units) { VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " is too big to inline"; return false; @@ -273,17 +271,17 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); uint32_t method_index = resolved_method->GetDexMethodIndex(); - + ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); DexCompilationUnit dex_compilation_unit( nullptr, caller_compilation_unit_.GetClassLoader(), - caller_compilation_unit_.GetClassLinker(), - *resolved_method->GetDexFile(), + class_linker, + callee_dex_file, code_item, resolved_method->GetDeclaringClass()->GetDexClassDefIndex(), - resolved_method->GetDexMethodIndex(), + method_index, resolved_method->GetAccessFlags(), - nullptr); + compiler_driver_->GetVerifiedMethod(&callee_dex_file, method_index)); bool requires_ctor_barrier = false; @@ -326,7 +324,8 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, &outer_compilation_unit_, resolved_method->GetDexFile(), compiler_driver_, - &inline_stats); + &inline_stats, + resolved_method->GetQuickenedInfo()); if (!builder.BuildGraph(*code_item)) { VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) @@ -357,8 +356,10 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, HConstantFolding fold(callee_graph); ReferenceTypePropagation type_propagation(callee_graph, handles_); InstructionSimplifier simplify(callee_graph, stats_); + IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_); HOptimization* optimizations[] = { + &intrinsics, &dce, &fold, &type_propagation, @@ -370,7 +371,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, optimization->Run(); } - if (depth_ + 1 < kDepthLimit) { + if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) { HInliner inliner(callee_graph, outer_compilation_unit_, dex_compilation_unit, @@ -449,7 +450,33 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, } } - callee_graph->InlineInto(graph_, invoke_instruction); + HInstruction* return_replacement = callee_graph->InlineInto(graph_, invoke_instruction); + + // When merging the graph we might create a new NullConstant in the caller graph which does + // not have the chance to be typed. We assign the correct type here so that we can keep the + // assertion that every reference has a valid type. This also simplifies checks along the way. 
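The inliner hunks above drop the hard-coded `kMaxInlineCodeUnits` (18) and `kDepthLimit` (3) in favor of `CompilerOptions::GetInlineMaxCodeUnits()` and `GetInlineDepthLimit()`. A sketch of the two gates with an illustrative options struct (only the two getter names and the old default values come from the diff; the rest is assumed):

    #include <cstddef>
    #include <cstdio>

    // Stand-in for the two CompilerOptions knobs the inliner now consults.
    struct InlineOptions {
      size_t inline_max_code_units;
      size_t inline_depth_limit;
    };

    // Size gate: mirrors the check in HInliner::TryInline.
    bool SmallEnoughToInline(size_t insns_size_in_code_units, const InlineOptions& opts) {
      return insns_size_in_code_units <= opts.inline_max_code_units;
    }

    // Depth gate: mirrors the check before recursively inlining into the callee.
    bool MayInlineDeeper(size_t current_depth, const InlineOptions& opts) {
      return current_depth + 1 < opts.inline_depth_limit;
    }

    int main() {
      InlineOptions opts{/*inline_max_code_units=*/18, /*inline_depth_limit=*/3};
      std::printf("%d %d\n", SmallEnoughToInline(16, opts), SmallEnoughToInline(40, opts));
      std::printf("%d %d\n", MayInlineDeeper(1, opts), MayInlineDeeper(2, opts));
    }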
+ HNullConstant* null_constant = graph_->GetNullConstant(); + if (!null_constant->GetReferenceTypeInfo().IsValid()) { + ReferenceTypeInfo::TypeHandle obj_handle = + handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject)); + null_constant->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(obj_handle, false /* is_exact */)); + } + + if ((return_replacement != nullptr) + && (return_replacement->GetType() == Primitive::kPrimNot)) { + if (!return_replacement->GetReferenceTypeInfo().IsValid()) { + // Make sure that we have a valid type for the return. We may get an invalid one when + // we inline invokes with multiple branches and create a Phi for the result. + // TODO: we could be more precise by merging the phi inputs but that requires + // some functionality from the reference type propagation. + DCHECK(return_replacement->IsPhi()); + ReferenceTypeInfo::TypeHandle return_handle = + handles_->NewHandle(resolved_method->GetReturnType()); + return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create( + return_handle, return_handle->IsFinal() /* is_exact */)); + } + } return true; } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 337cf5b525..df6e550b4a 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -70,6 +70,7 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitUShr(HUShr* instruction) OVERRIDE; void VisitXor(HXor* instruction) OVERRIDE; void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; + void VisitFakeString(HFakeString* fake_string) OVERRIDE; bool IsDominatedByInputNullCheck(HInstruction* instr); OptimizingCompilerStats* stats_; @@ -194,16 +195,17 @@ bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* ins // Returns whether doing a type test between the class of `object` against `klass` has // a statically known outcome. The result of the test is stored in `outcome`. static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { - if (!klass->IsResolved()) { - // If the class couldn't be resolve it's not safe to compare against it. It's - // default type would be Top which might be wider that the actual class type - // and thus producing wrong results. + DCHECK(!object->IsNullConstant()) << "Null constants should be special cased"; + ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); + ScopedObjectAccess soa(Thread::Current()); + if (!obj_rti.IsValid()) { + // We run the simplifier before the reference type propagation so type info might not be + // available. 
return false; } - ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); - ScopedObjectAccess soa(Thread::Current()); + DCHECK(class_rti.IsValid() && class_rti.IsExact()); if (class_rti.IsSupertypeOf(obj_rti)) { *outcome = true; return true; @@ -236,12 +238,19 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { } bool outcome; - if (TypeCheckHasKnownOutcome(check_cast->InputAt(1)->AsLoadClass(), object, &outcome)) { + HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); + if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { if (outcome) { check_cast->GetBlock()->RemoveInstruction(check_cast); if (stats_ != nullptr) { stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); } + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the checkcast was successful, hence + // the class was already loaded. + load_class->GetBlock()->RemoveInstruction(load_class); + } } else { // Don't do anything for exceptional cases for now. Ideally we should remove // all instructions and blocks this instruction dominates. @@ -266,7 +275,8 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { } bool outcome; - if (TypeCheckHasKnownOutcome(instruction->InputAt(1)->AsLoadClass(), object, &outcome)) { + HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass(); + if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { if (outcome && can_be_null) { // Type test will succeed, we just need a null test. HNotEqual* test = new (graph->GetArena()) HNotEqual(graph->GetNullConstant(), object); @@ -278,6 +288,12 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { } RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); + if (outcome && !load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the instanceof check was successful, hence + // the class was already loaded. + load_class->GetBlock()->RemoveInstruction(load_class); + } } } @@ -436,9 +452,14 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { // ADD dst, src, 0 // with // src - instruction->ReplaceWith(input_other); - instruction->GetBlock()->RemoveInstruction(instruction); - return; + // Note that we cannot optimize `x + 0.0` to `x` for floating-point. When + // `x` is `-0.0`, the former expression yields `0.0`, while the latter + // yields `-0.0`. + if (Primitive::IsIntegralType(instruction->GetType())) { + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } } HInstruction* left = instruction->GetLeft(); @@ -517,10 +538,10 @@ void InstructionSimplifierVisitor::VisitLessThanOrEqual(HLessThanOrEqual* condit void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) { // Try to fold an HCompare into this HCondition. - // This simplification is currently only supported on x86 and x86_64. - // TODO: Implement it for ARM, ARM64 and MIPS64. + // This simplification is currently supported on x86, x86_64, ARM and ARM64. + // TODO: Implement it for MIPS64. 
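The floating-point caveat added to `VisitAdd` above comes down to IEEE 754 signed zeros: `(-0.0) + 0.0` is `+0.0`, so folding `x + 0.0` into `x` would change the sign of the result even though the two values compare equal. A short host-side demonstration:

    #include <cassert>
    #include <cmath>

    int main() {
      double x = -0.0;
      double folded = x;         // what the (rejected) simplification would produce
      double computed = x + 0.0; // what the program actually evaluates

      assert(std::signbit(folded));    // folded keeps the sign of -0.0
      assert(!std::signbit(computed)); // but (-0.0) + 0.0 rounds to +0.0
      assert(folded == computed);      // operator== cannot tell them apart
      return 0;
    }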
InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - if (instruction_set != kX86 && instruction_set != kX86_64) { + if (instruction_set == kMips64) { return; } @@ -798,21 +819,24 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); + Primitive::Type type = instruction->GetType(); + if (Primitive::IsFloatingPointType(type)) { + return; + } + if ((input_cst != nullptr) && input_cst->IsZero()) { // Replace code looking like // SUB dst, src, 0 // with // src + // Note that we cannot optimize `x - 0.0` to `x` for floating-point. When + // `x` is `-0.0`, the former expression yields `0.0`, while the later + // yields `-0.0`. instruction->ReplaceWith(input_other); instruction->GetBlock()->RemoveInstruction(instruction); return; } - Primitive::Type type = instruction->GetType(); - if (!Primitive::IsIntegralType(type)) { - return; - } - HBasicBlock* block = instruction->GetBlock(); ArenaAllocator* allocator = GetGraph()->GetArena(); @@ -903,4 +927,46 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { } } +void InstructionSimplifierVisitor::VisitFakeString(HFakeString* instruction) { + HInstruction* actual_string = nullptr; + + // Find the string we need to replace this instruction with. The actual string is + // the return value of a StringFactory call. + for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { + HInstruction* use = it.Current()->GetUser(); + if (use->IsInvokeStaticOrDirect() + && use->AsInvokeStaticOrDirect()->IsStringFactoryFor(instruction)) { + use->AsInvokeStaticOrDirect()->RemoveFakeStringArgumentAsLastInput(); + actual_string = use; + break; + } + } + + // Check that there is no other instruction that thinks it is the factory for that string. + if (kIsDebugBuild) { + CHECK(actual_string != nullptr); + for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { + HInstruction* use = it.Current()->GetUser(); + if (use->IsInvokeStaticOrDirect()) { + CHECK(!use->AsInvokeStaticOrDirect()->IsStringFactoryFor(instruction)); + } + } + } + + // We need to remove any environment uses of the fake string that are not dominated by + // `actual_string` to null. + for (HUseIterator<HEnvironment*> it(instruction->GetEnvUses()); !it.Done(); it.Advance()) { + HEnvironment* environment = it.Current()->GetUser(); + if (!actual_string->StrictlyDominates(environment->GetHolder())) { + environment->RemoveAsUserOfInput(it.Current()->GetIndex()); + environment->SetRawEnvAt(it.Current()->GetIndex(), nullptr); + } + } + + // Only uses dominated by `actual_string` must remain. We can safely replace and remove + // `instruction`. 
+ instruction->ReplaceWith(actual_string); + instruction->GetBlock()->RemoveInstruction(instruction); +} + } // namespace art diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index faee2dd91e..cc4b6f6adc 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -31,7 +31,7 @@ class InstructionSimplifier : public HOptimization { InstructionSimplifier(HGraph* graph, OptimizingCompilerStats* stats = nullptr, const char* name = kInstructionSimplifierPassName) - : HOptimization(graph, name, stats) {} + : HOptimization(graph, name, stats) {} static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 8ef13e125e..3db9816173 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -31,7 +31,7 @@ static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) { switch (i) { case Intrinsics::kNone: return kInterface; // Non-sensical for intrinsic. -#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ case Intrinsics::k ## Name: \ return IsStatic; #include "intrinsics_list.h" @@ -42,7 +42,21 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) return kInterface; } - +// Function that returns whether an intrinsic needs an environment or not. +static inline IntrinsicNeedsEnvironment IntrinsicNeedsEnvironment(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kNeedsEnvironment; // Non-sensical for intrinsic. +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ + case Intrinsics::k ## Name: \ + return NeedsEnvironment; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kNeedsEnvironment; +} static Primitive::Type GetType(uint64_t data, bool is_op_size) { if (is_op_size) { @@ -70,7 +84,10 @@ static Primitive::Type GetType(uint64_t data, bool is_op_size) { } } -static Intrinsics GetIntrinsic(InlineMethod method) { +static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) { + if (instruction_set == kMips || instruction_set == kMips64) { + return Intrinsics::kNone; + } switch (method.opcode) { // Floating-point conversions. case kIntrinsicDoubleCvt: @@ -103,6 +120,16 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } + case kIntrinsicNumberOfLeadingZeros: + switch (GetType(method.d.data, true)) { + case Primitive::kPrimInt: + return Intrinsics::kIntegerNumberOfLeadingZeros; + case Primitive::kPrimLong: + return Intrinsics::kLongNumberOfLeadingZeros; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } // Abs. 
case kIntrinsicAbsDouble: @@ -187,6 +214,8 @@ static Intrinsics GetIntrinsic(InlineMethod method) { return Intrinsics::kStringCharAt; case kIntrinsicCompareTo: return Intrinsics::kStringCompareTo; + case kIntrinsicEquals: + return Intrinsics::kStringEquals; case kIntrinsicGetCharsNoCheck: return Intrinsics::kStringGetCharsNoCheck; case kIntrinsicIsEmptyOrLength: @@ -339,7 +368,7 @@ void IntrinsicsRecognizer::Run() { driver_->GetMethodInlinerMap()->GetMethodInliner(&invoke->GetDexFile()); DCHECK(inliner != nullptr); if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) { - Intrinsics intrinsic = GetIntrinsic(method); + Intrinsics intrinsic = GetIntrinsic(method, graph_->GetInstructionSet()); if (intrinsic != Intrinsics::kNone) { if (!CheckInvokeType(intrinsic, invoke)) { @@ -347,7 +376,7 @@ void IntrinsicsRecognizer::Run() { << intrinsic << " for " << PrettyMethod(invoke->GetDexMethodIndex(), invoke->GetDexFile()); } else { - invoke->SetIntrinsic(intrinsic); + invoke->SetIntrinsic(intrinsic, IntrinsicNeedsEnvironment(intrinsic)); } } } @@ -359,9 +388,9 @@ void IntrinsicsRecognizer::Run() { std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { switch (intrinsic) { case Intrinsics::kNone: - os << "No intrinsic."; + os << "None"; break; -#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ case Intrinsics::k ## Name: \ os << # Name; \ break; diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 90449828ad..d1a17b6def 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -54,7 +54,7 @@ class IntrinsicVisitor : public ValueObject { switch (invoke->GetIntrinsic()) { case Intrinsics::kNone: return; -#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ case Intrinsics::k ## Name: \ Visit ## Name(invoke); \ return; @@ -69,7 +69,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) // Define visitor methods. 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } #include "intrinsics_list.h" diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index b4dbf75f0a..1527a6aa23 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -224,6 +224,48 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } +static void GenNumberOfLeadingZeros(LocationSummary* locations, + Primitive::Type type, + ArmAssembler* assembler) { + Location in = locations->InAt(0); + Register out = locations->Out().AsRegister<Register>(); + + DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + + if (type == Primitive::kPrimLong) { + Register in_reg_lo = in.AsRegisterPairLow<Register>(); + Register in_reg_hi = in.AsRegisterPairHigh<Register>(); + Label end; + __ clz(out, in_reg_hi); + __ CompareAndBranchIfNonZero(in_reg_hi, &end); + __ clz(out, in_reg_lo); + __ AddConstant(out, 32); + __ Bind(&end); + } else { + __ clz(out, in.AsRegister<Register>()); + } +} + +void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { Location in = locations->InAt(0); Location out = locations->Out(); @@ -1068,6 +1110,7 @@ UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) +UNIMPLEMENTED_INTRINSIC(StringEquals) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h index 8bfb7d4686..f013bd6083 100644 --- a/compiler/optimizing/intrinsics_arm.h +++ b/compiler/optimizing/intrinsics_arm.h @@ -33,13 +33,12 @@ class CodeGeneratorARM; class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor { public: - explicit IntrinsicLocationsBuilderARM(ArenaAllocator* arena, - const ArmInstructionSetFeatures& features) + IntrinsicLocationsBuilderARM(ArenaAllocator* arena, const ArmInstructionSetFeatures& features) : arena_(arena), features_(features) {} // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -65,7 +64,7 @@ class IntrinsicCodeGeneratorARM FINAL : public IntrinsicVisitor { // Define visitor methods. 
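`GenNumberOfLeadingZeros` in the ARM intrinsic above handles the 64-bit case on a 32-bit core with two `clz` instructions: count in the high word, and only when the high word is zero fall back to the low word plus 32. A portable sketch of the same computation (a loop stands in for the hardware `clz`, which also returns 32 for zero):

    #include <cassert>
    #include <cstdint>

    // Stand-in for the ARM clz instruction; returns 32 for a zero input.
    int Clz32(uint32_t x) {
      int n = 0;
      while (n < 32 && (x & (UINT32_C(1) << 31)) == 0) { x <<= 1; ++n; }
      return n;
    }

    // Mirrors the long case above: clz of the high word, unless the high word is
    // entirely zero, in which case it is 32 plus clz of the low word.
    int Clz64(uint32_t lo, uint32_t hi) {
      return (hi != 0) ? Clz32(hi) : 32 + Clz32(lo);
    }

    int main() {
      assert(Clz64(0, 0) == 64);
      assert(Clz64(1, 0) == 63);
      assert(Clz64(0, 1) == 31);
      assert(Clz64(0xFFFFFFFFu, 0x80000000u) == 0);
    }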
-#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 78ac167a87..a5332ea794 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -260,6 +260,33 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetVIXLAssembler()); } +static void GenNumberOfLeadingZeros(LocationSummary* locations, + Primitive::Type type, + vixl::MacroAssembler* masm) { + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + + Location in = locations->InAt(0); + Location out = locations->Out(); + + __ Clz(RegisterFrom(out, type), RegisterFrom(in, type)); +} + +void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); +} + +void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); +} + static void GenReverse(LocationSummary* locations, Primitive::Type type, vixl::MacroAssembler* masm) { @@ -1028,6 +1055,102 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Temporary registers to store lengths of strings and for calculations. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { + vixl::MacroAssembler* masm = GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register str = WRegisterFrom(locations->InAt(0)); + Register arg = WRegisterFrom(locations->InAt(1)); + Register out = XRegisterFrom(locations->Out()); + + UseScratchRegisterScope scratch_scope(masm); + Register temp = scratch_scope.AcquireW(); + Register temp1 = WRegisterFrom(locations->GetTemp(0)); + Register temp2 = WRegisterFrom(locations->GetTemp(1)); + + vixl::Label loop; + vixl::Label end; + vixl::Label return_true; + vixl::Label return_false; + + // Get offsets of count, value, and class fields within a string object. + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + const int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check if input is null, return false if it is. 
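+  // (Cbz branches when the register holds zero, which for a reference argument means null;
+  //  the receiver `str` is already known to be non-null per the DCHECK above. The fast paths
+  //  below mirror String.equals: identical references answer true, a different class or length
+  //  answers false, the empty string answers true, and only then is the character data compared,
+  //  eight bytes, i.e. four UTF-16 chars, at a time.)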
+ __ Cbz(arg, &return_false); + + // Reference equality check, return true if same reference. + __ Cmp(str, arg); + __ B(&return_true, eq); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Ldr(temp, MemOperand(str.X(), class_offset)); + __ Ldr(temp1, MemOperand(arg.X(), class_offset)); + __ Cmp(temp, temp1); + __ B(&return_false, ne); + + // Load lengths of this and argument strings. + __ Ldr(temp, MemOperand(str.X(), count_offset)); + __ Ldr(temp1, MemOperand(arg.X(), count_offset)); + // Check if lengths are equal, return false if they're not. + __ Cmp(temp, temp1); + __ B(&return_false, ne); + // Store offset of string value in preparation for comparison loop + __ Mov(temp1, value_offset); + // Return true if both strings are empty. + __ Cbz(temp, &return_true); + + // Assertions that must hold in order to compare strings 4 characters at a time. + DCHECK_ALIGNED(value_offset, 8); + static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); + + temp1 = temp1.X(); + temp2 = temp2.X(); + + // Loop to compare strings 4 characters at a time starting at the beginning of the string. + // Ok to do this because strings are zero-padded to be 8-byte aligned. + __ Bind(&loop); + __ Ldr(out, MemOperand(str.X(), temp1)); + __ Ldr(temp2, MemOperand(arg.X(), temp1)); + __ Add(temp1, temp1, Operand(sizeof(uint64_t))); + __ Cmp(out, temp2); + __ B(&return_false, ne); + __ Sub(temp, temp, Operand(4), SetFlags); + __ B(&loop, gt); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ Mov(out, 1); + __ B(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ Mov(out, 0); + __ Bind(&end); +} + static void GenerateVisitStringIndexOf(HInvoke* invoke, vixl::MacroAssembler* masm, CodeGeneratorARM64* codegen, diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index ba21889839..ebaf5e5952 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -41,7 +41,7 @@ class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -65,7 +65,7 @@ class IntrinsicCodeGeneratorARM64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h index 2c9248f52c..15ee5d4d12 100644 --- a/compiler/optimizing/intrinsics_list.h +++ b/compiler/optimizing/intrinsics_list.h @@ -18,73 +18,77 @@ #define ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ // All intrinsics supported by the optimizing compiler. Format is name, then whether it is expected -// to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual). 
+// to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual), then whether it requires an +// environment. #define INTRINSICS_LIST(V) \ - V(DoubleDoubleToRawLongBits, kStatic) \ - V(DoubleLongBitsToDouble, kStatic) \ - V(FloatFloatToRawIntBits, kStatic) \ - V(FloatIntBitsToFloat, kStatic) \ - V(IntegerReverse, kStatic) \ - V(IntegerReverseBytes, kStatic) \ - V(LongReverse, kStatic) \ - V(LongReverseBytes, kStatic) \ - V(ShortReverseBytes, kStatic) \ - V(MathAbsDouble, kStatic) \ - V(MathAbsFloat, kStatic) \ - V(MathAbsLong, kStatic) \ - V(MathAbsInt, kStatic) \ - V(MathMinDoubleDouble, kStatic) \ - V(MathMinFloatFloat, kStatic) \ - V(MathMinLongLong, kStatic) \ - V(MathMinIntInt, kStatic) \ - V(MathMaxDoubleDouble, kStatic) \ - V(MathMaxFloatFloat, kStatic) \ - V(MathMaxLongLong, kStatic) \ - V(MathMaxIntInt, kStatic) \ - V(MathSqrt, kStatic) \ - V(MathCeil, kStatic) \ - V(MathFloor, kStatic) \ - V(MathRint, kStatic) \ - V(MathRoundDouble, kStatic) \ - V(MathRoundFloat, kStatic) \ - V(SystemArrayCopyChar, kStatic) \ - V(ThreadCurrentThread, kStatic) \ - V(MemoryPeekByte, kStatic) \ - V(MemoryPeekIntNative, kStatic) \ - V(MemoryPeekLongNative, kStatic) \ - V(MemoryPeekShortNative, kStatic) \ - V(MemoryPokeByte, kStatic) \ - V(MemoryPokeIntNative, kStatic) \ - V(MemoryPokeLongNative, kStatic) \ - V(MemoryPokeShortNative, kStatic) \ - V(StringCharAt, kDirect) \ - V(StringCompareTo, kDirect) \ - V(StringGetCharsNoCheck, kDirect) \ - V(StringIndexOf, kDirect) \ - V(StringIndexOfAfter, kDirect) \ - V(StringNewStringFromBytes, kStatic) \ - V(StringNewStringFromChars, kStatic) \ - V(StringNewStringFromString, kStatic) \ - V(UnsafeCASInt, kDirect) \ - V(UnsafeCASLong, kDirect) \ - V(UnsafeCASObject, kDirect) \ - V(UnsafeGet, kDirect) \ - V(UnsafeGetVolatile, kDirect) \ - V(UnsafeGetObject, kDirect) \ - V(UnsafeGetObjectVolatile, kDirect) \ - V(UnsafeGetLong, kDirect) \ - V(UnsafeGetLongVolatile, kDirect) \ - V(UnsafePut, kDirect) \ - V(UnsafePutOrdered, kDirect) \ - V(UnsafePutVolatile, kDirect) \ - V(UnsafePutObject, kDirect) \ - V(UnsafePutObjectOrdered, kDirect) \ - V(UnsafePutObjectVolatile, kDirect) \ - V(UnsafePutLong, kDirect) \ - V(UnsafePutLongOrdered, kDirect) \ - V(UnsafePutLongVolatile, kDirect) \ - V(ReferenceGetReferent, kDirect) + V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironment) \ + V(DoubleLongBitsToDouble, kStatic, kNeedsEnvironment) \ + V(FloatFloatToRawIntBits, kStatic, kNeedsEnvironment) \ + V(FloatIntBitsToFloat, kStatic, kNeedsEnvironment) \ + V(IntegerReverse, kStatic, kNeedsEnvironment) \ + V(IntegerReverseBytes, kStatic, kNeedsEnvironment) \ + V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironment) \ + V(LongReverse, kStatic, kNeedsEnvironment) \ + V(LongReverseBytes, kStatic, kNeedsEnvironment) \ + V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironment) \ + V(ShortReverseBytes, kStatic, kNeedsEnvironment) \ + V(MathAbsDouble, kStatic, kNeedsEnvironment) \ + V(MathAbsFloat, kStatic, kNeedsEnvironment) \ + V(MathAbsLong, kStatic, kNeedsEnvironment) \ + V(MathAbsInt, kStatic, kNeedsEnvironment) \ + V(MathMinDoubleDouble, kStatic, kNeedsEnvironment) \ + V(MathMinFloatFloat, kStatic, kNeedsEnvironment) \ + V(MathMinLongLong, kStatic, kNeedsEnvironment) \ + V(MathMinIntInt, kStatic, kNeedsEnvironment) \ + V(MathMaxDoubleDouble, kStatic, kNeedsEnvironment) \ + V(MathMaxFloatFloat, kStatic, kNeedsEnvironment) \ + V(MathMaxLongLong, kStatic, kNeedsEnvironment) \ + V(MathMaxIntInt, kStatic, kNeedsEnvironment) \ + V(MathSqrt, kStatic, kNeedsEnvironment) \ + 
V(MathCeil, kStatic, kNeedsEnvironment) \ + V(MathFloor, kStatic, kNeedsEnvironment) \ + V(MathRint, kStatic, kNeedsEnvironment) \ + V(MathRoundDouble, kStatic, kNeedsEnvironment) \ + V(MathRoundFloat, kStatic, kNeedsEnvironment) \ + V(SystemArrayCopyChar, kStatic, kNeedsEnvironment) \ + V(ThreadCurrentThread, kStatic, kNeedsEnvironment) \ + V(MemoryPeekByte, kStatic, kNeedsEnvironment) \ + V(MemoryPeekIntNative, kStatic, kNeedsEnvironment) \ + V(MemoryPeekLongNative, kStatic, kNeedsEnvironment) \ + V(MemoryPeekShortNative, kStatic, kNeedsEnvironment) \ + V(MemoryPokeByte, kStatic, kNeedsEnvironment) \ + V(MemoryPokeIntNative, kStatic, kNeedsEnvironment) \ + V(MemoryPokeLongNative, kStatic, kNeedsEnvironment) \ + V(MemoryPokeShortNative, kStatic, kNeedsEnvironment) \ + V(StringCharAt, kDirect, kNeedsEnvironment) \ + V(StringCompareTo, kDirect, kNeedsEnvironment) \ + V(StringEquals, kDirect, kNeedsEnvironment) \ + V(StringGetCharsNoCheck, kDirect, kNeedsEnvironment) \ + V(StringIndexOf, kDirect, kNeedsEnvironment) \ + V(StringIndexOfAfter, kDirect, kNeedsEnvironment) \ + V(StringNewStringFromBytes, kStatic, kNeedsEnvironment) \ + V(StringNewStringFromChars, kStatic, kNeedsEnvironment) \ + V(StringNewStringFromString, kStatic, kNeedsEnvironment) \ + V(UnsafeCASInt, kDirect, kNeedsEnvironment) \ + V(UnsafeCASLong, kDirect, kNeedsEnvironment) \ + V(UnsafeCASObject, kDirect, kNeedsEnvironment) \ + V(UnsafeGet, kDirect, kNeedsEnvironment) \ + V(UnsafeGetVolatile, kDirect, kNeedsEnvironment) \ + V(UnsafeGetObject, kDirect, kNeedsEnvironment) \ + V(UnsafeGetObjectVolatile, kDirect, kNeedsEnvironment) \ + V(UnsafeGetLong, kDirect, kNeedsEnvironment) \ + V(UnsafeGetLongVolatile, kDirect, kNeedsEnvironment) \ + V(UnsafePut, kDirect, kNeedsEnvironment) \ + V(UnsafePutOrdered, kDirect, kNeedsEnvironment) \ + V(UnsafePutVolatile, kDirect, kNeedsEnvironment) \ + V(UnsafePutObject, kDirect, kNeedsEnvironment) \ + V(UnsafePutObjectOrdered, kDirect, kNeedsEnvironment) \ + V(UnsafePutObjectVolatile, kDirect, kNeedsEnvironment) \ + V(UnsafePutLong, kDirect, kNeedsEnvironment) \ + V(UnsafePutLongOrdered, kDirect, kNeedsEnvironment) \ + V(UnsafePutLongVolatile, kDirect, kNeedsEnvironment) \ + V(ReferenceGetReferent, kDirect, kNeedsEnvironment) #endif // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ #undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ // #define is only for lint. diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 0d6ca09f31..4471d713e2 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -945,6 +945,97 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + + // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction. + locations->AddTemp(Location::RegisterLocation(ECX)); + locations->AddTemp(Location::RegisterLocation(EDI)); + + // Set output, ESI needed for repe_cmpsl instruction anyways. 
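+  // (The fixed registers are dictated by the instruction: repe cmpsl implicitly compares the
+  //  doublewords at [ESI] and [EDI] and decrements ECX on each iteration, so the register
+  //  allocator cannot be left to pick arbitrary registers here.)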
+ locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) { + X86Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register str = locations->InAt(0).AsRegister<Register>(); + Register arg = locations->InAt(1).AsRegister<Register>(); + Register ecx = locations->GetTemp(0).AsRegister<Register>(); + Register edi = locations->GetTemp(1).AsRegister<Register>(); + Register esi = locations->Out().AsRegister<Register>(); + + Label end; + Label return_true; + Label return_false; + + // Get offsets of count, value, and class fields within a string object. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check if input is null, return false if it is. + __ testl(arg, arg); + __ j(kEqual, &return_false); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ movl(ecx, Address(str, class_offset)); + __ cmpl(ecx, Address(arg, class_offset)); + __ j(kNotEqual, &return_false); + + // Reference equality check, return true if same reference. + __ cmpl(str, arg); + __ j(kEqual, &return_true); + + // Load length of receiver string. + __ movl(ecx, Address(str, count_offset)); + // Check if lengths are equal, return false if they're not. + __ cmpl(ecx, Address(arg, count_offset)); + __ j(kNotEqual, &return_false); + // Return true if both strings are empty. + __ testl(ecx, ecx); + __ j(kEqual, &return_true); + + // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction. + __ leal(esi, Address(str, value_offset)); + __ leal(edi, Address(arg, value_offset)); + + // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths. + __ addl(ecx, Immediate(1)); + __ shrl(ecx, Immediate(1)); + + // Assertions that must hold in order to compare strings 2 characters at a time. + DCHECK_ALIGNED(value_offset, 4); + static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded"); + + // Loop to compare strings two characters at a time starting at the beginning of the string. + __ repe_cmpsl(); + // If strings are not equal, zero flag will be cleared. + __ j(kNotEqual, &return_false); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ movl(esi, Immediate(1)); + __ jmp(&end); + + // Return false and exit the function. 
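+  // (xorl reg, reg is the usual idiom for loading zero; it clobbers the flags, but nothing
+  //  after this point reads them.)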
+ __ Bind(&return_false); + __ xorl(esi, esi); + __ Bind(&end); +} + static void CreateStringIndexOfLocations(HInvoke* invoke, ArenaAllocator* allocator, bool start_at_zero) { @@ -1756,6 +1847,8 @@ UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros) +UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index 4292ec7b99..ac68f39904 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -61,7 +61,7 @@ class IntrinsicCodeGeneratorX86 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index ea342e9382..9ea68ec07d 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -282,8 +282,6 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); - // TODO: Allow x86 to work with memory. This requires assembler support, see below. - // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly. locations->SetOut(Location::SameAsFirstInput()); locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. } @@ -294,34 +292,18 @@ static void MathAbsFP(LocationSummary* locations, CodeGeneratorX86_64* codegen) { Location output = locations->Out(); - if (output.IsFpuRegister()) { - // In-register - XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + DCHECK(output.IsFpuRegister()); + XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - // TODO: Can mask directly with constant area using pand if we can guarantee - // that the literal is aligned on a 16 byte boundary. This will avoid a - // temporary. - if (is64bit) { - __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } else { - __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); - __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } + // TODO: Can mask directly with constant area using pand if we can guarantee + // that the literal is aligned on a 16 byte boundary. This will avoid a + // temporary. + if (is64bit) { + __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); + __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); } else { - // TODO: update when assember support is available. 
- UNIMPLEMENTED(FATAL) << "Needs assembler support."; -// Once assembler support is available, in-memory operations look like this: -// if (is64bit) { -// DCHECK(output.IsDoubleStackSlot()); -// // No 64b and with literal. -// __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF))); -// __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp); -// } else { -// DCHECK(output.IsStackSlot()); -// // Can use and with a literal directly. -// __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF))); -// } + __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); + __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); } } @@ -690,7 +672,7 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); return; } @@ -732,7 +714,11 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { // And truncate to an integer. __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); + // Load maxInt into out. + codegen_->Load64BitValue(out, kPrimIntMax); + // if inPlusPointFive >= maxInt goto done + __ movl(out, Immediate(kPrimIntMax)); __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax))); __ j(kAboveEqual, &done); @@ -776,7 +762,11 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { // And truncate to an integer. __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1)); + // Load maxLong into out. + codegen_->Load64BitValue(out, kPrimLongMax); + // if inPlusPointFive >= maxLong goto done + __ movq(out, Immediate(kPrimLongMax)); __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax))); __ j(kAboveEqual, &done); @@ -864,6 +854,97 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + + // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction. + locations->AddTemp(Location::RegisterLocation(RCX)); + locations->AddTemp(Location::RegisterLocation(RDI)); + + // Set output, RSI needed for repe_cmpsq instruction anyways. + locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) { + X86_64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>(); + CpuRegister rsi = locations->Out().AsRegister<CpuRegister>(); + + Label end; + Label return_true; + Label return_false; + + // Get offsets of count, value, and class fields within a string object. 
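+  // (These offsets are compile-time constants derived from the runtime's object layout for
+  //  java.lang.String and java.lang.Object, which lets the intrinsic address the fields
+  //  directly instead of going through the generic field-access paths.)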
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check if input is null, return false if it is. + __ testl(arg, arg); + __ j(kEqual, &return_false); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ movl(rcx, Address(str, class_offset)); + __ cmpl(rcx, Address(arg, class_offset)); + __ j(kNotEqual, &return_false); + + // Reference equality check, return true if same reference. + __ cmpl(str, arg); + __ j(kEqual, &return_true); + + // Load length of receiver string. + __ movl(rcx, Address(str, count_offset)); + // Check if lengths are equal, return false if they're not. + __ cmpl(rcx, Address(arg, count_offset)); + __ j(kNotEqual, &return_false); + // Return true if both strings are empty. + __ testl(rcx, rcx); + __ j(kEqual, &return_true); + + // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction. + __ leal(rsi, Address(str, value_offset)); + __ leal(rdi, Address(arg, value_offset)); + + // Divide string length by 4 and adjust for lengths not divisible by 4. + __ addl(rcx, Immediate(3)); + __ shrl(rcx, Immediate(2)); + + // Assertions that must hold in order to compare strings 4 characters at a time. + DCHECK_ALIGNED(value_offset, 8); + static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded"); + + // Loop to compare strings four characters at a time starting at the beginning of the string. + __ repe_cmpsq(); + // If strings are not equal, zero flag will be cleared. + __ j(kNotEqual, &return_false); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ movl(rsi, Immediate(1)); + __ jmp(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ xorl(rsi, rsi); + __ Bind(&end); +} + static void CreateStringIndexOfLocations(HInvoke* invoke, ArenaAllocator* allocator, bool start_at_zero) { @@ -1615,6 +1696,8 @@ void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros) +UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 0e0e72c1fc..17293af4d2 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { // Define visitor methods. 
-#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -61,7 +61,7 @@ class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index 2535ea274a..5b89b4ec74 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -115,7 +115,7 @@ void LICM::Run() { HInstruction* instruction = inst_it.Current(); if (instruction->CanBeMoved() && (!instruction->CanThrow() || !found_first_non_hoisted_throwing_instruction_in_loop) - && !instruction->GetSideEffects().DependsOn(loop_effects) + && !instruction->GetSideEffects().MayDependOn(loop_effects) && InputsAreDefinedBeforeLoop(instruction)) { // We need to update the environment if the instruction has a loop header // phi in it. diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc new file mode 100644 index 0000000000..2fc66e6de4 --- /dev/null +++ b/compiler/optimizing/licm_test.cc @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "base/arena_allocator.h" +#include "builder.h" +#include "gtest/gtest.h" +#include "licm.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "side_effects_analysis.h" + +namespace art { + +/** + * Fixture class for the LICM tests. + */ +class LICMTest : public testing::Test { + public: + LICMTest() : pool_(), allocator_(&pool_) { + graph_ = CreateGraph(&allocator_); + } + + ~LICMTest() { } + + // Builds a singly-nested loop structure in CFG. Tests can further populate + // the basic blocks with instructions to set up interesting scenarios. + void BuildLoop() { + entry_ = new (&allocator_) HBasicBlock(graph_); + loop_preheader_ = new (&allocator_) HBasicBlock(graph_); + loop_header_ = new (&allocator_) HBasicBlock(graph_); + loop_body_ = new (&allocator_) HBasicBlock(graph_); + exit_ = new (&allocator_) HBasicBlock(graph_); + + graph_->AddBlock(entry_); + graph_->AddBlock(loop_preheader_); + graph_->AddBlock(loop_header_); + graph_->AddBlock(loop_body_); + graph_->AddBlock(exit_); + + graph_->SetEntryBlock(entry_); + graph_->SetExitBlock(exit_); + + // Set up loop flow in CFG. + entry_->AddSuccessor(loop_preheader_); + loop_preheader_->AddSuccessor(loop_header_); + loop_header_->AddSuccessor(loop_body_); + loop_header_->AddSuccessor(exit_); + loop_body_->AddSuccessor(loop_header_); + + // Provide boiler-plate instructions. 
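+    // (The CFG wired up above is: entry -> preheader -> header, header -> body and
+    //  header -> exit, body -> header. That is a single natural loop with a dedicated
+    //  preheader, which is where LICM is expected to place any hoisted instruction.)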
+ parameter_ = new (&allocator_) HParameterValue(0, Primitive::kPrimNot); + entry_->AddInstruction(parameter_); + constant_ = new (&allocator_) HConstant(Primitive::kPrimInt); + loop_preheader_->AddInstruction(constant_); + loop_header_->AddInstruction(new (&allocator_) HIf(parameter_)); + loop_body_->AddInstruction(new (&allocator_) HGoto()); + exit_->AddInstruction(new (&allocator_) HExit()); + } + + // Performs LICM optimizations (after proper set up). + void PerformLICM() { + ASSERT_TRUE(graph_->TryBuildingSsa()); + SideEffectsAnalysis side_effects(graph_); + side_effects.Run(); + LICM licm(graph_, side_effects); + licm.Run(); + } + + // General building fields. + ArenaPool pool_; + ArenaAllocator allocator_; + HGraph* graph_; + + // Specific basic blocks. + HBasicBlock* entry_; + HBasicBlock* loop_preheader_; + HBasicBlock* loop_header_; + HBasicBlock* loop_body_; + HBasicBlock* exit_; + + HInstruction* parameter_; // "this" + HInstruction* constant_; +}; + +// +// The actual LICM tests. +// + +TEST_F(LICMTest, ConstantHoisting) { + BuildLoop(); + + // Populate the loop with instructions: set array to constant. + HInstruction* constant = new (&allocator_) HConstant(Primitive::kPrimDouble); + loop_body_->InsertInstructionBefore(constant, loop_body_->GetLastInstruction()); + HInstruction* set_array = new (&allocator_) HArraySet( + parameter_, constant_, constant, Primitive::kPrimDouble, 0); + loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); + + EXPECT_EQ(constant->GetBlock(), loop_body_); + EXPECT_EQ(set_array->GetBlock(), loop_body_); + PerformLICM(); + EXPECT_EQ(constant->GetBlock(), loop_preheader_); + EXPECT_EQ(set_array->GetBlock(), loop_body_); +} + +TEST_F(LICMTest, FieldHoisting) { + BuildLoop(); + + // Populate the loop with instructions: set/get field with different types. + HInstruction* get_field = new (&allocator_) HInstanceFieldGet( + parameter_, Primitive::kPrimLong, MemberOffset(10), + false, kUnknownFieldIndex, graph_->GetDexFile()); + loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction()); + HInstruction* set_field = new (&allocator_) HInstanceFieldSet( + parameter_, constant_, Primitive::kPrimInt, MemberOffset(20), + false, kUnknownFieldIndex, graph_->GetDexFile()); + loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction()); + + EXPECT_EQ(get_field->GetBlock(), loop_body_); + EXPECT_EQ(set_field->GetBlock(), loop_body_); + PerformLICM(); + EXPECT_EQ(get_field->GetBlock(), loop_preheader_); + EXPECT_EQ(set_field->GetBlock(), loop_body_); +} + +TEST_F(LICMTest, NoFieldHoisting) { + BuildLoop(); + + // Populate the loop with instructions: set/get field with same types. 
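+  // (Under the typed side-effects model, a kPrimLong field read may depend on a kPrimLong
+  //  field write in the same loop, so neither instruction is hoistable; the EXPECTs below
+  //  check that both stay in the loop body.)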
+ HInstruction* get_field = new (&allocator_) HInstanceFieldGet( + parameter_, Primitive::kPrimLong, MemberOffset(10), + false, kUnknownFieldIndex, graph_->GetDexFile()); + loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction()); + HInstruction* set_field = new (&allocator_) HInstanceFieldSet( + parameter_, get_field, Primitive::kPrimLong, MemberOffset(10), + false, kUnknownFieldIndex, graph_->GetDexFile()); + loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction()); + + EXPECT_EQ(get_field->GetBlock(), loop_body_); + EXPECT_EQ(set_field->GetBlock(), loop_body_); + PerformLICM(); + EXPECT_EQ(get_field->GetBlock(), loop_body_); + EXPECT_EQ(set_field->GetBlock(), loop_body_); +} + +TEST_F(LICMTest, ArrayHoisting) { + BuildLoop(); + + // Populate the loop with instructions: set/get array with different types. + HInstruction* get_array = new (&allocator_) HArrayGet( + parameter_, constant_, Primitive::kPrimLong); + loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction()); + HInstruction* set_array = new (&allocator_) HArraySet( + parameter_, constant_, constant_, Primitive::kPrimInt, 0); + loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); + + EXPECT_EQ(get_array->GetBlock(), loop_body_); + EXPECT_EQ(set_array->GetBlock(), loop_body_); + PerformLICM(); + EXPECT_EQ(get_array->GetBlock(), loop_preheader_); + EXPECT_EQ(set_array->GetBlock(), loop_body_); +} + +TEST_F(LICMTest, NoArrayHoisting) { + BuildLoop(); + + // Populate the loop with instructions: set/get array with same types. + HInstruction* get_array = new (&allocator_) HArrayGet( + parameter_, constant_, Primitive::kPrimLong); + loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction()); + HInstruction* set_array = new (&allocator_) HArraySet( + parameter_, get_array, constant_, Primitive::kPrimLong, 0); + loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); + + EXPECT_EQ(get_array->GetBlock(), loop_body_); + EXPECT_EQ(set_array->GetBlock(), loop_body_); + PerformLICM(); + EXPECT_EQ(get_array->GetBlock(), loop_body_); + EXPECT_EQ(set_array->GetBlock(), loop_body_); +} + +} // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 588ab70001..b6a1980217 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -98,26 +98,31 @@ void HGraph::VisitBlockForBackEdges(HBasicBlock* block, } void HGraph::BuildDominatorTree() { + // (1) Simplify the CFG so that catch blocks have only exceptional incoming + // edges. This invariant simplifies building SSA form because Phis cannot + // collect both normal- and exceptional-flow values at the same time. + SimplifyCatchBlocks(); + ArenaBitVector visited(arena_, blocks_.Size(), false); - // (1) Find the back edges in the graph doing a DFS traversal. + // (2) Find the back edges in the graph doing a DFS traversal. FindBackEdges(&visited); - // (2) Remove instructions and phis from blocks not visited during + // (3) Remove instructions and phis from blocks not visited during // the initial DFS as users from other instructions, so that // users can be safely removed before uses later. RemoveInstructionsAsUsersFromDeadBlocks(visited); - // (3) Remove blocks not visited during the initial DFS. + // (4) Remove blocks not visited during the initial DFS. // Step (4) requires dead blocks to be removed from the // predecessors list of live blocks. 
RemoveDeadBlocks(visited); - // (4) Simplify the CFG now, so that we don't need to recompute + // (5) Simplify the CFG now, so that we don't need to recompute // dominators and the reverse post order. SimplifyCFG(); - // (5) Compute the dominance information and the reverse post order. + // (6) Compute the dominance information and the reverse post order. ComputeDominanceInformation(); } @@ -261,6 +266,83 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { info->SetSuspendCheck(first_instruction->AsSuspendCheck()); } +static bool CheckIfPredecessorAtIsExceptional(const HBasicBlock& block, size_t pred_idx) { + HBasicBlock* predecessor = block.GetPredecessors().Get(pred_idx); + if (!predecessor->EndsWithTryBoundary()) { + // Only edges from HTryBoundary can be exceptional. + return false; + } + HTryBoundary* try_boundary = predecessor->GetLastInstruction()->AsTryBoundary(); + if (try_boundary->GetNormalFlowSuccessor() == &block) { + // This block is the normal-flow successor of `try_boundary`, but it could + // also be one of its exception handlers if catch blocks have not been + // simplified yet. Predecessors are unordered, so we will consider the first + // occurrence to be the normal edge and a possible second occurrence to be + // the exceptional edge. + return !block.IsFirstIndexOfPredecessor(predecessor, pred_idx); + } else { + // This is not the normal-flow successor of `try_boundary`, hence it must be + // one of its exception handlers. + DCHECK(try_boundary->HasExceptionHandler(block)); + return true; + } +} + +void HGraph::SimplifyCatchBlocks() { + for (size_t i = 0; i < blocks_.Size(); ++i) { + HBasicBlock* catch_block = blocks_.Get(i); + if (!catch_block->IsCatchBlock()) { + continue; + } + + bool exceptional_predecessors_only = true; + for (size_t j = 0; j < catch_block->GetPredecessors().Size(); ++j) { + if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) { + exceptional_predecessors_only = false; + break; + } + } + + if (!exceptional_predecessors_only) { + // Catch block has normal-flow predecessors and needs to be simplified. + // Splitting the block before its first instruction moves all its + // instructions into `normal_block` and links the two blocks with a Goto. + // Afterwards, incoming normal-flow edges are re-linked to `normal_block`, + // leaving `catch_block` with the exceptional edges only. + // Note that catch blocks with normal-flow predecessors cannot begin with + // a MOVE_EXCEPTION instruction, as guaranteed by the verifier. + DCHECK(!catch_block->GetFirstInstruction()->IsLoadException()); + HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction()); + for (size_t j = 0; j < catch_block->GetPredecessors().Size(); ++j) { + if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) { + catch_block->GetPredecessors().Get(j)->ReplaceSuccessor(catch_block, normal_block); + --j; + } + } + } + } +} + +void HGraph::ComputeTryBlockInformation() { + // Iterate in reverse post order to propagate try membership information from + // predecessors to their successors. + for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + if (block->IsEntryBlock() || block->IsCatchBlock()) { + // Catch blocks after simplification have only exceptional predecessors + // and hence are never in tries. + continue; + } + + // Infer try membership from the first predecessor. 
Having simplified loops, + // the first predecessor can never be a back edge and therefore it must have + // been visited already and had its try membership set. + HBasicBlock* first_predecessor = block->GetPredecessors().Get(0); + DCHECK(!block->IsLoopHeader() || !block->GetLoopInformation()->IsBackEdge(*first_predecessor)); + block->SetTryEntry(first_predecessor->ComputeTryEntryOfSuccessors()); + } +} + void HGraph::SimplifyCFG() { // Simplify the CFG for future analysis, and code generation: // (1): Split critical edges. @@ -268,9 +350,10 @@ void HGraph::SimplifyCFG() { for (size_t i = 0; i < blocks_.Size(); ++i) { HBasicBlock* block = blocks_.Get(i); if (block == nullptr) continue; - if (block->GetSuccessors().Size() > 1) { + if (block->NumberOfNormalSuccessors() > 1) { for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { HBasicBlock* successor = block->GetSuccessors().Get(j); + DCHECK(!successor->IsCatchBlock()); if (successor->GetPredecessors().Size() > 1) { SplitCriticalEdge(block, successor); --j; @@ -288,6 +371,11 @@ bool HGraph::AnalyzeNaturalLoops() const { for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (block->IsLoopHeader()) { + if (block->IsCatchBlock()) { + // TODO: Dealing with exceptional back edges could be tricky because + // they only approximate the real control flow. Bail out for now. + return false; + } HLoopInformation* info = block->GetLoopInformation(); if (!info->Populate()) { // Abort if the loop is non natural. We currently bailout in such cases. @@ -476,6 +564,13 @@ bool HBasicBlock::Dominates(HBasicBlock* other) const { return false; } +void HBasicBlock::AddExceptionalPredecessor(HInstruction* exceptional_predecessor) { + DCHECK(exceptional_predecessor->CanThrow()); + DCHECK(exceptional_predecessor->GetBlock()->IsInTry()); + DCHECK(exceptional_predecessor->GetBlock()->GetTryEntry()->HasExceptionHandler(*this)); + exceptional_predecessors_.Add(exceptional_predecessor); +} + static void UpdateInputsUsers(HInstruction* instruction) { for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { instruction->InputAt(i)->AddUseAt(instruction, i); @@ -917,35 +1012,25 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { HConstant* HUnaryOperation::TryStaticEvaluation() const { if (GetInput()->IsIntConstant()) { - int32_t value = Evaluate(GetInput()->AsIntConstant()->GetValue()); - return GetBlock()->GetGraph()->GetIntConstant(value); + return Evaluate(GetInput()->AsIntConstant()); } else if (GetInput()->IsLongConstant()) { - // TODO: Implement static evaluation of long unary operations. - // - // Do not exit with a fatal condition here. Instead, simply - // return `null' to notify the caller that this instruction - // cannot (yet) be statically evaluated. 
- return nullptr; + return Evaluate(GetInput()->AsLongConstant()); } return nullptr; } HConstant* HBinaryOperation::TryStaticEvaluation() const { - if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) { - int32_t value = Evaluate(GetLeft()->AsIntConstant()->GetValue(), - GetRight()->AsIntConstant()->GetValue()); - return GetBlock()->GetGraph()->GetIntConstant(value); - } else if (GetLeft()->IsLongConstant() && GetRight()->IsLongConstant()) { - int64_t value = Evaluate(GetLeft()->AsLongConstant()->GetValue(), - GetRight()->AsLongConstant()->GetValue()); - if (GetResultType() == Primitive::kPrimLong) { - return GetBlock()->GetGraph()->GetLongConstant(value); - } else if (GetResultType() == Primitive::kPrimBoolean) { - // This can be the result of an HCondition evaluation. - return GetBlock()->GetGraph()->GetIntConstant(static_cast<int32_t>(value)); - } else { - DCHECK_EQ(GetResultType(), Primitive::kPrimInt); - return GetBlock()->GetGraph()->GetIntConstant(static_cast<int32_t>(value)); + if (GetLeft()->IsIntConstant()) { + if (GetRight()->IsIntConstant()) { + return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant()); + } else if (GetRight()->IsLongConstant()) { + return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsLongConstant()); + } + } else if (GetLeft()->IsLongConstant()) { + if (GetRight()->IsIntConstant()) { + return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsIntConstant()); + } else if (GetRight()->IsLongConstant()) { + return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant()); } } return nullptr; @@ -1086,10 +1171,20 @@ HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) { return new_block; } -bool HBasicBlock::IsExceptionalSuccessor(size_t idx) const { - return !GetInstructions().IsEmpty() - && GetLastInstruction()->IsTryBoundary() - && GetLastInstruction()->AsTryBoundary()->IsExceptionalSuccessor(idx); +HTryBoundary* HBasicBlock::ComputeTryEntryOfSuccessors() const { + if (EndsWithTryBoundary()) { + HTryBoundary* try_boundary = GetLastInstruction()->AsTryBoundary(); + if (try_boundary->IsEntry()) { + DCHECK(try_entry_ == nullptr); + return try_boundary; + } else { + DCHECK(try_entry_ != nullptr); + DCHECK(try_entry_->HasSameExceptionHandlersAs(*try_boundary)); + return nullptr; + } + } else { + return try_entry_; + } } static bool HasOnlyOneInstruction(const HBasicBlock& block) { @@ -1114,10 +1209,31 @@ bool HBasicBlock::EndsWithIf() const { return !GetInstructions().IsEmpty() && GetLastInstruction()->IsIf(); } +bool HBasicBlock::EndsWithTryBoundary() const { + return !GetInstructions().IsEmpty() && GetLastInstruction()->IsTryBoundary(); +} + bool HBasicBlock::HasSinglePhi() const { return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr; } +bool HTryBoundary::HasSameExceptionHandlersAs(const HTryBoundary& other) const { + if (GetBlock()->GetSuccessors().Size() != other.GetBlock()->GetSuccessors().Size()) { + return false; + } + + // Exception handlers need to be stored in the same order. 
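+  // (Handler order matters: exception handlers are tried in the order they are listed, so two
+  //  TryBoundary instructions are only interchangeable if they dispatch to the same handlers
+  //  with the same priority.)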
+ for (HExceptionHandlerIterator it1(*this), it2(other); + !it1.Done(); + it1.Advance(), it2.Advance()) { + DCHECK(!it2.Done()); + if (it1.Current() != it2.Current()) { + return false; + } + } + return true; +} + size_t HInstructionList::CountSize() const { size_t size = 0; HInstruction* current = first_instruction_; @@ -1368,7 +1484,7 @@ void HGraph::DeleteDeadBlock(HBasicBlock* block) { blocks_.Put(block->GetBlockId(), nullptr); } -void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { +HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { DCHECK(HasExitBlock()) << "Unimplemented scenario"; // Update the environments in this graph to have the invoke's environment // as parent. @@ -1393,6 +1509,7 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->SetHasBoundsChecks(true); } + HInstruction* return_value = nullptr; if (GetBlocks().Size() == 3) { // Simple case of an entry block, a body block, and an exit block. // Put the body block's instruction into `invoke`'s block. @@ -1407,7 +1524,8 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Replace the invoke with the return value of the inlined graph. if (last->IsReturn()) { - invoke->ReplaceWith(last->InputAt(0)); + return_value = last->InputAt(0); + invoke->ReplaceWith(return_value); } else { DCHECK(last->IsReturnVoid()); } @@ -1429,7 +1547,6 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Update all predecessors of the exit block (now the `to` block) // to not `HReturn` but `HGoto` instead. - HInstruction* return_value = nullptr; bool returns_void = to->GetPredecessors().Get(0)->GetLastInstruction()->IsReturnVoid(); if (to->GetPredecessors().Size() == 1) { HBasicBlock* predecessor = to->GetPredecessors().Get(0); @@ -1563,6 +1680,8 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Finally remove the invoke from the caller. invoke->GetBlock()->RemoveInstruction(invoke); + + return return_value; } /* @@ -1640,11 +1759,39 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { } } +void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { + if (kIsDebugBuild) { + DCHECK_EQ(GetType(), Primitive::kPrimNot); + ScopedObjectAccess soa(Thread::Current()); + DCHECK(rti.IsValid()) << "Invalid RTI for " << DebugName(); + if (IsBoundType()) { + // Having the test here spares us from making the method virtual just for + // the sake of a DCHECK. + ReferenceTypeInfo upper_bound_rti = AsBoundType()->GetUpperBound(); + DCHECK(upper_bound_rti.IsSupertypeOf(rti)) + << " upper_bound_rti: " << upper_bound_rti + << " rti: " << rti; + DCHECK(!upper_bound_rti.GetTypeHandle()->IsFinal() || rti.IsExact()); + } + } + reference_type_info_ = rti; +} + +ReferenceTypeInfo::ReferenceTypeInfo() : type_handle_(TypeHandle()), is_exact_(false) {} + +ReferenceTypeInfo::ReferenceTypeInfo(TypeHandle type_handle, bool is_exact) + : type_handle_(type_handle), is_exact_(is_exact) { + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + DCHECK(IsValidHandle(type_handle)); + } +} + std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { ScopedObjectAccess soa(Thread::Current()); os << "[" - << " is_top=" << rhs.IsTop() - << " type=" << (rhs.IsTop() ? "?" : PrettyClass(rhs.GetTypeHandle().Get())) + << " is_valid=" << rhs.IsValid() + << " type=" << (!rhs.IsValid() ? "?" 
: PrettyClass(rhs.GetTypeHandle().Get())) << " is_exact=" << rhs.IsExact() << " ]"; return os; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 59255d14c3..0df5d6dd2e 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -17,6 +17,8 @@ #ifndef ART_COMPILER_OPTIMIZING_NODES_H_ #define ART_COMPILER_OPTIMIZING_NODES_H_ +#include <type_traits> + #include "base/arena_containers.h" #include "base/arena_object.h" #include "dex/compiler_enums.h" @@ -38,6 +40,7 @@ class HBasicBlock; class HCurrentMethod; class HDoubleConstant; class HEnvironment; +class HFakeString; class HFloatConstant; class HGraphBuilder; class HGraphVisitor; @@ -48,6 +51,7 @@ class HLongConstant; class HNullConstant; class HPhi; class HSuspendCheck; +class HTryBoundary; class LiveInterval; class LocationSummary; class SlowPathCode; @@ -56,6 +60,7 @@ class SsaBuilder; static const int kDefaultNumberOfBlocks = 8; static const int kDefaultNumberOfSuccessors = 2; static const int kDefaultNumberOfPredecessors = 2; +static const int kDefaultNumberOfExceptionalPredecessors = 0; static const int kDefaultNumberOfDominatedBlocks = 1; static const int kDefaultNumberOfBackEdges = 1; @@ -181,6 +186,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // visit for eliminating dead phis: a dead phi can only have loop header phi // users remaining when being visited. if (!AnalyzeNaturalLoops()) return false; + // Precompute per-block try membership before entering the SSA builder, + // which needs the information to build catch block phis from values of + // locals at throwing instructions inside try blocks. + ComputeTryBlockInformation(); TransformToSsa(); in_ssa_form_ = true; return true; @@ -192,14 +201,21 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { void BuildDominatorTree(); void TransformToSsa(); void SimplifyCFG(); + void SimplifyCatchBlocks(); // Analyze all natural loops in this graph. Returns false if one // loop is not natural, that is the header does not dominate the // back edge. bool AnalyzeNaturalLoops() const; + // Iterate over blocks to compute try block membership. Needs reverse post + // order and loop information. + void ComputeTryBlockInformation(); + // Inline this graph in `outer_graph`, replacing the given `invoke` instruction. - void InlineInto(HGraph* outer_graph, HInvoke* invoke); + // Returns the instruction used to replace the invoke expression or null if the + // invoke is for a void method. + HInstruction* InlineInto(HGraph* outer_graph, HInvoke* invoke); // Need to add a couple of blocks to test if the loop body is entered and // put deoptimization instructions, etc. @@ -295,7 +311,12 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // already, it is created and inserted into the graph. This method is only for // integral types. HConstant* GetConstant(Primitive::Type type, int64_t value); + + // TODO: This is problematic for the consistency of reference type propagation + // because it can be created anytime after the pass and thus it will be left + // with an invalid type. 
HNullConstant* GetNullConstant(); + HIntConstant* GetIntConstant(int32_t value) { return CreateConstant(value, &cached_int_constants_); } @@ -546,6 +567,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { explicit HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc) : graph_(graph), predecessors_(graph->GetArena(), kDefaultNumberOfPredecessors), + exceptional_predecessors_(graph->GetArena(), kDefaultNumberOfExceptionalPredecessors), successors_(graph->GetArena(), kDefaultNumberOfSuccessors), loop_information_(nullptr), dominator_(nullptr), @@ -560,6 +582,10 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { return predecessors_; } + const GrowableArray<HInstruction*>& GetExceptionalPredecessors() const { + return exceptional_predecessors_; + } + const GrowableArray<HBasicBlock*>& GetSuccessors() const { return successors_; } @@ -628,6 +654,8 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { HInstruction* GetLastPhi() const { return phis_.last_instruction_; } const HInstructionList& GetPhis() const { return phis_; } + void AddExceptionalPredecessor(HInstruction* exceptional_predecessor); + void AddSuccessor(HBasicBlock* block) { successors_.Add(block); block->predecessors_.Add(this); @@ -667,6 +695,10 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { predecessors_.Delete(block); } + void RemoveExceptionalPredecessor(HInstruction* instruction) { + exceptional_predecessors_.Delete(instruction); + } + void RemoveSuccessor(HBasicBlock* block) { successors_.Delete(block); } @@ -703,6 +735,15 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { return -1; } + size_t GetExceptionalPredecessorIndexOf(HInstruction* exceptional_predecessor) const { + for (size_t i = 0, e = exceptional_predecessors_.Size(); i < e; ++i) { + if (exceptional_predecessors_.Get(i) == exceptional_predecessor) { + return i; + } + } + return -1; + } + size_t GetSuccessorIndexOf(HBasicBlock* successor) const { for (size_t i = 0, e = successors_.Size(); i < e; ++i) { if (successors_.Get(i) == successor) { @@ -729,8 +770,11 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { return GetPredecessorIndexOf(predecessor) == idx; } - // Returns whether successor at index `idx` is an exception handler. - bool IsExceptionalSuccessor(size_t idx) const; + // Returns the number of non-exceptional successors. SsaChecker ensures that + // these are stored at the beginning of the successor list. + size_t NumberOfNormalSuccessors() const { + return EndsWithTryBoundary() ? 1 : GetSuccessors().Size(); + } // Split the block into two blocks just before `cursor`. Returns the newly // created, latter block. Note that this method will add the block to the @@ -829,6 +873,15 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { bool IsInLoop() const { return loop_information_ != nullptr; } + HTryBoundary* GetTryEntry() const { return try_entry_; } + void SetTryEntry(HTryBoundary* try_entry) { try_entry_ = try_entry; } + bool IsInTry() const { return try_entry_ != nullptr; } + + // Returns the try entry that this block's successors should have. They will + // be in the same try, unless the block ends in a try boundary. In that case, + // the appropriate try entry will be returned. + HTryBoundary* ComputeTryEntryOfSuccessors() const; + // Returns whether this block dominates the blocked passed as parameter. 
bool Dominates(HBasicBlock* block) const; @@ -845,11 +898,13 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { bool EndsWithControlFlowInstruction() const; bool EndsWithIf() const; + bool EndsWithTryBoundary() const; bool HasSinglePhi() const; private: HGraph* graph_; GrowableArray<HBasicBlock*> predecessors_; + GrowableArray<HInstruction*> exceptional_predecessors_; GrowableArray<HBasicBlock*> successors_; HInstructionList instructions_; HInstructionList phis_; @@ -863,6 +918,10 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { size_t lifetime_end_; bool is_catch_block_; + // If this block is in a try block, `try_entry_` stores one of, possibly + // several, TryBoundary instructions entering it. + HTryBoundary* try_entry_; + friend class HGraph; friend class HInstruction; @@ -904,6 +963,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(BoundsCheck, Instruction) \ M(BoundType, Instruction) \ M(CheckCast, Instruction) \ + M(ClearException, Instruction) \ M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ M(Condition, BinaryOperation) \ @@ -914,6 +974,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(DoubleConstant, Constant) \ M(Equal, Condition) \ M(Exit, Instruction) \ + M(FakeString, Instruction) \ M(FloatConstant, Constant) \ M(Goto, Instruction) \ M(GreaterThan, Condition) \ @@ -1153,13 +1214,35 @@ class HUserRecord : public ValueObject { HUseListNode<T>* use_node_; }; -// TODO: Add better documentation to this class and maybe refactor with more suggestive names. -// - Has(All)SideEffects suggests that all the side effects are present but only ChangesSomething -// flag is consider. -// - DependsOn suggests that there is a real dependency between side effects but it only -// checks DependendsOnSomething flag. -// -// Represents the side effects an instruction may have. +/** + * Side-effects representation. + * + * For write/read dependences on fields/arrays, the dependence analysis uses + * type disambiguation (e.g. a float field write cannot modify the value of an + * integer field read) and the access type (e.g. a reference array write cannot + * modify the value of a reference field read [although it may modify the + * reference fetch prior to reading the field, which is represented by its own + * write/read dependence]). The analysis makes conservative points-to + * assumptions on reference types (e.g. two same typed arrays are assumed to be + * the same, and any reference read depends on any reference read without + * further regard of its type). + * + * The internal representation uses 38-bit and is described in the table below. + * The first line indicates the side effect, and for field/array accesses the + * second line indicates the type of the access (in the order of the + * Primitive::Type enum). + * The two numbered lines below indicate the bit position in the bitfield (read + * vertically). + * + * |Depends on GC|ARRAY-R |FIELD-R |Can trigger GC|ARRAY-W |FIELD-W | + * +-------------+---------+---------+--------------+---------+---------+ + * | |DFJISCBZL|DFJISCBZL| |DFJISCBZL|DFJISCBZL| + * | 3 |333333322|222222221| 1 |111111110|000000000| + * | 7 |654321098|765432109| 8 |765432109|876543210| + * + * Note that, to ease the implementation, 'changes' bits are least significant + * bits, while 'dependency' bits are most significant bits. 
+ */ class SideEffects : public ValueObject { public: SideEffects() : flags_(0) {} @@ -1169,57 +1252,204 @@ class SideEffects : public ValueObject { } static SideEffects All() { - return SideEffects(ChangesSomething().flags_ | DependsOnSomething().flags_); + return SideEffects(kAllChangeBits | kAllDependOnBits); } - static SideEffects ChangesSomething() { - return SideEffects((1 << kFlagChangesCount) - 1); + static SideEffects AllChanges() { + return SideEffects(kAllChangeBits); } - static SideEffects DependsOnSomething() { - int count = kFlagDependsOnCount - kFlagChangesCount; - return SideEffects(((1 << count) - 1) << kFlagChangesCount); + static SideEffects AllDependencies() { + return SideEffects(kAllDependOnBits); } + static SideEffects AllExceptGCDependency() { + return AllWritesAndReads().Union(SideEffects::CanTriggerGC()); + } + + static SideEffects AllWritesAndReads() { + return SideEffects(kAllWrites | kAllReads); + } + + static SideEffects AllWrites() { + return SideEffects(kAllWrites); + } + + static SideEffects AllReads() { + return SideEffects(kAllReads); + } + + static SideEffects FieldWriteOfType(Primitive::Type type, bool is_volatile) { + return is_volatile + ? AllWritesAndReads() + : SideEffects(TypeFlagWithAlias(type, kFieldWriteOffset)); + } + + static SideEffects ArrayWriteOfType(Primitive::Type type) { + return SideEffects(TypeFlagWithAlias(type, kArrayWriteOffset)); + } + + static SideEffects FieldReadOfType(Primitive::Type type, bool is_volatile) { + return is_volatile + ? AllWritesAndReads() + : SideEffects(TypeFlagWithAlias(type, kFieldReadOffset)); + } + + static SideEffects ArrayReadOfType(Primitive::Type type) { + return SideEffects(TypeFlagWithAlias(type, kArrayReadOffset)); + } + + static SideEffects CanTriggerGC() { + return SideEffects(1ULL << kCanTriggerGCBit); + } + + static SideEffects DependsOnGC() { + return SideEffects(1ULL << kDependsOnGCBit); + } + + // Combines the side-effects of this and the other. SideEffects Union(SideEffects other) const { return SideEffects(flags_ | other.flags_); } - bool HasSideEffects() const { - size_t all_bits_set = (1 << kFlagChangesCount) - 1; - return (flags_ & all_bits_set) != 0; + SideEffects Exclusion(SideEffects other) const { + return SideEffects(flags_ & ~other.flags_); } - bool HasAllSideEffects() const { - size_t all_bits_set = (1 << kFlagChangesCount) - 1; - return all_bits_set == (flags_ & all_bits_set); + bool Includes(SideEffects other) const { + return (other.flags_ & flags_) == other.flags_; } - bool DependsOn(SideEffects other) const { - size_t depends_flags = other.ComputeDependsFlags(); - return (flags_ & depends_flags) != 0; + bool HasSideEffects() const { + return (flags_ & kAllChangeBits); } bool HasDependencies() const { - int count = kFlagDependsOnCount - kFlagChangesCount; - size_t all_bits_set = (1 << count) - 1; - return ((flags_ >> kFlagChangesCount) & all_bits_set) != 0; + return (flags_ & kAllDependOnBits); + } + + // Returns true if there are no side effects or dependencies. + bool DoesNothing() const { + return flags_ == 0; + } + + // Returns true if something is written. + bool DoesAnyWrite() const { + return (flags_ & kAllWrites); } + // Returns true if something is read. + bool DoesAnyRead() const { + return (flags_ & kAllReads); + } + + // Returns true if potentially everything is written and read + // (every type and every kind of access). 
+ bool DoesAllReadWrite() const { + return (flags_ & (kAllWrites | kAllReads)) == (kAllWrites | kAllReads); + } + + bool DoesAll() const { + return flags_ == (kAllChangeBits | kAllDependOnBits); + } + + // Returns true if this may read something written by other. + bool MayDependOn(SideEffects other) const { + const uint64_t depends_on_flags = (flags_ & kAllDependOnBits) >> kChangeBits; + return (other.flags_ & depends_on_flags); + } + + // Returns string representation of flags (for debugging only). + // Format: |x|DFJISCBZL|DFJISCBZL|y|DFJISCBZL|DFJISCBZL| + std::string ToString() const { + std::string flags = "|"; + for (int s = kLastBit; s >= 0; s--) { + bool current_bit_is_set = ((flags_ >> s) & 1) != 0; + if ((s == kDependsOnGCBit) || (s == kCanTriggerGCBit)) { + // This is a bit for the GC side effect. + if (current_bit_is_set) { + flags += "GC"; + } + flags += "|"; + } else { + // This is a bit for the array/field analysis. + // The underscore character stands for the 'can trigger GC' bit. + static const char *kDebug = "LZBCSIJFDLZBCSIJFD_LZBCSIJFDLZBCSIJFD"; + if (current_bit_is_set) { + flags += kDebug[s]; + } + if ((s == kFieldWriteOffset) || (s == kArrayWriteOffset) || + (s == kFieldReadOffset) || (s == kArrayReadOffset)) { + flags += "|"; + } + } + } + return flags; + } + + bool Equals(const SideEffects& other) const { return flags_ == other.flags_; } + private: - static constexpr int kFlagChangesSomething = 0; - static constexpr int kFlagChangesCount = kFlagChangesSomething + 1; + static constexpr int kFieldArrayAnalysisBits = 9; + + static constexpr int kFieldWriteOffset = 0; + static constexpr int kArrayWriteOffset = kFieldWriteOffset + kFieldArrayAnalysisBits; + static constexpr int kLastBitForWrites = kArrayWriteOffset + kFieldArrayAnalysisBits - 1; + static constexpr int kCanTriggerGCBit = kLastBitForWrites + 1; + + static constexpr int kChangeBits = kCanTriggerGCBit + 1; + + static constexpr int kFieldReadOffset = kCanTriggerGCBit + 1; + static constexpr int kArrayReadOffset = kFieldReadOffset + kFieldArrayAnalysisBits; + static constexpr int kLastBitForReads = kArrayReadOffset + kFieldArrayAnalysisBits - 1; + static constexpr int kDependsOnGCBit = kLastBitForReads + 1; + + static constexpr int kLastBit = kDependsOnGCBit; + static constexpr int kDependOnBits = kLastBit + 1 - kChangeBits; - static constexpr int kFlagDependsOnSomething = kFlagChangesCount; - static constexpr int kFlagDependsOnCount = kFlagDependsOnSomething + 1; + // Aliases. - explicit SideEffects(size_t flags) : flags_(flags) {} + static_assert(kChangeBits == kDependOnBits, + "the 'change' bits should match the 'depend on' bits."); + + static constexpr uint64_t kAllChangeBits = ((1ULL << kChangeBits) - 1); + static constexpr uint64_t kAllDependOnBits = ((1ULL << kDependOnBits) - 1) << kChangeBits; + static constexpr uint64_t kAllWrites = + ((1ULL << (kLastBitForWrites + 1 - kFieldWriteOffset)) - 1) << kFieldWriteOffset; + static constexpr uint64_t kAllReads = + ((1ULL << (kLastBitForReads + 1 - kFieldReadOffset)) - 1) << kFieldReadOffset; + + // Work around the fact that HIR aliases I/F and J/D. 
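A quick illustration of the bit layout documented above (nine type bits per access kind, 'change' bits in the low half, 'depend on' bits mirrored in the high half). This is a standalone sketch, not part of the patch: MockType, FieldWrite, FieldRead and the free-standing MayDependOn are made-up stand-ins that only follow the documented offsets, not the real SideEffects API.

    // Simplified model of the SideEffects bitfield, for illustration only.
    #include <cstdint>
    #include <iostream>

    // Type indices in the documented order: L Z B C S I J F D (0..8).
    enum MockType { kRef = 0, kBool, kByte, kChar, kShort, kInt, kLong, kFloat, kDouble };

    constexpr int kTypeBits         = 9;
    constexpr int kFieldWriteOffset = 0;
    constexpr int kArrayWriteOffset = kFieldWriteOffset + kTypeBits;  // 9
    constexpr int kCanTriggerGCBit  = kArrayWriteOffset + kTypeBits;  // 18
    constexpr int kChangeBits       = kCanTriggerGCBit + 1;           // 19
    constexpr int kFieldReadOffset  = kChangeBits;                    // reads mirror writes, shifted up

    constexpr uint64_t kAllDependOnBits = ((1ULL << kChangeBits) - 1) << kChangeBits;

    constexpr uint64_t FieldWrite(MockType t) { return 1ULL << (kFieldWriteOffset + t); }
    constexpr uint64_t FieldRead(MockType t)  { return 1ULL << (kFieldReadOffset + t); }

    // Same shape as MayDependOn(): line the 'depend on' bits up with the other
    // side's 'change' bits, then intersect.
    bool MayDependOn(uint64_t reader, uint64_t writer) {
      uint64_t depends_on = (reader & kAllDependOnBits) >> kChangeBits;
      return (writer & depends_on) != 0;
    }

    int main() {
      std::cout << MayDependOn(FieldRead(kInt), FieldWrite(kInt))     // 1: same type may alias
                << MayDependOn(FieldRead(kInt), FieldWrite(kFloat))   // 0: type-disambiguated
                << std::endl;
      return 0;
    }

The sketch deliberately omits the I/F and J/D folding performed by TypeFlagWithAlias below; with that folding applied, the second query would also report a possible dependency.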
+ // TODO: remove this interceptor once HIR types are clean + static uint64_t TypeFlagWithAlias(Primitive::Type type, int offset) { + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + return TypeFlag(Primitive::kPrimInt, offset) | + TypeFlag(Primitive::kPrimFloat, offset); + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + return TypeFlag(Primitive::kPrimLong, offset) | + TypeFlag(Primitive::kPrimDouble, offset); + default: + return TypeFlag(type, offset); + } + } - size_t ComputeDependsFlags() const { - return flags_ << kFlagChangesCount; + // Translates type to bit flag. + static uint64_t TypeFlag(Primitive::Type type, int offset) { + CHECK_NE(type, Primitive::kPrimVoid); + const uint64_t one = 1; + const int shift = type; // 0-based consecutive enum + DCHECK_LE(kFieldWriteOffset, shift); + DCHECK_LT(shift, kArrayWriteOffset); + return one << (type + offset); } - size_t flags_; + // Private constructor on direct flags value. + explicit SideEffects(uint64_t flags) : flags_(flags) {} + + uint64_t flags_; }; // A HEnvironment object contains the values of virtual registers at a given location. @@ -1339,8 +1569,7 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { const uint32_t dex_pc_; const InvokeType invoke_type_; - // The instruction that holds this environment. Only used in debug mode - // to ensure the graph is consistent. + // The instruction that holds this environment. HInstruction* const holder_; friend class HInstruction; @@ -1352,79 +1581,64 @@ class ReferenceTypeInfo : ValueObject { public: typedef Handle<mirror::Class> TypeHandle; - static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - if (type_handle->IsObjectClass()) { - // Override the type handle to be consistent with the case when we get to - // Top but don't have the Object class available. It avoids having to guess - // what value the type_handle has when it's Top. - return ReferenceTypeInfo(TypeHandle(), is_exact, true); - } else { - return ReferenceTypeInfo(type_handle, is_exact, false); - } + static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) { + // The constructor will check that the type_handle is valid. + return ReferenceTypeInfo(type_handle, is_exact); } - static ReferenceTypeInfo CreateTop(bool is_exact) { - return ReferenceTypeInfo(TypeHandle(), is_exact, true); + static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); } + + static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) { + return handle.GetReference() != nullptr; } + bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) { + return IsValidHandle(type_handle_); + } bool IsExact() const { return is_exact_; } - bool IsTop() const { return is_top_; } - bool IsInterface() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return !IsTop() && GetTypeHandle()->IsInterface(); + + bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsObjectClass(); + } + bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsInterface(); } Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } - bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - if (IsTop()) { - // Top (equivalent for java.lang.Object) is supertype of anything. 
- return true; - } - if (rti.IsTop()) { - // If we get here `this` is not Top() so it can't be a supertype. - return false; - } + bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + DCHECK(rti.IsValid()); return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); } // Returns true if the type information provide the same amount of details. // Note that it does not mean that the instructions have the same actual type - // (e.g. tops are equal but they can be the result of a merge). - bool IsEqual(ReferenceTypeInfo rti) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - if (IsExact() != rti.IsExact()) { - return false; - } - if (IsTop() && rti.IsTop()) { - // `Top` means java.lang.Object, so the types are equivalent. + // (because the type can be the result of a merge). + bool IsEqual(ReferenceTypeInfo rti) SHARED_REQUIRES(Locks::mutator_lock_) { + if (!IsValid() && !rti.IsValid()) { + // Invalid types are equal. return true; } - if (IsTop() || rti.IsTop()) { - // If only one is top or object than they are not equivalent. - // NB: We need this extra check because the type_handle of `Top` is invalid - // and we cannot inspect its reference. + if (!IsValid() || !rti.IsValid()) { + // One is valid, the other not. return false; } - - // Finally check the types. - return GetTypeHandle().Get() == rti.GetTypeHandle().Get(); + return IsExact() == rti.IsExact() + && GetTypeHandle().Get() == rti.GetTypeHandle().Get(); } private: - ReferenceTypeInfo() : ReferenceTypeInfo(TypeHandle(), false, true) {} - ReferenceTypeInfo(TypeHandle type_handle, bool is_exact, bool is_top) - : type_handle_(type_handle), is_exact_(is_exact), is_top_(is_top) {} + ReferenceTypeInfo(); + ReferenceTypeInfo(TypeHandle type_handle, bool is_exact); // The class of the object. TypeHandle type_handle_; // Whether or not the type is exact or a superclass of the actual type. // Whether or not we have any information about this type. bool is_exact_; - // A true value here means that the object type should be java.lang.Object. - // We don't have access to the corresponding mirror object every time so this - // flag acts as a substitute. When true, the TypeHandle refers to a null - // pointer and should not be used. 
- bool is_top_; }; std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs); @@ -1442,7 +1656,7 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { live_interval_(nullptr), lifetime_position_(kNoLifetime), side_effects_(side_effects), - reference_type_info_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {} + reference_type_info_(ReferenceTypeInfo::CreateInvalid()) {} virtual ~HInstruction() {} @@ -1459,6 +1673,7 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { HInstruction* GetPreviousDisregardingMoves() const; HBasicBlock* GetBlock() const { return block_; } + ArenaAllocator* GetArena() const { return block_->GetGraph()->GetArena(); } void SetBlock(HBasicBlock* block) { block_ = block; } bool IsInBlock() const { return block_ != nullptr; } bool IsInLoop() const { return block_->IsInLoop(); } @@ -1483,10 +1698,13 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { } virtual bool IsControlFlow() const { return false; } virtual bool CanThrow() const { return false; } + bool HasSideEffects() const { return side_effects_.HasSideEffects(); } + bool DoesAnyWrite() const { return side_effects_.DoesAnyWrite(); } // Does not apply for all instructions, but having this at top level greatly // simplifies the null check elimination. + // TODO: Consider merging can_be_null into ReferenceTypeInfo. virtual bool CanBeNull() const { DCHECK_EQ(GetType(), Primitive::kPrimNot) << "CanBeNull only applies to reference types"; return true; @@ -1497,10 +1715,7 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { return false; } - void SetReferenceTypeInfo(ReferenceTypeInfo reference_type_info) { - DCHECK_EQ(GetType(), Primitive::kPrimNot); - reference_type_info_ = reference_type_info; - } + void SetReferenceTypeInfo(ReferenceTypeInfo rti); ReferenceTypeInfo GetReferenceTypeInfo() const { DCHECK_EQ(GetType(), Primitive::kPrimNot); @@ -1926,6 +2141,95 @@ class HGoto : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HGoto); }; +class HConstant : public HExpression<0> { + public: + explicit HConstant(Primitive::Type type) : HExpression(type, SideEffects::None()) {} + + bool CanBeMoved() const OVERRIDE { return true; } + + virtual bool IsMinusOne() const { return false; } + virtual bool IsZero() const { return false; } + virtual bool IsOne() const { return false; } + + DECLARE_INSTRUCTION(Constant); + + private: + DISALLOW_COPY_AND_ASSIGN(HConstant); +}; + +class HNullConstant : public HConstant { + public: + bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } + + size_t ComputeHashCode() const OVERRIDE { return 0; } + + DECLARE_INSTRUCTION(NullConstant); + + private: + HNullConstant() : HConstant(Primitive::kPrimNot) {} + + friend class HGraph; + DISALLOW_COPY_AND_ASSIGN(HNullConstant); +}; + +// Constants of the type int. Those can be from Dex instructions, or +// synthesized (for example with the if-eqz instruction). 
+class HIntConstant : public HConstant { + public: + int32_t GetValue() const { return value_; } + + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + DCHECK(other->IsIntConstant()); + return other->AsIntConstant()->value_ == value_; + } + + size_t ComputeHashCode() const OVERRIDE { return GetValue(); } + + bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } + bool IsZero() const OVERRIDE { return GetValue() == 0; } + bool IsOne() const OVERRIDE { return GetValue() == 1; } + + DECLARE_INSTRUCTION(IntConstant); + + private: + explicit HIntConstant(int32_t value) : HConstant(Primitive::kPrimInt), value_(value) {} + explicit HIntConstant(bool value) : HConstant(Primitive::kPrimInt), value_(value ? 1 : 0) {} + + const int32_t value_; + + friend class HGraph; + ART_FRIEND_TEST(GraphTest, InsertInstructionBefore); + ART_FRIEND_TYPED_TEST(ParallelMoveTest, ConstantLast); + DISALLOW_COPY_AND_ASSIGN(HIntConstant); +}; + +class HLongConstant : public HConstant { + public: + int64_t GetValue() const { return value_; } + + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + DCHECK(other->IsLongConstant()); + return other->AsLongConstant()->value_ == value_; + } + + size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + + bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } + bool IsZero() const OVERRIDE { return GetValue() == 0; } + bool IsOne() const OVERRIDE { return GetValue() == 1; } + + DECLARE_INSTRUCTION(LongConstant); + + private: + explicit HLongConstant(int64_t value) : HConstant(Primitive::kPrimLong), value_(value) {} + + const int64_t value_; + + friend class HGraph; + DISALLOW_COPY_AND_ASSIGN(HLongConstant); +}; // Conditional branch. A block ending with an HIf instruction must have // two successors. @@ -1974,29 +2278,24 @@ class HTryBoundary : public HTemplateInstruction<0> { // Returns whether `handler` is among its exception handlers (non-zero index // successors). - bool HasExceptionHandler(HBasicBlock* handler) const { - DCHECK(handler->IsCatchBlock()); - return GetBlock()->GetSuccessors().Contains(handler, /* start_from */ 1); - } - - // Returns whether successor at index `idx` is an exception handler. - bool IsExceptionalSuccessor(size_t idx) const { - DCHECK_LT(idx, GetBlock()->GetSuccessors().Size()); - bool is_handler = (idx != 0); - DCHECK(!is_handler || GetBlock()->GetSuccessors().Get(idx)->IsCatchBlock()); - return is_handler; + bool HasExceptionHandler(const HBasicBlock& handler) const { + DCHECK(handler.IsCatchBlock()); + return GetBlock()->GetSuccessors().Contains( + const_cast<HBasicBlock*>(&handler), /* start_from */ 1); } // If not present already, adds `handler` to its block's list of exception // handlers. void AddExceptionHandler(HBasicBlock* handler) { - if (!HasExceptionHandler(handler)) { + if (!HasExceptionHandler(*handler)) { GetBlock()->AddSuccessor(handler); } } bool IsEntry() const { return kind_ == BoundaryKind::kEntry; } + bool HasSameExceptionHandlersAs(const HTryBoundary& other) const; + DECLARE_INSTRUCTION(TryBoundary); private: @@ -2005,6 +2304,24 @@ class HTryBoundary : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HTryBoundary); }; +// Iterator over exception handlers of a given HTryBoundary, i.e. over +// exceptional successors of its basic block. 
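Before the iterator declared next, a standalone sketch of the successor layout that it and the new NumberOfNormalSuccessors() rely on: normal successors are stored first, catch handlers are appended after them. MockBlock and the block ids are made-up illustrations, not ART types.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Minimal stand-in for a basic block's successor list.
    struct MockBlock {
      std::vector<int> successors;            // block ids, illustration only
      bool ends_with_try_boundary = false;

      size_t NumberOfNormalSuccessors() const {
        // A block ending in a TryBoundary has exactly one normal successor;
        // every successor after it is an exception handler.
        return ends_with_try_boundary ? 1u : successors.size();
      }
    };

    int main() {
      MockBlock block;
      block.ends_with_try_boundary = true;
      block.successors = {10, 20, 21};        // 10 = normal exit, 20/21 = handlers

      // Same walk as HExceptionHandlerIterator: start just past the normal successors.
      for (size_t i = block.NumberOfNormalSuccessors(); i < block.successors.size(); ++i) {
        std::cout << "handler: block " << block.successors[i] << std::endl;
      }
      return 0;
    }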
+class HExceptionHandlerIterator : public ValueObject { + public: + explicit HExceptionHandlerIterator(const HTryBoundary& try_boundary) + : block_(*try_boundary.GetBlock()), index_(block_.NumberOfNormalSuccessors()) {} + + bool Done() const { return index_ == block_.GetSuccessors().Size(); } + HBasicBlock* Current() const { return block_.GetSuccessors().Get(index_); } + size_t CurrentSuccessorIndex() const { return index_; } + void Advance() { ++index_; } + + private: + const HBasicBlock& block_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HExceptionHandlerIterator); +}; // Deoptimize to interpreter, upon checking a condition. class HDeoptimize : public HTemplateInstruction<1> { @@ -2062,8 +2379,8 @@ class HUnaryOperation : public HExpression<1> { HConstant* TryStaticEvaluation() const; // Apply this operation to `x`. - virtual int32_t Evaluate(int32_t x) const = 0; - virtual int64_t Evaluate(int64_t x) const = 0; + virtual HConstant* Evaluate(HIntConstant* x) const = 0; + virtual HConstant* Evaluate(HLongConstant* x) const = 0; DECLARE_INSTRUCTION(UnaryOperation); @@ -2075,7 +2392,9 @@ class HBinaryOperation : public HExpression<2> { public: HBinaryOperation(Primitive::Type result_type, HInstruction* left, - HInstruction* right) : HExpression(result_type, SideEffects::None()) { + HInstruction* right, + SideEffects side_effects = SideEffects::None()) + : HExpression(result_type, side_effects) { SetRawInputAt(0, left); SetRawInputAt(1, right); } @@ -2130,8 +2449,18 @@ class HBinaryOperation : public HExpression<2> { HConstant* TryStaticEvaluation() const; // Apply this operation to `x` and `y`. - virtual int32_t Evaluate(int32_t x, int32_t y) const = 0; - virtual int64_t Evaluate(int64_t x, int64_t y) const = 0; + virtual HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const = 0; + virtual HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const = 0; + virtual HConstant* Evaluate(HIntConstant* x ATTRIBUTE_UNUSED, + HLongConstant* y ATTRIBUTE_UNUSED) const { + VLOG(compiler) << DebugName() << " is not defined for the (int, long) case."; + return nullptr; + } + virtual HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED, + HIntConstant* y ATTRIBUTE_UNUSED) const { + VLOG(compiler) << DebugName() << " is not defined for the (long, int) case."; + return nullptr; + } // Returns an input that can legally be used as the right input and is // constant, or null. @@ -2149,7 +2478,7 @@ class HBinaryOperation : public HExpression<2> { // The comparison bias applies for floating point operations and indicates how NaN // comparisons are treated: -enum ComparisonBias { +enum class ComparisonBias { kNoBias, // bias is not applicable (i.e. 
for long operation) kGtBias, // return 1 for NaN comparisons kLtBias, // return -1 for NaN comparisons @@ -2160,7 +2489,7 @@ class HCondition : public HBinaryOperation { HCondition(HInstruction* first, HInstruction* second) : HBinaryOperation(Primitive::kPrimBoolean, first, second), needs_materialization_(true), - bias_(kNoBias) {} + bias_(ComparisonBias::kNoBias) {} bool NeedsMaterialization() const { return needs_materialization_; } void ClearNeedsMaterialization() { needs_materialization_ = false; } @@ -2175,7 +2504,7 @@ class HCondition : public HBinaryOperation { virtual IfCondition GetOppositeCondition() const = 0; - bool IsGtBias() { return bias_ == kGtBias; } + bool IsGtBias() const { return bias_ == ComparisonBias::kGtBias; } void SetBias(ComparisonBias bias) { bias_ = bias; } @@ -2183,6 +2512,18 @@ class HCondition : public HBinaryOperation { return bias_ == other->AsCondition()->bias_; } + bool IsFPConditionTrueIfNaN() const { + DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())); + IfCondition if_cond = GetCondition(); + return IsGtBias() ? ((if_cond == kCondGT) || (if_cond == kCondGE)) : (if_cond == kCondNE); + } + + bool IsFPConditionFalseIfNaN() const { + DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())); + IfCondition if_cond = GetCondition(); + return IsGtBias() ? ((if_cond == kCondLT) || (if_cond == kCondLE)) : (if_cond == kCondEQ); + } + private: // For register allocation purposes, returns whether this instruction needs to be // materialized (that is, not just be in the processor flags). @@ -2202,11 +2543,13 @@ class HEqual : public HCondition { bool IsCommutative() const OVERRIDE { return true; } - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { - return x == y ? 1 : 0; + template <typename T> bool Compute(T x, T y) const { return x == y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - return x == y ? 1 : 0; + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } DECLARE_INSTRUCTION(Equal); @@ -2230,11 +2573,13 @@ class HNotEqual : public HCondition { bool IsCommutative() const OVERRIDE { return true; } - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { - return x != y ? 1 : 0; + template <typename T> bool Compute(T x, T y) const { return x != y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - return x != y ? 1 : 0; + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } DECLARE_INSTRUCTION(NotEqual); @@ -2256,11 +2601,13 @@ class HLessThan : public HCondition { HLessThan(HInstruction* first, HInstruction* second) : HCondition(first, second) {} - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { - return x < y ? 1 : 0; + template <typename T> bool Compute(T x, T y) const { return x < y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - return x < y ? 
1 : 0; + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } DECLARE_INSTRUCTION(LessThan); @@ -2282,11 +2629,13 @@ class HLessThanOrEqual : public HCondition { HLessThanOrEqual(HInstruction* first, HInstruction* second) : HCondition(first, second) {} - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { - return x <= y ? 1 : 0; + template <typename T> bool Compute(T x, T y) const { return x <= y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - return x <= y ? 1 : 0; + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } DECLARE_INSTRUCTION(LessThanOrEqual); @@ -2308,11 +2657,13 @@ class HGreaterThan : public HCondition { HGreaterThan(HInstruction* first, HInstruction* second) : HCondition(first, second) {} - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { - return x > y ? 1 : 0; + template <typename T> bool Compute(T x, T y) const { return x > y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - return x > y ? 1 : 0; + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } DECLARE_INSTRUCTION(GreaterThan); @@ -2334,11 +2685,13 @@ class HGreaterThanOrEqual : public HCondition { HGreaterThanOrEqual(HInstruction* first, HInstruction* second) : HCondition(first, second) {} - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { - return x >= y ? 1 : 0; + template <typename T> bool Compute(T x, T y) const { return x >= y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - return x >= y ? 1 : 0; + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } DECLARE_INSTRUCTION(GreaterThanOrEqual); @@ -2365,23 +2718,21 @@ class HCompare : public HBinaryOperation { HInstruction* second, ComparisonBias bias, uint32_t dex_pc) - : HBinaryOperation(Primitive::kPrimInt, first, second), bias_(bias), dex_pc_(dex_pc) { + : HBinaryOperation(Primitive::kPrimInt, first, second, SideEffectsForArchRuntimeCalls(type)), + bias_(bias), + dex_pc_(dex_pc) { DCHECK_EQ(type, first->GetType()); DCHECK_EQ(type, second->GetType()); } - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { - return - x == y ? 0 : - x > y ? 1 : - -1; - } + template <typename T> + int32_t Compute(T x, T y) const { return x == y ? 0 : x > y ? 1 : -1; } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - return - x == y ? 0 : - x > y ? 
1 : - -1; + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } bool InstructionDataEquals(HInstruction* other) const OVERRIDE { @@ -2390,9 +2741,14 @@ class HCompare : public HBinaryOperation { ComparisonBias GetBias() const { return bias_; } - bool IsGtBias() { return bias_ == kGtBias; } + bool IsGtBias() { return bias_ == ComparisonBias::kGtBias; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } + + static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type type) { + // MIPS64 uses a runtime call for FP comparisons. + return Primitive::IsFloatingPointType(type) ? SideEffects::CanTriggerGC() : SideEffects::None(); + } DECLARE_INSTRUCTION(Compare); @@ -2453,27 +2809,12 @@ class HStoreLocal : public HTemplateInstruction<2> { DISALLOW_COPY_AND_ASSIGN(HStoreLocal); }; -class HConstant : public HExpression<0> { - public: - explicit HConstant(Primitive::Type type) : HExpression(type, SideEffects::None()) {} - - bool CanBeMoved() const OVERRIDE { return true; } - - virtual bool IsMinusOne() const { return false; } - virtual bool IsZero() const { return false; } - virtual bool IsOne() const { return false; } - - DECLARE_INSTRUCTION(Constant); - - private: - DISALLOW_COPY_AND_ASSIGN(HConstant); -}; - class HFloatConstant : public HConstant { public: float GetValue() const { return value_; } bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + DCHECK(other->IsFloatConstant()); return bit_cast<uint32_t, float>(other->AsFloatConstant()->value_) == bit_cast<uint32_t, float>(value_); } @@ -2513,6 +2854,7 @@ class HDoubleConstant : public HConstant { double GetValue() const { return value_; } bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + DCHECK(other->IsDoubleConstant()); return bit_cast<uint64_t, double>(other->AsDoubleConstant()->value_) == bit_cast<uint64_t, double>(value_); } @@ -2547,79 +2889,8 @@ class HDoubleConstant : public HConstant { DISALLOW_COPY_AND_ASSIGN(HDoubleConstant); }; -class HNullConstant : public HConstant { - public: - bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; - } - - size_t ComputeHashCode() const OVERRIDE { return 0; } - - DECLARE_INSTRUCTION(NullConstant); - - private: - HNullConstant() : HConstant(Primitive::kPrimNot) {} - - friend class HGraph; - DISALLOW_COPY_AND_ASSIGN(HNullConstant); -}; - -// Constants of the type int. Those can be from Dex instructions, or -// synthesized (for example with the if-eqz instruction). 
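The ComparisonBias values above decide what a floating-point compare produces when an operand is NaN; each Java comparison is paired with the bias that makes the following branch evaluate to false on NaN. A standalone illustration (CompareGtBias/CompareLtBias are made-up names mimicking cmpg/cmpl semantics, not ART code):

    #include <cmath>
    #include <iostream>

    // cmpg-style compare: NaN pushes the result towards +1 (kGtBias).
    int CompareGtBias(float x, float y) {
      if (x < y)  return -1;
      if (x == y) return 0;
      return 1;                        // x > y, or either operand is NaN
    }

    // cmpl-style compare: NaN pushes the result towards -1 (kLtBias).
    int CompareLtBias(float x, float y) {
      if (x > y)  return 1;
      if (x == y) return 0;
      return -1;                       // x < y, or either operand is NaN
    }

    int main() {
      float nan = std::nanf("");
      // `x < y` is typically lowered via a gt-biased compare: NaN gives 1, so "less than" is false.
      std::cout << (CompareGtBias(nan, 1.0f) < 0) << std::endl;   // 0
      // `x > y` is typically lowered via an lt-biased compare: NaN gives -1, so "greater than" is false.
      std::cout << (CompareLtBias(nan, 1.0f) > 0) << std::endl;   // 0
      return 0;
    }

This is also what IsFPConditionTrueIfNaN()/IsFPConditionFalseIfNaN() above capture: once the bias and the condition are known, the outcome on NaN is fixed and the code generators can exploit it.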
-class HIntConstant : public HConstant { - public: - int32_t GetValue() const { return value_; } - - bool InstructionDataEquals(HInstruction* other) const OVERRIDE { - return other->AsIntConstant()->value_ == value_; - } - - size_t ComputeHashCode() const OVERRIDE { return GetValue(); } - - bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } - bool IsZero() const OVERRIDE { return GetValue() == 0; } - bool IsOne() const OVERRIDE { return GetValue() == 1; } - - DECLARE_INSTRUCTION(IntConstant); - - private: - explicit HIntConstant(int32_t value) : HConstant(Primitive::kPrimInt), value_(value) {} - - const int32_t value_; - - friend class HGraph; - ART_FRIEND_TEST(GraphTest, InsertInstructionBefore); - ART_FRIEND_TYPED_TEST(ParallelMoveTest, ConstantLast); - DISALLOW_COPY_AND_ASSIGN(HIntConstant); -}; - -class HLongConstant : public HConstant { - public: - int64_t GetValue() const { return value_; } - - bool InstructionDataEquals(HInstruction* other) const OVERRIDE { - return other->AsLongConstant()->value_ == value_; - } - - size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } - - bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } - bool IsZero() const OVERRIDE { return GetValue() == 0; } - bool IsOne() const OVERRIDE { return GetValue() == 1; } - - DECLARE_INSTRUCTION(LongConstant); - - private: - explicit HLongConstant(int64_t value) : HConstant(Primitive::kPrimLong), value_(value) {} - - const int64_t value_; - - friend class HGraph; - DISALLOW_COPY_AND_ASSIGN(HLongConstant); -}; - enum class Intrinsics { -#define OPTIMIZING_INTRINSICS(Name, IsStatic) k ## Name, +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) k ## Name, #include "intrinsics_list.h" kNone, INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -2628,13 +2899,18 @@ enum class Intrinsics { }; std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic); +enum IntrinsicNeedsEnvironment { + kNoEnvironment, // Intrinsic does not require an environment. + kNeedsEnvironment // Intrinsic requires an environment. +}; + class HInvoke : public HInstruction { public: size_t InputCount() const OVERRIDE { return inputs_.Size(); } // Runtime needs to walk the stack, so Dex -> Dex calls need to // know their environment. - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const OVERRIDE { return needs_environment_ == kNeedsEnvironment; } void SetArgumentAt(size_t index, HInstruction* argument) { SetRawInputAt(index, argument); @@ -2659,8 +2935,9 @@ class HInvoke : public HInstruction { return intrinsic_; } - void SetIntrinsic(Intrinsics intrinsic) { + void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironment needs_environment) { intrinsic_ = intrinsic; + needs_environment_ = needs_environment; } bool IsFromInlinedInvoke() const { @@ -2679,14 +2956,16 @@ class HInvoke : public HInstruction { uint32_t dex_pc, uint32_t dex_method_index, InvokeType original_invoke_type) - : HInstruction(SideEffects::All()), + : HInstruction( + SideEffects::AllExceptGCDependency()), // Assume write/read on all fields/arrays. 
number_of_arguments_(number_of_arguments), inputs_(arena, number_of_arguments), return_type_(return_type), dex_pc_(dex_pc), dex_method_index_(dex_method_index), original_invoke_type_(original_invoke_type), - intrinsic_(Intrinsics::kNone) { + intrinsic_(Intrinsics::kNone), + needs_environment_(kNeedsEnvironment) { uint32_t number_of_inputs = number_of_arguments + number_of_other_inputs; inputs_.SetSize(number_of_inputs); } @@ -2703,6 +2982,7 @@ class HInvoke : public HInstruction { const uint32_t dex_method_index_; const InvokeType original_invoke_type_; Intrinsics intrinsic_; + IntrinsicNeedsEnvironment needs_environment_; private: DISALLOW_COPY_AND_ASSIGN(HInvoke); @@ -2730,9 +3010,11 @@ class HInvokeStaticOrDirect : public HInvoke { ClinitCheckRequirement clinit_check_requirement) : HInvoke(arena, number_of_arguments, - // There is one extra argument for the HCurrentMethod node, and - // potentially one other if the clinit check is explicit. - clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 2u : 1u, + // There is one extra argument for the HCurrentMethod node, and + // potentially one other if the clinit check is explicit, and one other + // if the method is a string factory. + 1u + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) + + (string_init_offset ? 1u : 0u), return_type, dex_pc, dex_method_index, @@ -2749,6 +3031,10 @@ class HInvokeStaticOrDirect : public HInvoke { return false; } + bool CanBeNull() const OVERRIDE { + return return_type_ == Primitive::kPrimNot && !IsStringInit(); + } + InvokeType GetInvokeType() const { return invoke_type_; } bool IsRecursive() const { return is_recursive_; } bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); } @@ -2777,6 +3063,23 @@ class HInvokeStaticOrDirect : public HInvoke { DCHECK(IsStaticWithImplicitClinitCheck()); } + bool IsStringFactoryFor(HFakeString* str) const { + if (!IsStringInit()) return false; + // +1 for the current method. + if (InputCount() == (number_of_arguments_ + 1)) return false; + return InputAt(InputCount() - 1)->AsFakeString() == str; + } + + void RemoveFakeStringArgumentAsLastInput() { + DCHECK(IsStringInit()); + size_t last_input_index = InputCount() - 1; + HInstruction* last_input = InputAt(last_input_index); + DCHECK(last_input != nullptr); + DCHECK(last_input->IsFakeString()) << last_input->DebugName(); + RemoveAsUserOfInput(last_input_index); + inputs_.DeleteAt(last_input_index); + } + // Is this a call to a static method whose declaring class has an // explicit intialization check in the graph? 
bool IsStaticWithExplicitClinitCheck() const { @@ -2877,7 +3180,7 @@ class HNewInstance : public HExpression<1> { uint16_t type_index, const DexFile& dex_file, QuickEntrypointEnum entrypoint) - : HExpression(Primitive::kPrimNot, SideEffects::None()), + : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC()), dex_pc_(dex_pc), type_index_(type_index), dex_file_(dex_file), @@ -2914,11 +3217,17 @@ class HNewInstance : public HExpression<1> { class HNeg : public HUnaryOperation { public: - explicit HNeg(Primitive::Type result_type, HInstruction* input) + HNeg(Primitive::Type result_type, HInstruction* input) : HUnaryOperation(result_type, input) {} - int32_t Evaluate(int32_t x) const OVERRIDE { return -x; } - int64_t Evaluate(int64_t x) const OVERRIDE { return -x; } + template <typename T> T Compute(T x) const { return -x; } + + HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue())); + } + HConstant* Evaluate(HLongConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue())); + } DECLARE_INSTRUCTION(Neg); @@ -2934,7 +3243,7 @@ class HNewArray : public HExpression<2> { uint16_t type_index, const DexFile& dex_file, QuickEntrypointEnum entrypoint) - : HExpression(Primitive::kPrimNot, SideEffects::None()), + : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC()), dex_pc_(dex_pc), type_index_(type_index), dex_file_(dex_file), @@ -2975,11 +3284,13 @@ class HAdd : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { - return x + y; + template <typename T> T Compute(T x, T y) const { return x + y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - return x + y; + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); } DECLARE_INSTRUCTION(Add); @@ -2993,11 +3304,13 @@ class HSub : public HBinaryOperation { HSub(Primitive::Type result_type, HInstruction* left, HInstruction* right) : HBinaryOperation(result_type, left, right) {} - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { - return x - y; + template <typename T> T Compute(T x, T y) const { return x - y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - return x - y; + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); } DECLARE_INSTRUCTION(Sub); @@ -3013,8 +3326,14 @@ class HMul : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x * y; } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x * y; } + template <typename T> T Compute(T x, T y) const { return x * y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return 
GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); + } DECLARE_INSTRUCTION(Mul); @@ -3025,23 +3344,32 @@ class HMul : public HBinaryOperation { class HDiv : public HBinaryOperation { public: HDiv(Primitive::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc) - : HBinaryOperation(result_type, left, right), dex_pc_(dex_pc) {} + : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls()), + dex_pc_(dex_pc) {} - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { - // Our graph structure ensures we never have 0 for `y` during constant folding. + template <typename T> + T Compute(T x, T y) const { + // Our graph structure ensures we never have 0 for `y` during + // constant folding. DCHECK_NE(y, 0); // Special case -1 to avoid getting a SIGFPE on x86(_64). return (y == -1) ? -x : x / y; } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - DCHECK_NE(y, 0); - // Special case -1 to avoid getting a SIGFPE on x86(_64). - return (y == -1) ? -x : x / y; + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); } uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } + static SideEffects SideEffectsForArchRuntimeCalls() { + // The generated code can use a runtime call. + return SideEffects::CanTriggerGC(); + } + DECLARE_INSTRUCTION(Div); private: @@ -3053,22 +3381,31 @@ class HDiv : public HBinaryOperation { class HRem : public HBinaryOperation { public: HRem(Primitive::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc) - : HBinaryOperation(result_type, left, right), dex_pc_(dex_pc) {} + : HBinaryOperation(result_type, left, right, SideEffectsForArchRuntimeCalls()), + dex_pc_(dex_pc) {} - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { + template <typename T> + T Compute(T x, T y) const { + // Our graph structure ensures we never have 0 for `y` during + // constant folding. DCHECK_NE(y, 0); // Special case -1 to avoid getting a SIGFPE on x86(_64). return (y == -1) ? 0 : x % y; } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - DCHECK_NE(y, 0); - // Special case -1 to avoid getting a SIGFPE on x86(_64). - return (y == -1) ? 
0 : x % y; + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); } uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } + static SideEffects SideEffectsForArchRuntimeCalls() { + return SideEffects::CanTriggerGC(); + } + DECLARE_INSTRUCTION(Rem); private: @@ -3084,6 +3421,8 @@ class HDivZeroCheck : public HExpression<1> { SetRawInputAt(0, value); } + Primitive::Type GetType() const OVERRIDE { return InputAt(0)->GetType(); } + bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(HInstruction* other) const OVERRIDE { @@ -3109,8 +3448,27 @@ class HShl : public HBinaryOperation { HShl(Primitive::Type result_type, HInstruction* left, HInstruction* right) : HBinaryOperation(result_type, left, right) {} - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x << (y & kMaxIntShiftValue); } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x << (y & kMaxLongShiftValue); } + template <typename T, typename U, typename V> + T Compute(T x, U y, V max_shift_value) const { + static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value, + "V is not the unsigned integer type corresponding to T"); + return x << (y & max_shift_value); + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue)); + } + // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this + // case is handled as `x << static_cast<int>(y)`. + HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue)); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue)); + } DECLARE_INSTRUCTION(Shl); @@ -3123,8 +3481,27 @@ class HShr : public HBinaryOperation { HShr(Primitive::Type result_type, HInstruction* left, HInstruction* right) : HBinaryOperation(result_type, left, right) {} - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x >> (y & kMaxIntShiftValue); } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x >> (y & kMaxLongShiftValue); } + template <typename T, typename U, typename V> + T Compute(T x, U y, V max_shift_value) const { + static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value, + "V is not the unsigned integer type corresponding to T"); + return x >> (y & max_shift_value); + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue)); + } + // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this + // case is handled as `x >> static_cast<int>(y)`. 
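The masking in the shift Compute() helpers above mirrors the Java/Dex rule that only the low five bits (int) or six bits (long) of the shift distance are used. A quick standalone check — the two constants are assumed to be 31 and 63, as their names in the patch suggest:

    #include <cstdint>
    #include <iostream>

    // Assumed values of the masks used by the HShl/HShr/HUShr Compute() helpers.
    constexpr uint32_t kMaxIntShiftValue  = 31u;
    constexpr uint64_t kMaxLongShiftValue = 63u;

    int32_t ShlInt(int32_t x, int32_t distance) {
      return x << (distance & kMaxIntShiftValue);
    }

    int64_t ShlLong(int64_t x, int64_t distance) {
      return x << (distance & kMaxLongShiftValue);
    }

    int main() {
      // A distance of 33 is masked down to 1 for 32-bit shifts...
      std::cout << ShlInt(1, 33) << std::endl;     // 2
      // ...but used as-is for 64-bit shifts.
      std::cout << ShlLong(1, 33) << std::endl;    // 8589934592
      return 0;
    }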
+ HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue)); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue)); + } DECLARE_INSTRUCTION(Shr); @@ -3137,16 +3514,27 @@ class HUShr : public HBinaryOperation { HUShr(Primitive::Type result_type, HInstruction* left, HInstruction* right) : HBinaryOperation(result_type, left, right) {} - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { - uint32_t ux = static_cast<uint32_t>(x); - uint32_t uy = static_cast<uint32_t>(y) & kMaxIntShiftValue; - return static_cast<int32_t>(ux >> uy); + template <typename T, typename U, typename V> + T Compute(T x, U y, V max_shift_value) const { + static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value, + "V is not the unsigned integer type corresponding to T"); + V ux = static_cast<V>(x); + return static_cast<T>(ux >> (y & max_shift_value)); } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { - uint64_t ux = static_cast<uint64_t>(x); - uint64_t uy = static_cast<uint64_t>(y) & kMaxLongShiftValue; - return static_cast<int64_t>(ux >> uy); + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue)); + } + // There is no `Evaluate(HIntConstant* x, HLongConstant* y)`, as this + // case is handled as `x >>> static_cast<int>(y)`. + HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue)); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue)); } DECLARE_INSTRUCTION(UShr); @@ -3162,8 +3550,21 @@ class HAnd : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x & y; } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x & y; } + template <typename T, typename U> + auto Compute(T x, U y) const -> decltype(x & y) { return x & y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); + } DECLARE_INSTRUCTION(And); @@ -3178,8 +3579,21 @@ class HOr : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x | y; } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x | y; } + template <typename T, typename U> + auto Compute(T x, U y) const -> decltype(x | y) { return x | y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return 
GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); + } DECLARE_INSTRUCTION(Or); @@ -3194,8 +3608,21 @@ class HXor : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x ^ y; } - int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x ^ y; } + template <typename T, typename U> + auto Compute(T x, U y) const -> decltype(x ^ y) { return x ^ y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HIntConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue(), y->GetValue())); + } DECLARE_INSTRUCTION(Xor); @@ -3231,7 +3658,7 @@ class HParameterValue : public HExpression<0> { class HNot : public HUnaryOperation { public: - explicit HNot(Primitive::Type result_type, HInstruction* input) + HNot(Primitive::Type result_type, HInstruction* input) : HUnaryOperation(result_type, input) {} bool CanBeMoved() const OVERRIDE { return true; } @@ -3240,8 +3667,14 @@ class HNot : public HUnaryOperation { return true; } - int32_t Evaluate(int32_t x) const OVERRIDE { return ~x; } - int64_t Evaluate(int64_t x) const OVERRIDE { return ~x; } + template <typename T> T Compute(T x) const { return ~x; } + + HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue())); + } + HConstant* Evaluate(HLongConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue())); + } DECLARE_INSTRUCTION(Not); @@ -3260,13 +3693,16 @@ class HBooleanNot : public HUnaryOperation { return true; } - int32_t Evaluate(int32_t x) const OVERRIDE { + template <typename T> bool Compute(T x) const { DCHECK(IsUint<1>(x)); return !x; } - int64_t Evaluate(int64_t x ATTRIBUTE_UNUSED) const OVERRIDE { - LOG(FATAL) << DebugName() << " cannot be used with 64-bit values"; + HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue())); + } + HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const OVERRIDE { + LOG(FATAL) << DebugName() << " is not defined for long values"; UNREACHABLE(); } @@ -3280,7 +3716,8 @@ class HTypeConversion : public HExpression<1> { public: // Instantiate a type conversion of `input` to `result_type`. 
HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc) - : HExpression(result_type, SideEffects::None()), dex_pc_(dex_pc) { + : HExpression(result_type, SideEffectsForArchRuntimeCalls(input->GetType(), result_type)), + dex_pc_(dex_pc) { SetRawInputAt(0, input); DCHECK_NE(input->GetType(), result_type); } @@ -3300,6 +3737,18 @@ class HTypeConversion : public HExpression<1> { // containing the result. If the input cannot be converted, return nullptr. HConstant* TryStaticEvaluation() const; + static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type input_type, + Primitive::Type result_type) { + // Some architectures may not require the 'GC' side effects, but at this point + // in the compilation process we do not know what architecture we will + // generate code for, so we must be conservative. + if ((Primitive::IsFloatingPointType(input_type) && Primitive::IsIntegralType(result_type)) + || (input_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(result_type))) { + return SideEffects::CanTriggerGC(); + } + return SideEffects::None(); + } + DECLARE_INSTRUCTION(TypeConversion); private: @@ -3335,6 +3784,8 @@ class HPhi : public HInstruction { } } + bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); } + size_t InputCount() const OVERRIDE { return inputs_.Size(); } void AddInput(HInstruction* input); @@ -3450,7 +3901,9 @@ class HInstanceFieldGet : public HExpression<1> { bool is_volatile, uint32_t field_idx, const DexFile& dex_file) - : HExpression(field_type, SideEffects::DependsOnSomething()), + : HExpression( + field_type, + SideEffects::FieldReadOfType(field_type, is_volatile)), field_info_(field_offset, field_type, is_volatile, field_idx, dex_file) { SetRawInputAt(0, value); } @@ -3492,7 +3945,8 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { bool is_volatile, uint32_t field_idx, const DexFile& dex_file) - : HTemplateInstruction(SideEffects::ChangesSomething()), + : HTemplateInstruction( + SideEffects::FieldWriteOfType(field_type, is_volatile)), field_info_(field_offset, field_type, is_volatile, field_idx, dex_file), value_can_be_null_(true) { SetRawInputAt(0, object); @@ -3523,7 +3977,7 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { class HArrayGet : public HExpression<2> { public: HArrayGet(HInstruction* array, HInstruction* index, Primitive::Type type) - : HExpression(type, SideEffects::DependsOnSomething()) { + : HExpression(type, SideEffects::ArrayReadOfType(type)) { SetRawInputAt(0, array); SetRawInputAt(1, index); } @@ -3561,7 +4015,9 @@ class HArraySet : public HTemplateInstruction<3> { HInstruction* value, Primitive::Type expected_component_type, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::ChangesSomething()), + : HTemplateInstruction( + SideEffects::ArrayWriteOfType(expected_component_type).Union( + SideEffectsForArchRuntimeCalls(value->GetType()))), dex_pc_(dex_pc), expected_component_type_(expected_component_type), needs_type_check_(value->GetType() == Primitive::kPrimNot), @@ -3614,6 +4070,10 @@ class HArraySet : public HTemplateInstruction<3> { : expected_component_type_; } + static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type value_type) { + return (value_type == Primitive::kPrimNot) ? 
SideEffects::CanTriggerGC() : SideEffects::None(); + } + DECLARE_INSTRUCTION(ArraySet); private: @@ -3708,7 +4168,7 @@ class HTemporary : public HTemplateInstruction<0> { class HSuspendCheck : public HTemplateInstruction<0> { public: explicit HSuspendCheck(uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc), slow_path_(nullptr) {} + : HTemplateInstruction(SideEffects::CanTriggerGC()), dex_pc_(dex_pc), slow_path_(nullptr) {} bool NeedsEnvironment() const OVERRIDE { return true; @@ -3740,13 +4200,13 @@ class HLoadClass : public HExpression<1> { const DexFile& dex_file, bool is_referrers_class, uint32_t dex_pc) - : HExpression(Primitive::kPrimNot, SideEffects::None()), + : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls()), type_index_(type_index), dex_file_(dex_file), is_referrers_class_(is_referrers_class), dex_pc_(dex_pc), generate_clinit_check_(false), - loaded_class_rti_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) { + loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { SetRawInputAt(0, current_method); } @@ -3797,14 +4257,14 @@ class HLoadClass : public HExpression<1> { loaded_class_rti_ = rti; } - bool IsResolved() { - return loaded_class_rti_.IsExact(); - } - const DexFile& GetDexFile() { return dex_file_; } bool NeedsDexCache() const OVERRIDE { return !is_referrers_class_; } + static SideEffects SideEffectsForArchRuntimeCalls() { + return SideEffects::CanTriggerGC(); + } + DECLARE_INSTRUCTION(LoadClass); private: @@ -3824,7 +4284,7 @@ class HLoadClass : public HExpression<1> { class HLoadString : public HExpression<1> { public: HLoadString(HCurrentMethod* current_method, uint32_t string_index, uint32_t dex_pc) - : HExpression(Primitive::kPrimNot, SideEffects::None()), + : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls()), string_index_(string_index), dex_pc_(dex_pc) { SetRawInputAt(0, current_method); @@ -3845,6 +4305,10 @@ class HLoadString : public HExpression<1> { bool NeedsEnvironment() const OVERRIDE { return false; } bool NeedsDexCache() const OVERRIDE { return true; } + static SideEffects SideEffectsForArchRuntimeCalls() { + return SideEffects::CanTriggerGC(); + } + DECLARE_INSTRUCTION(LoadString); private: @@ -3859,8 +4323,10 @@ class HLoadString : public HExpression<1> { */ class HClinitCheck : public HExpression<1> { public: - explicit HClinitCheck(HLoadClass* constant, uint32_t dex_pc) - : HExpression(Primitive::kPrimNot, SideEffects::ChangesSomething()), + HClinitCheck(HLoadClass* constant, uint32_t dex_pc) + : HExpression( + Primitive::kPrimNot, + SideEffects::AllChanges()), // Assume write/read on all fields/arrays. 
dex_pc_(dex_pc) { SetRawInputAt(0, constant); } @@ -3896,7 +4362,9 @@ class HStaticFieldGet : public HExpression<1> { bool is_volatile, uint32_t field_idx, const DexFile& dex_file) - : HExpression(field_type, SideEffects::DependsOnSomething()), + : HExpression( + field_type, + SideEffects::FieldReadOfType(field_type, is_volatile)), field_info_(field_offset, field_type, is_volatile, field_idx, dex_file) { SetRawInputAt(0, cls); } @@ -3935,7 +4403,8 @@ class HStaticFieldSet : public HTemplateInstruction<2> { bool is_volatile, uint32_t field_idx, const DexFile& dex_file) - : HTemplateInstruction(SideEffects::ChangesSomething()), + : HTemplateInstruction( + SideEffects::FieldWriteOfType(field_type, is_volatile)), field_info_(field_offset, field_type, is_volatile, field_idx, dex_file), value_can_be_null_(true) { SetRawInputAt(0, cls); @@ -3971,10 +4440,22 @@ class HLoadException : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HLoadException); }; +// Implicit part of move-exception which clears thread-local exception storage. +// Must not be removed because the runtime expects the TLS to get cleared. +class HClearException : public HTemplateInstruction<0> { + public: + HClearException() : HTemplateInstruction(SideEffects::AllWrites()) {} + + DECLARE_INSTRUCTION(ClearException); + + private: + DISALLOW_COPY_AND_ASSIGN(HClearException); +}; + class HThrow : public HTemplateInstruction<1> { public: HThrow(HInstruction* exception, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) { + : HTemplateInstruction(SideEffects::CanTriggerGC()), dex_pc_(dex_pc) { SetRawInputAt(0, exception); } @@ -4000,7 +4481,7 @@ class HInstanceOf : public HExpression<2> { HLoadClass* constant, bool class_is_final, uint32_t dex_pc) - : HExpression(Primitive::kPrimBoolean, SideEffects::None()), + : HExpression(Primitive::kPrimBoolean, SideEffectsForArchRuntimeCalls(class_is_final)), class_is_final_(class_is_final), must_do_null_check_(true), dex_pc_(dex_pc) { @@ -4026,6 +4507,10 @@ class HInstanceOf : public HExpression<2> { bool MustDoNullCheck() const { return must_do_null_check_; } void ClearMustDoNullCheck() { must_do_null_check_ = false; } + static SideEffects SideEffectsForArchRuntimeCalls(bool class_is_final) { + return class_is_final ? SideEffects::None() : SideEffects::CanTriggerGC(); + } + DECLARE_INSTRUCTION(InstanceOf); private: @@ -4038,27 +4523,43 @@ class HInstanceOf : public HExpression<2> { class HBoundType : public HExpression<1> { public: - HBoundType(HInstruction* input, ReferenceTypeInfo bound_type) + // Constructs an HBoundType with the given upper_bound. + // Ensures that the upper_bound is valid. + HBoundType(HInstruction* input, ReferenceTypeInfo upper_bound, bool upper_can_be_null) : HExpression(Primitive::kPrimNot, SideEffects::None()), - bound_type_(bound_type) { + upper_bound_(upper_bound), + upper_can_be_null_(upper_can_be_null), + can_be_null_(upper_can_be_null) { DCHECK_EQ(input->GetType(), Primitive::kPrimNot); SetRawInputAt(0, input); + SetReferenceTypeInfo(upper_bound_); } - const ReferenceTypeInfo& GetBoundType() const { return bound_type_; } + // GetUpper* should only be used in reference type propagation. + const ReferenceTypeInfo& GetUpperBound() const { return upper_bound_; } + bool GetUpperCanBeNull() const { return upper_can_be_null_; } - bool CanBeNull() const OVERRIDE { - // `null instanceof ClassX` always return false so we can't be null. 
- return false; + void SetCanBeNull(bool can_be_null) { + DCHECK(upper_can_be_null_ || !can_be_null); + can_be_null_ = can_be_null; } + bool CanBeNull() const OVERRIDE { return can_be_null_; } + DECLARE_INSTRUCTION(BoundType); private: // Encodes the most upper class that this instruction can have. In other words - // it is always the case that GetBoundType().IsSupertypeOf(GetReferenceType()). - // It is used to bound the type in cases like `if (x instanceof ClassX) {}` - const ReferenceTypeInfo bound_type_; + // it is always the case that GetUpperBound().IsSupertypeOf(GetReferenceType()). + // It is used to bound the type in cases like: + // if (x instanceof ClassX) { + // // upper_bound_ will be ClassX + // } + const ReferenceTypeInfo upper_bound_; + // Represents the top constraint that can_be_null_ cannot exceed (i.e. if this + // is false then can_be_null_ cannot be true). + const bool upper_can_be_null_; + bool can_be_null_; DISALLOW_COPY_AND_ASSIGN(HBoundType); }; @@ -4069,7 +4570,7 @@ class HCheckCast : public HTemplateInstruction<2> { HLoadClass* constant, bool class_is_final, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None()), + : HTemplateInstruction(SideEffects::CanTriggerGC()), class_is_final_(class_is_final), must_do_null_check_(true), dex_pc_(dex_pc) { @@ -4110,7 +4611,8 @@ class HMemoryBarrier : public HTemplateInstruction<0> { public: explicit HMemoryBarrier(MemBarrierKind barrier_kind) - : HTemplateInstruction(SideEffects::None()), + : HTemplateInstruction( + SideEffects::AllWritesAndReads()), // Assume write/read on all fields/arrays. barrier_kind_(barrier_kind) {} MemBarrierKind GetBarrierKind() { return barrier_kind_; } @@ -4131,7 +4633,9 @@ class HMonitorOperation : public HTemplateInstruction<1> { }; HMonitorOperation(HInstruction* object, OperationKind kind, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::ChangesSomething()), kind_(kind), dex_pc_(dex_pc) { + : HTemplateInstruction( + SideEffects::AllExceptGCDependency()), // Assume write/read on all fields/arrays. + kind_(kind), dex_pc_(dex_pc) { SetRawInputAt(0, object); } @@ -4159,6 +4663,25 @@ class HMonitorOperation : public HTemplateInstruction<1> { DISALLOW_COPY_AND_ASSIGN(HMonitorOperation); }; +/** + * A HInstruction used as a marker for the replacement of new + <init> + * of a String to a call to a StringFactory. Only baseline will see + * the node at code generation, where it will be treated as null. + * When compiling non-baseline, `HFakeString` instructions are removed + * in the instruction simplifier. + */ +class HFakeString : public HTemplateInstruction<0> { + public: + HFakeString() : HTemplateInstruction(SideEffects::None()) {} + + Primitive::Type GetType() const OVERRIDE { return Primitive::kPrimNot; } + + DECLARE_INSTRUCTION(FakeString); + + private: + DISALLOW_COPY_AND_ASSIGN(HFakeString); +}; + class MoveOperands : public ArenaObject<kArenaAllocMisc> { public: MoveOperands(Location source, diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index bc565468b2..f793a65bf3 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -40,7 +40,7 @@ class HOptimization : public ArenaObject<kArenaAllocMisc> { // Return the name of the pass. const char* GetPassName() const { return pass_name_; } - // Peform the analysis itself. + // Perform the analysis itself. 
virtual void Run() = 0; protected: diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index fe3bb1a2b4..f455571636 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -29,7 +29,7 @@ namespace art { // Run the tests only on host. -#ifndef HAVE_ANDROID_OS +#ifndef __ANDROID__ class OptimizingCFITest : public CFITest { public: @@ -125,6 +125,6 @@ TEST_ISA(kArm64) TEST_ISA(kX86) TEST_ISA(kX86_64) -#endif // HAVE_ANDROID_OS +#endif // __ANDROID__ } // namespace art diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 1e515307b4..6a50b7d4a4 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -35,6 +35,7 @@ #include "dex/verified_method.h" #include "dex/verification_results.h" #include "driver/compiler_driver.h" +#include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "elf_writer_quick.h" @@ -85,7 +86,7 @@ class CodeVectorAllocator FINAL : public CodeAllocator { * Filter to apply to the visualizer. Methods whose name contain that filter will * be dumped. */ -static const char* kStringFilter = ""; +static constexpr const char kStringFilter[] = ""; class PassScope; @@ -104,12 +105,14 @@ class PassObserver : public ValueObject { visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), visualizer_(visualizer_output, graph, *codegen), graph_in_bad_state_(false) { - if (strstr(method_name, kStringFilter) == nullptr) { - timing_logger_enabled_ = visualizer_enabled_ = false; - } - if (visualizer_enabled_) { - visualizer_.PrintHeader(method_name_); - codegen->SetDisassemblyInformation(&disasm_info_); + if (timing_logger_enabled_ || visualizer_enabled_) { + if (!IsVerboseMethod(compiler_driver, method_name)) { + timing_logger_enabled_ = visualizer_enabled_ = false; + } + if (visualizer_enabled_) { + visualizer_.PrintHeader(method_name_); + codegen->SetDisassemblyInformation(&disasm_info_); + } } } @@ -132,7 +135,7 @@ class PassObserver : public ValueObject { void StartPass(const char* pass_name) { // Dump graph first, then start timer. if (visualizer_enabled_) { - visualizer_.DumpGraph(pass_name, /* is_after_pass */ false); + visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_); } if (timing_logger_enabled_) { timing_logger_.StartTiming(pass_name); @@ -145,7 +148,7 @@ class PassObserver : public ValueObject { timing_logger_.EndTiming(); } if (visualizer_enabled_) { - visualizer_.DumpGraph(pass_name, /* is_after_pass */ true); + visualizer_.DumpGraph(pass_name, /* is_after_pass */ true, graph_in_bad_state_); } // Validate the HGraph if running in debug mode. @@ -168,6 +171,23 @@ class PassObserver : public ValueObject { } } + static bool IsVerboseMethod(CompilerDriver* compiler_driver, const char* method_name) { + // Test an exact match to --verbose-methods. If verbose-methods is set, this overrides an + // empty kStringFilter matching all methods. + if (compiler_driver->GetCompilerOptions().HasVerboseMethods()) { + return compiler_driver->GetCompilerOptions().IsVerboseMethod(method_name); + } + + // Test the kStringFilter sub-string. constexpr helper variable to silence unreachable-code + // warning when the string is empty. 
+ constexpr bool kStringFilterEmpty = arraysize(kStringFilter) <= 1; + if (kStringFilterEmpty || strstr(method_name, kStringFilter) != nullptr) { + return true; + } + + return false; + } + HGraph* const graph_; const char* method_name_; @@ -236,7 +256,7 @@ class OptimizingCompiler FINAL : public Compiler { } uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize( InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); } @@ -349,6 +369,36 @@ static void RunOptimizations(HOptimization* optimizations[], } } +static void MaybeRunInliner(HGraph* graph, + CompilerDriver* driver, + OptimizingCompilerStats* stats, + const DexCompilationUnit& dex_compilation_unit, + PassObserver* pass_observer, + StackHandleScopeCollection* handles) { + const CompilerOptions& compiler_options = driver->GetCompilerOptions(); + bool should_inline = (compiler_options.GetInlineDepthLimit() > 0) + && (compiler_options.GetInlineMaxCodeUnits() > 0); + if (!should_inline) { + return; + } + + ArenaAllocator* arena = graph->GetArena(); + HInliner* inliner = new (arena) HInliner( + graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); + ReferenceTypePropagation* type_propagation = + new (arena) ReferenceTypePropagation(graph, handles, + "reference_type_propagation_after_inlining"); + + HOptimization* optimizations[] = { + inliner, + // Run another type propagation phase: inlining will open up more opportunities + // to remove checkcast/instanceof and null checks. + type_propagation, + }; + + RunOptimizations(optimizations, arraysize(optimizations), pass_observer); +} + static void RunOptimizations(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats, @@ -363,10 +413,6 @@ static void RunOptimizations(HGraph* graph, HConstantFolding* fold1 = new (arena) HConstantFolding(graph); InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph); - - HInliner* inliner = new (arena) HInliner( - graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); - HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); @@ -378,28 +424,29 @@ static void RunOptimizations(HGraph* graph, graph, stats, "instruction_simplifier_after_types"); InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_after_bce"); - ReferenceTypePropagation* type_propagation2 = - new (arena) ReferenceTypePropagation(graph, handles); InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_before_codegen"); IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver); - HOptimization* optimizations[] = { + HOptimization* optimizations1[] = { intrinsics, fold1, simplify1, type_propagation, dce1, - simplify2, - inliner, - // Run another type propagation phase: inlining will open up more opprotunities - // to remove checkast/instanceof and null checks. 
- type_propagation2, + simplify2 + }; + + RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer); + + MaybeRunInliner(graph, driver, stats, dex_compilation_unit, pass_observer, handles); + + HOptimization* optimizations2[] = { // BooleanSimplifier depends on the InstructionSimplifier removing redundant // suspend checks to recognize empty blocks. boolean_simplify, - fold2, + fold2, // TODO: if we don't inline we can also skip fold2. side_effects, gvn, licm, @@ -412,7 +459,7 @@ static void RunOptimizations(HGraph* graph, simplify4, }; - RunOptimizations(optimizations, arraysize(optimizations), pass_observer); + RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); } // The stack map we generate must be 4-byte aligned on ARM. Since existing @@ -555,8 +602,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite } // Implementation of the space filter: do not compile a code item whose size in - // code units is bigger than 256. - static constexpr size_t kSpaceFilterOptimizingThreshold = 256; + // code units is bigger than 128. + static constexpr size_t kSpaceFilterOptimizingThreshold = 128; const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); if ((compiler_options.GetCompilerFilter() == CompilerOptions::kSpace) && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) { @@ -565,7 +612,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite } DexCompilationUnit dex_compilation_unit( - nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item, + nullptr, class_loader, Runtime::Current()->GetClassLinker(), dex_file, code_item, class_def_idx, method_idx, access_flags, compiler_driver->GetVerifiedMethod(&dex_file, method_idx)); @@ -602,12 +649,29 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite visualizer_output_.get(), compiler_driver); + const uint8_t* interpreter_metadata = nullptr; + { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<4> hs(soa.Self()); + ClassLinker* class_linker = dex_compilation_unit.GetClassLinker(); + Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(dex_file))); + Handle<mirror::ClassLoader> loader(hs.NewHandle( + soa.Decode<mirror::ClassLoader*>(class_loader))); + ArtMethod* art_method = compiler_driver->ResolveMethod( + soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type); + // We may not get a method, for example if its class is erroneous. + // TODO: Clean this up, the compiler driver should just pass the ArtMethod to compile. + if (art_method != nullptr) { + interpreter_metadata = art_method->GetQuickenedInfo(); + } + } HGraphBuilder builder(graph, &dex_compilation_unit, &dex_compilation_unit, &dex_file, compiler_driver, - compilation_stats_.get()); + compilation_stats_.get(), + interpreter_metadata); VLOG(compiler) << "Building " << method_name; @@ -627,8 +691,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite // `run_optimizations_` is set explicitly (either through a compiler filter // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back // to Quick. 
- bool can_use_baseline = !run_optimizations_; - if (run_optimizations_ && can_optimize && can_allocate_registers) { + bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit(); + if (run_optimizations_ && can_allocate_registers) { VLOG(compiler) << "Optimizing " << method_name; { @@ -637,16 +701,21 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite // We could not transform the graph to SSA, bailout. LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop"; MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); + pass_observer.SetGraphInBadState(); return nullptr; } } - return CompileOptimized(graph, - codegen.get(), - compiler_driver, - dex_compilation_unit, - &pass_observer); - } else if (shouldOptimize && can_allocate_registers) { + if (can_optimize) { + return CompileOptimized(graph, + codegen.get(), + compiler_driver, + dex_compilation_unit, + &pass_observer); + } + } + + if (shouldOptimize && can_allocate_registers) { LOG(FATAL) << "Could not allocate registers in optimizing compiler"; UNREACHABLE(); } else if (can_use_baseline) { diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 54ea6f19d4..f9d812f6a6 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -38,6 +38,20 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) // Build up a worklist of moves. BuildInitialMoveList(parallel_move); + // Move stack/stack slot to take advantage of a free register on constrained machines. + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& move = *moves_.Get(i); + // Ignore constants and moves already eliminated. + if (move.IsEliminated() || move.GetSource().IsConstant()) { + continue; + } + + if ((move.GetSource().IsStackSlot() || move.GetSource().IsDoubleStackSlot()) && + (move.GetDestination().IsStackSlot() || move.GetDestination().IsDoubleStackSlot())) { + PerformMove(i); + } + } + for (size_t i = 0; i < moves_.Size(); ++i) { const MoveOperands& move = *moves_.Get(i); // Skip constants to perform them last. 
They don't block other moves diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 68316c2618..1349df9b16 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -25,19 +25,35 @@ namespace art { class RTPVisitor : public HGraphDelegateVisitor { public: - RTPVisitor(HGraph* graph, StackHandleScopeCollection* handles) + RTPVisitor(HGraph* graph, + StackHandleScopeCollection* handles, + GrowableArray<HInstruction*>* worklist, + ReferenceTypeInfo::TypeHandle object_class_handle, + ReferenceTypeInfo::TypeHandle class_class_handle, + ReferenceTypeInfo::TypeHandle string_class_handle) : HGraphDelegateVisitor(graph), - handles_(handles) {} + handles_(handles), + object_class_handle_(object_class_handle), + class_class_handle_(class_class_handle), + string_class_handle_(string_class_handle), + worklist_(worklist) {} + void VisitNullConstant(HNullConstant* null_constant) OVERRIDE; void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; void VisitLoadClass(HLoadClass* load_class) OVERRIDE; + void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE; + void VisitLoadString(HLoadString* instr) OVERRIDE; void VisitNewArray(HNewArray* instr) OVERRIDE; + void VisitParameterValue(HParameterValue* instr) OVERRIDE; void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact); void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE; void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE; void VisitInvoke(HInvoke* instr) OVERRIDE; void VisitArrayGet(HArrayGet* instr) OVERRIDE; + void VisitCheckCast(HCheckCast* instr) OVERRIDE; + void VisitNullCheck(HNullCheck* instr) OVERRIDE; + void VisitFakeString(HFakeString* instr) OVERRIDE; void UpdateReferenceTypeInfo(HInstruction* instr, uint16_t type_idx, const DexFile& dex_file, @@ -45,8 +61,33 @@ class RTPVisitor : public HGraphDelegateVisitor { private: StackHandleScopeCollection* handles_; + ReferenceTypeInfo::TypeHandle object_class_handle_; + ReferenceTypeInfo::TypeHandle class_class_handle_; + ReferenceTypeInfo::TypeHandle string_class_handle_; + GrowableArray<HInstruction*>* worklist_; + + static constexpr size_t kDefaultWorklistSize = 8; }; +ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph, + StackHandleScopeCollection* handles, + const char* name) + : HOptimization(graph, name), + handles_(handles), + worklist_(graph->GetArena(), kDefaultWorklistSize) { + ClassLinker* linker = Runtime::Current()->GetClassLinker(); + object_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)); + string_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangString)); + class_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangClass)); + + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + DCHECK(ReferenceTypeInfo::IsValidHandle(object_class_handle_)); + DCHECK(ReferenceTypeInfo::IsValidHandle(class_class_handle_)); + DCHECK(ReferenceTypeInfo::IsValidHandle(string_class_handle_)); + } +} + void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. 
@@ -55,29 +96,125 @@ void ReferenceTypePropagation::Run() { VisitBasicBlock(it.Current()); } ProcessWorklist(); + + if (kIsDebugBuild) { + // TODO: move this to the graph checker. + ScopedObjectAccess soa(Thread::Current()); + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) { + HInstruction* instr = iti.Current(); + if (instr->GetType() == Primitive::kPrimNot) { + DCHECK(instr->GetReferenceTypeInfo().IsValid()) + << "Invalid RTI for instruction: " << instr->DebugName(); + if (instr->IsBoundType()) { + DCHECK(instr->AsBoundType()->GetUpperBound().IsValid()); + } else if (instr->IsLoadClass()) { + DCHECK(instr->AsLoadClass()->GetReferenceTypeInfo().IsExact()); + DCHECK(instr->AsLoadClass()->GetLoadedClassRTI().IsValid()); + } else if (instr->IsNullCheck()) { + DCHECK(instr->GetReferenceTypeInfo().IsEqual(instr->InputAt(0)->GetReferenceTypeInfo())) + << "NullCheck " << instr->GetReferenceTypeInfo() + << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo(); + } + } + } + } + } } void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { - // TODO: handle other instructions that give type info - // (array accesses) + RTPVisitor visitor(graph_, + handles_, + &worklist_, + object_class_handle_, + class_class_handle_, + string_class_handle_); + // Handle Phis first as there might be instructions in the same block who depend on them. + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + VisitPhi(it.Current()->AsPhi()); + } - RTPVisitor visitor(graph_, handles_); - // Initialize exact types first for faster convergence. + // Handle instructions. for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instr = it.Current(); instr->Accept(&visitor); } - // Handle Phis. - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - VisitPhi(it.Current()->AsPhi()); - } - // Add extra nodes to bound types. BoundTypeForIfNotNull(block); BoundTypeForIfInstanceOf(block); } +// Create a bound type for the given object narrowing the type as much as possible. +// The BoundType upper values for the super type and can_be_null will be taken from +// load_class.GetLoadedClassRTI() and upper_can_be_null. +static HBoundType* CreateBoundType(ArenaAllocator* arena, + HInstruction* obj, + HLoadClass* load_class, + bool upper_can_be_null) + SHARED_REQUIRES(Locks::mutator_lock_) { + ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + HBoundType* bound_type = new (arena) HBoundType(obj, class_rti, upper_can_be_null); + // Narrow the type as much as possible. + if (class_rti.GetTypeHandle()->IsFinal()) { + bound_type->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ true)); + } else if (obj_rti.IsValid() && class_rti.IsSupertypeOf(obj_rti)) { + bound_type->SetReferenceTypeInfo(obj_rti); + } else { + bound_type->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); + } + if (upper_can_be_null) { + bound_type->SetCanBeNull(obj->CanBeNull()); + } + return bound_type; +} + +// Check if we should create a bound type for the given object at the specified +// position. Because of inlining and the fact we run RTP more than once and we +// might have a HBoundType already. If we do, we should not create a new one. 
+// In this case we also assert that there are no other uses of the object (except + // the bound type) dominated by the specified dominator_instr or dominator_block. +static bool ShouldCreateBoundType(HInstruction* position, + HInstruction* obj, + ReferenceTypeInfo upper_bound, + HInstruction* dominator_instr, + HBasicBlock* dominator_block) + SHARED_REQUIRES(Locks::mutator_lock_) { + // If the position where we should insert the bound type is not already + // a bound type then we need to create one. + if (position == nullptr || !position->IsBoundType()) { + return true; + } + + HBoundType* existing_bound_type = position->AsBoundType(); + if (existing_bound_type->GetUpperBound().IsSupertypeOf(upper_bound)) { + if (kIsDebugBuild) { + // Check that the existing HBoundType dominates all the uses. + for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + if (dominator_instr != nullptr) { + DCHECK(!dominator_instr->StrictlyDominates(user) + || user == existing_bound_type + || existing_bound_type->StrictlyDominates(user)); + } else if (dominator_block != nullptr) { + DCHECK(!dominator_block->Dominates(user->GetBlock()) + || user == existing_bound_type + || existing_bound_type->StrictlyDominates(user)); + } + } + } + } else { + // TODO: if the current bound type is a refinement we could update the + // existing_bound_type with a new upper limit. However, we also need to + // update its users and have access to the work list. + } + return false; +} + void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { HIf* ifInstruction = block->GetLastInstruction()->AsIf(); if (ifInstruction == nullptr) { @@ -116,8 +253,23 @@ void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { HInstruction* user = it.Current()->GetUser(); if (notNullBlock->Dominates(user->GetBlock())) { if (bound_type == nullptr) { - bound_type = new (graph_->GetArena()) HBoundType(obj, ReferenceTypeInfo::CreateTop(false)); - notNullBlock->InsertInstructionBefore(bound_type, notNullBlock->GetFirstInstruction()); + ScopedObjectAccess soa(Thread::Current()); + HInstruction* insert_point = notNullBlock->GetFirstInstruction(); + ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create( + object_class_handle_, /* is_exact */ true); + if (ShouldCreateBoundType(insert_point, obj, object_rti, nullptr, notNullBlock)) { + bound_type = new (graph_->GetArena()) HBoundType( + obj, object_rti, /* bound_can_be_null */ false); + if (obj->GetReferenceTypeInfo().IsValid()) { + bound_type->SetReferenceTypeInfo(obj->GetReferenceTypeInfo()); + } + notNullBlock->InsertInstructionBefore(bound_type, insert_point); + } else { + // We already have a bound type on the position we would need to insert + // the new one. The existing bound type should dominate all the users + // (dchecked) so there's no need to continue. 
+ break; + } } user->ReplaceInput(bound_type, it.Current()->GetIndex()); } @@ -171,25 +323,23 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { HInstruction* user = it.Current()->GetUser(); if (instanceOfTrueBlock->Dominates(user->GetBlock())) { if (bound_type == nullptr) { + ScopedObjectAccess soa(Thread::Current()); HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); - - ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); - bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti); - - // Narrow the type as much as possible. - { - ScopedObjectAccess soa(Thread::Current()); - if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) { - bound_type->SetReferenceTypeInfo(obj_rti); - } else { - bound_type->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); - } + HInstruction* insert_point = instanceOfTrueBlock->GetFirstInstruction(); + if (ShouldCreateBoundType(insert_point, obj, class_rti, nullptr, instanceOfTrueBlock)) { + bound_type = CreateBoundType( + graph_->GetArena(), + obj, + load_class, + false /* InstanceOf ensures the object is not null. */); + instanceOfTrueBlock->InsertInstructionBefore(bound_type, insert_point); + } else { + // We already have a bound type on the position we would need to insert + // the new one. The existing bound type should dominate all the users + // (dchecked) so there's no need to continue. + break; } - - instanceOfTrueBlock->InsertInstructionBefore( - bound_type, instanceOfTrueBlock->GetFirstInstruction()); } user->ReplaceInput(bound_type, it.Current()->GetIndex()); } @@ -199,11 +349,32 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact) { - if (klass != nullptr) { + if (instr->IsInvokeStaticOrDirect() && instr->AsInvokeStaticOrDirect()->IsStringInit()) { + // Calls to String.<init> are replaced with a StringFactory. 
+ if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + ClassLinker* cl = Runtime::Current()->GetClassLinker(); + mirror::DexCache* dex_cache = cl->FindDexCache(instr->AsInvoke()->GetDexFile()); + ArtMethod* method = dex_cache->GetResolvedMethod( + instr->AsInvoke()->GetDexMethodIndex(), cl->GetImagePointerSize()); + DCHECK(method != nullptr); + mirror::Class* declaring_class = method->GetDeclaringClass(); + DCHECK(declaring_class != nullptr); + DCHECK(declaring_class->IsStringClass()) + << "Expected String class: " << PrettyDescriptor(declaring_class); + DCHECK(method->IsConstructor()) + << "Expected String.<init>: " << PrettyMethod(method); + } + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true)); + } else if (klass != nullptr) { ScopedObjectAccess soa(Thread::Current()); - MutableHandle<mirror::Class> handle = handles_->NewHandle(klass); + ReferenceTypeInfo::TypeHandle handle = handles_->NewHandle(klass); is_exact = is_exact || klass->IsFinal(); instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact)); + } else { + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false)); } } @@ -219,6 +390,13 @@ void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr, SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact); } +void RTPVisitor::VisitNullConstant(HNullConstant* instr) { + // TODO: The null constant could be bound contextually (e.g. based on return statements) + // to a more precise type. + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false)); +} + void RTPVisitor::VisitNewInstance(HNewInstance* instr) { UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } @@ -227,6 +405,13 @@ void RTPVisitor::VisitNewArray(HNewArray* instr) { UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } +void RTPVisitor::VisitParameterValue(HParameterValue* instr) { + if (instr->GetType() == Primitive::kPrimNot) { + // TODO: parse the signature and add precise types for the parameters. + SetClassAsTypeInfo(instr, nullptr, /* is_exact */ false); + } +} + void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info) { // The field index is unknown only during tests. @@ -238,10 +423,10 @@ void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr, ClassLinker* cl = Runtime::Current()->GetClassLinker(); mirror::DexCache* dex_cache = cl->FindDexCache(info.GetDexFile()); ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), dex_cache); - if (field != nullptr) { - mirror::Class* klass = field->GetType<false>(); - SetClassAsTypeInfo(instr, klass, /* is_exact */ false); - } + // TODO: There are certain cases where we can't resolve the field. + // b/21914925 is open to keep track of a repro case for this issue. + mirror::Class* klass = (field == nullptr) ? nullptr : field->GetType<false>(); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); } void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) { @@ -258,12 +443,64 @@ void RTPVisitor::VisitLoadClass(HLoadClass* instr) { Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile()); // Get type from dex cache assuming it was populated by the verifier. 
mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex()); - if (resolved_class != nullptr) { - Handle<mirror::Class> handle = handles_->NewHandle(resolved_class); - instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(handle, /* is_exact */ true)); + // TODO: investigating why we are still getting unresolved classes: b/22821472. + ReferenceTypeInfo::TypeHandle handle = (resolved_class != nullptr) + ? handles_->NewHandle(resolved_class) + : object_class_handle_; + instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(handle, /* is_exact */ true)); + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_class_handle_, /* is_exact */ true)); +} + +void RTPVisitor::VisitClinitCheck(HClinitCheck* instr) { + instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo()); +} + +void RTPVisitor::VisitLoadString(HLoadString* instr) { + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true)); +} + +void RTPVisitor::VisitNullCheck(HNullCheck* instr) { + ScopedObjectAccess soa(Thread::Current()); + ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + DCHECK(parent_rti.IsValid()); + instr->SetReferenceTypeInfo(parent_rti); +} + +void RTPVisitor::VisitFakeString(HFakeString* instr) { + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true)); +} + +void RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { + HInstruction* obj = check_cast->InputAt(0); + HBoundType* bound_type = nullptr; + for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + if (check_cast->StrictlyDominates(user)) { + if (bound_type == nullptr) { + ScopedObjectAccess soa(Thread::Current()); + HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + if (ShouldCreateBoundType(check_cast->GetNext(), obj, class_rti, check_cast, nullptr)) { + bound_type = CreateBoundType( + GetGraph()->GetArena(), + obj, + load_class, + true /* CheckCast succeeds for nulls. */); + check_cast->GetBlock()->InsertInstructionAfter(bound_type, check_cast); + } else { + // Update nullability of the existing bound type, which may not have known + // that its input was not null when it was being created. + bound_type = check_cast->GetNext()->AsBoundType(); + bound_type->SetCanBeNull(obj->CanBeNull()); + // We already have a bound type on the position we would need to insert + // the new one. The existing bound type should dominate all the users + // (dchecked) so there's no need to continue. 
+ break; + } + } + user->ReplaceInput(bound_type, it.Current()->GetIndex()); + } } - Handle<mirror::Class> class_handle = handles_->NewHandle(mirror::Class::GetJavaLangClass()); - instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_handle, /* is_exact */ true)); } void ReferenceTypePropagation::VisitPhi(HPhi* phi) { @@ -290,29 +527,54 @@ void ReferenceTypePropagation::VisitPhi(HPhi* phi) { ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b) { + if (!b.IsValid()) { + return a; + } + if (!a.IsValid()) { + return b; + } + bool is_exact = a.IsExact() && b.IsExact(); - bool is_top = a.IsTop() || b.IsTop(); Handle<mirror::Class> type_handle; - if (!is_top) { - if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) { - type_handle = a.GetTypeHandle(); - } else if (a.IsSupertypeOf(b)) { - type_handle = a.GetTypeHandle(); - is_exact = false; - } else if (b.IsSupertypeOf(a)) { - type_handle = b.GetTypeHandle(); - is_exact = false; - } else { - // TODO: Find a common super class. - is_top = true; - is_exact = false; - } + if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) { + type_handle = a.GetTypeHandle(); + } else if (a.IsSupertypeOf(b)) { + type_handle = a.GetTypeHandle(); + is_exact = false; + } else if (b.IsSupertypeOf(a)) { + type_handle = b.GetTypeHandle(); + is_exact = false; + } else { + // TODO: Find the first common super class. + type_handle = object_class_handle_; + is_exact = false; } - return is_top - ? ReferenceTypeInfo::CreateTop(is_exact) - : ReferenceTypeInfo::Create(type_handle, is_exact); + return ReferenceTypeInfo::Create(type_handle, is_exact); +} + +static void UpdateArrayGet(HArrayGet* instr, + StackHandleScopeCollection* handles, + ReferenceTypeInfo::TypeHandle object_class_handle) + SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK_EQ(Primitive::kPrimNot, instr->GetType()); + + ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + DCHECK(parent_rti.IsValid()); + + Handle<mirror::Class> handle = parent_rti.GetTypeHandle(); + if (handle->IsObjectArrayClass()) { + ReferenceTypeInfo::TypeHandle component_handle = handles->NewHandle(handle->GetComponentType()); + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(component_handle, /* is_exact */ false)); + } else { + // We don't know what the parent actually is, so we fallback to object. + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false)); + } + + return; } bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { @@ -323,6 +585,15 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { UpdateBoundType(instr->AsBoundType()); } else if (instr->IsPhi()) { UpdatePhi(instr->AsPhi()); + } else if (instr->IsNullCheck()) { + ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + if (parent_rti.IsValid()) { + instr->SetReferenceTypeInfo(parent_rti); + } + } else if (instr->IsArrayGet()) { + // TODO: consider if it's worth "looking back" and bounding the input object + // to an array type. 
+ UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_); } else { LOG(FATAL) << "Invalid instruction (should not get here)"; } @@ -340,45 +611,45 @@ void RTPVisitor::VisitInvoke(HInvoke* instr) { mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile()); ArtMethod* method = dex_cache->GetResolvedMethod( instr->GetDexMethodIndex(), cl->GetImagePointerSize()); - if (method != nullptr) { - mirror::Class* klass = method->GetReturnType(false); - SetClassAsTypeInfo(instr, klass, /* is_exact */ false); - } + mirror::Class* klass = (method == nullptr) ? nullptr : method->GetReturnType(false); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); } void RTPVisitor::VisitArrayGet(HArrayGet* instr) { if (instr->GetType() != Primitive::kPrimNot) { return; } - - HInstruction* parent = instr->InputAt(0); ScopedObjectAccess soa(Thread::Current()); - Handle<mirror::Class> handle = parent->GetReferenceTypeInfo().GetTypeHandle(); - if (handle.GetReference() != nullptr && handle->IsObjectArrayClass()) { - SetClassAsTypeInfo(instr, handle->GetComponentType(), /* is_exact */ false); + UpdateArrayGet(instr, handles_, object_class_handle_); + if (!instr->GetReferenceTypeInfo().IsValid()) { + worklist_->Add(instr); } } void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); - // Be sure that we don't go over the bounded type. - ReferenceTypeInfo bound_rti = instr->GetBoundType(); - if (!bound_rti.IsSupertypeOf(new_rti)) { - new_rti = bound_rti; + if (!new_rti.IsValid()) { + return; // No new info yet. + } + + // Make sure that we don't go over the bounded type. + ReferenceTypeInfo upper_bound_rti = instr->GetUpperBound(); + if (!upper_bound_rti.IsSupertypeOf(new_rti)) { + new_rti = upper_bound_rti; } instr->SetReferenceTypeInfo(new_rti); } void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); - if (new_rti.IsTop() && !new_rti.IsExact()) { - // Early return if we are Top and inexact. + if (new_rti.IsValid() && new_rti.IsObjectClass() && !new_rti.IsExact()) { + // Early return if we are Object and inexact. instr->SetReferenceTypeInfo(new_rti); return; } for (size_t i = 1; i < instr->InputCount(); i++) { new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo()); - if (new_rti.IsTop()) { + if (new_rti.IsValid() && new_rti.IsObjectClass()) { if (!new_rti.IsExact()) { break; } else { @@ -392,21 +663,31 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { // Re-computes and updates the nullability of the instruction. Returns whether or // not the nullability was changed. 
bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) { - DCHECK(instr->IsPhi() || instr->IsBoundType()); + DCHECK(instr->IsPhi() + || instr->IsBoundType() + || instr->IsNullCheck() + || instr->IsArrayGet()); - if (!instr->IsPhi()) { + if (!instr->IsPhi() && !instr->IsBoundType()) { return false; } - HPhi* phi = instr->AsPhi(); - bool existing_can_be_null = phi->CanBeNull(); - bool new_can_be_null = false; - for (size_t i = 0; i < phi->InputCount(); i++) { - new_can_be_null |= phi->InputAt(i)->CanBeNull(); + bool existing_can_be_null = instr->CanBeNull(); + if (instr->IsPhi()) { + HPhi* phi = instr->AsPhi(); + bool new_can_be_null = false; + for (size_t i = 0; i < phi->InputCount(); i++) { + if (phi->InputAt(i)->CanBeNull()) { + new_can_be_null = true; + break; + } + } + phi->SetCanBeNull(new_can_be_null); + } else if (instr->IsBoundType()) { + HBoundType* bound_type = instr->AsBoundType(); + bound_type->SetCanBeNull(instr->InputAt(0)->CanBeNull() && bound_type->GetUpperCanBeNull()); } - phi->SetCanBeNull(new_can_be_null); - - return existing_can_be_null != new_can_be_null; + return existing_can_be_null != instr->CanBeNull(); } void ReferenceTypePropagation::ProcessWorklist() { @@ -419,14 +700,18 @@ void ReferenceTypePropagation::ProcessWorklist() { } void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) { - DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot) << instruction->GetType(); + DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot) + << instruction->DebugName() << ":" << instruction->GetType(); worklist_.Add(instruction); } void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { HInstruction* user = it.Current()->GetUser(); - if (user->IsPhi() || user->IsBoundType()) { + if (user->IsPhi() + || user->IsBoundType() + || user->IsNullCheck() + || (user->IsArrayGet() && (user->GetType() == Primitive::kPrimNot))) { AddToWorklist(user); } } diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 17cfed45d5..14d4a82e9b 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -30,10 +30,9 @@ namespace art { */ class ReferenceTypePropagation : public HOptimization { public: - ReferenceTypePropagation(HGraph* graph, StackHandleScopeCollection* handles) - : HOptimization(graph, kReferenceTypePropagationPassName), - handles_(handles), - worklist_(graph->GetArena(), kDefaultWorklistSize) {} + ReferenceTypePropagation(HGraph* graph, + StackHandleScopeCollection* handles, + const char* name = kReferenceTypePropagationPassName); void Run() OVERRIDE; @@ -42,8 +41,8 @@ class ReferenceTypePropagation : public HOptimization { private: void VisitPhi(HPhi* phi); void VisitBasicBlock(HBasicBlock* block); - void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void UpdateBoundType(HBoundType* bound_type) SHARED_REQUIRES(Locks::mutator_lock_); + void UpdatePhi(HPhi* phi) SHARED_REQUIRES(Locks::mutator_lock_); void BoundTypeForIfNotNull(HBasicBlock* block); void BoundTypeForIfInstanceOf(HBasicBlock* block); void ProcessWorklist(); @@ -54,12 +53,16 @@ class ReferenceTypePropagation : public HOptimization { bool UpdateReferenceTypeInfo(HInstruction* instr); ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, 
const ReferenceTypeInfo& b) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); StackHandleScopeCollection* handles_; GrowableArray<HInstruction*> worklist_; + ReferenceTypeInfo::TypeHandle object_class_handle_; + ReferenceTypeInfo::TypeHandle class_class_handle_; + ReferenceTypeInfo::TypeHandle string_class_handle_; + static constexpr size_t kDefaultWorklistSize = 8; DISALLOW_COPY_AND_ASSIGN(ReferenceTypePropagation); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 72ddabe559..9f32a9eaf8 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -248,7 +248,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { bool core_register = (instruction->GetType() != Primitive::kPrimDouble) && (instruction->GetType() != Primitive::kPrimFloat); - if (locations->CanCall()) { + if (locations->NeedsSafepoint()) { if (codegen_->IsLeafMethod()) { // TODO: We do this here because we do not want the suspend check to artificially // create live registers. We should find another place, but this is currently the @@ -782,7 +782,10 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } else { DCHECK(!current->IsHighInterval()); int hint = current->FindFirstRegisterHint(free_until, liveness_); - if (hint != kNoRegister) { + if ((hint != kNoRegister) + // For simplicity, if the hint we are getting for a pair cannot be used, + // we are just going to allocate a new pair. + && !(current->IsLowInterval() && IsBlocked(GetHighForLowRegister(hint)))) { DCHECK(!IsBlocked(hint)); reg = hint; } else if (current->IsLowInterval()) { @@ -949,7 +952,16 @@ bool RegisterAllocator::PotentiallyRemoveOtherHalf(LiveInterval* interval, // we spill `current` instead. bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { size_t first_register_use = current->FirstRegisterUse(); - if (first_register_use == kNoLifetime) { + if (current->HasRegister()) { + DCHECK(current->IsHighInterval()); + // The low interval has allocated the register for the high interval. In + // case the low interval had to split both intervals, we may end up in a + // situation where the high interval does not have a register use anymore. + // We must still proceed in order to split currently active and inactive + // uses of the high interval's register, and put the high interval in the + // active set. + DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr)); + } else if (first_register_use == kNoLifetime) { AllocateSpillSlotFor(current); return false; } @@ -1016,7 +1028,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // When allocating the low part, we made sure the high register was available. DCHECK_LT(first_use, next_use[reg]); } else if (current->IsLowInterval()) { - reg = FindAvailableRegisterPair(next_use, first_register_use); + reg = FindAvailableRegisterPair(next_use, first_use); // We should spill if both registers are not available. 
should_spill = (first_use >= next_use[reg]) || (first_use >= next_use[GetHighForLowRegister(reg)]); @@ -1030,16 +1042,28 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { if (should_spill) { DCHECK(!current->IsHighInterval()); bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1)); - if (current->IsLowInterval() - && is_allocation_at_use_site - && TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(), - first_register_use, - next_use)) { + if (is_allocation_at_use_site) { + if (!current->IsLowInterval()) { + DumpInterval(std::cerr, current); + DumpAllIntervals(std::cerr); + // This situation has the potential to infinite loop, so we make it a non-debug CHECK. + HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2); + CHECK(false) << "There is not enough registers available for " + << current->GetParent()->GetDefinedBy()->DebugName() << " " + << current->GetParent()->GetDefinedBy()->GetId() + << " at " << first_register_use - 1 << " " + << (at == nullptr ? "" : at->DebugName()); + } + // If we're allocating a register for `current` because the instruction at // that position requires it, but we think we should spill, then there are // non-pair intervals or unaligned pair intervals blocking the allocation. // We split the first interval found, and put ourselves first in the // `unhandled_` list. + bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(), + first_register_use, + next_use); + DCHECK(success); LiveInterval* existing = unhandled_->Peek(); DCHECK(existing->IsHighInterval()); DCHECK_EQ(existing->GetLowInterval(), current); @@ -1049,17 +1073,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // register, we split this interval just before its first register use. AllocateSpillSlotFor(current); LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1); - if (current == split) { - DumpInterval(std::cerr, current); - DumpAllIntervals(std::cerr); - // This situation has the potential to infinite loop, so we make it a non-debug CHECK. - HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2); - CHECK(false) << "There is not enough registers available for " - << split->GetParent()->GetDefinedBy()->DebugName() << " " - << split->GetParent()->GetDefinedBy()->GetId() - << " at " << first_register_use - 1 << " " - << (at == nullptr ? "" : at->DebugName()); - } + DCHECK(current != split); AddSorted(unhandled_, split); } return false; @@ -1240,7 +1254,9 @@ LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { if (interval->IsHighInterval()) { - // The low interval will contain the spill slot. + // The low interval already took care of allocating the spill slot. + DCHECK(!interval->GetLowInterval()->HasRegister()); + DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot()); return; } diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index ea1ca5a731..1c3e255339 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -24,14 +24,15 @@ void SideEffectsAnalysis::Run() { block_effects_.SetSize(graph_->GetBlocks().Size()); loop_effects_.SetSize(graph_->GetBlocks().Size()); + // In DEBUG mode, ensure side effects are properly initialized to empty. 
if (kIsDebugBuild) { for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); SideEffects effects = GetBlockEffects(block); - DCHECK(!effects.HasSideEffects() && !effects.HasDependencies()); + DCHECK(effects.DoesNothing()); if (block->IsLoopHeader()) { effects = GetLoopEffects(block); - DCHECK(!effects.HasSideEffects() && !effects.HasDependencies()); + DCHECK(effects.DoesNothing()); } } } @@ -46,7 +47,9 @@ void SideEffectsAnalysis::Run() { inst_it.Advance()) { HInstruction* instruction = inst_it.Current(); effects = effects.Union(instruction->GetSideEffects()); - if (effects.HasAllSideEffects()) { + // If all side effects are represented, scanning further will not add any + // more information to side-effects of this block. + if (effects.DoesAll()) { break; } } diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc new file mode 100644 index 0000000000..ec45d6b2ca --- /dev/null +++ b/compiler/optimizing/side_effects_test.cc @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "gtest/gtest.h" +#include "nodes.h" +#include "primitive.h" + +namespace art { + +/** + * Tests for the SideEffects class. + */ + +// +// Helper methods. +// + +void testWriteAndReadSanity(SideEffects write, SideEffects read) { + EXPECT_FALSE(write.DoesNothing()); + EXPECT_FALSE(read.DoesNothing()); + + EXPECT_TRUE(write.DoesAnyWrite()); + EXPECT_FALSE(write.DoesAnyRead()); + EXPECT_FALSE(read.DoesAnyWrite()); + EXPECT_TRUE(read.DoesAnyRead()); + + // All-dependences. + SideEffects all = SideEffects::All(); + EXPECT_TRUE(all.MayDependOn(write)); + EXPECT_FALSE(write.MayDependOn(all)); + EXPECT_FALSE(all.MayDependOn(read)); + EXPECT_TRUE(read.MayDependOn(all)); + + // None-dependences. + SideEffects none = SideEffects::None(); + EXPECT_FALSE(none.MayDependOn(write)); + EXPECT_FALSE(write.MayDependOn(none)); + EXPECT_FALSE(none.MayDependOn(read)); + EXPECT_FALSE(read.MayDependOn(none)); +} + +void testWriteAndReadDependence(SideEffects write, SideEffects read) { + testWriteAndReadSanity(write, read); + + // Dependence only in one direction. + EXPECT_FALSE(write.MayDependOn(read)); + EXPECT_TRUE(read.MayDependOn(write)); +} + +void testNoWriteAndReadDependence(SideEffects write, SideEffects read) { + testWriteAndReadSanity(write, read); + + // No dependence in any direction. + EXPECT_FALSE(write.MayDependOn(read)); + EXPECT_FALSE(read.MayDependOn(write)); +} + +// +// Actual tests. 
+// + +TEST(SideEffectsTest, All) { + SideEffects all = SideEffects::All(); + EXPECT_TRUE(all.DoesAnyWrite()); + EXPECT_TRUE(all.DoesAnyRead()); + EXPECT_FALSE(all.DoesNothing()); + EXPECT_TRUE(all.DoesAllReadWrite()); +} + +TEST(SideEffectsTest, None) { + SideEffects none = SideEffects::None(); + EXPECT_FALSE(none.DoesAnyWrite()); + EXPECT_FALSE(none.DoesAnyRead()); + EXPECT_TRUE(none.DoesNothing()); + EXPECT_FALSE(none.DoesAllReadWrite()); +} + +TEST(SideEffectsTest, DependencesAndNoDependences) { + // Apply test to each individual primitive type. + for (Primitive::Type type = Primitive::kPrimNot; + type < Primitive::kPrimVoid; + type = Primitive::Type(type + 1)) { + // Same primitive type and access type: proper write/read dep. + testWriteAndReadDependence( + SideEffects::FieldWriteOfType(type, false), + SideEffects::FieldReadOfType(type, false)); + testWriteAndReadDependence( + SideEffects::ArrayWriteOfType(type), + SideEffects::ArrayReadOfType(type)); + // Same primitive type but different access type: no write/read dep. + testNoWriteAndReadDependence( + SideEffects::FieldWriteOfType(type, false), + SideEffects::ArrayReadOfType(type)); + testNoWriteAndReadDependence( + SideEffects::ArrayWriteOfType(type), + SideEffects::FieldReadOfType(type, false)); + } +} + +TEST(SideEffectsTest, NoDependences) { + // Different primitive type, same access type: no write/read dep. + testNoWriteAndReadDependence( + SideEffects::FieldWriteOfType(Primitive::kPrimInt, false), + SideEffects::FieldReadOfType(Primitive::kPrimDouble, false)); + testNoWriteAndReadDependence( + SideEffects::ArrayWriteOfType(Primitive::kPrimInt), + SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); + // Everything different: no write/read dep. + testNoWriteAndReadDependence( + SideEffects::FieldWriteOfType(Primitive::kPrimInt, false), + SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); + testNoWriteAndReadDependence( + SideEffects::ArrayWriteOfType(Primitive::kPrimInt), + SideEffects::FieldReadOfType(Primitive::kPrimDouble, false)); +} + +TEST(SideEffectsTest, VolatileDependences) { + SideEffects volatile_write = + SideEffects::FieldWriteOfType(Primitive::kPrimInt, true); + SideEffects any_write = + SideEffects::FieldWriteOfType(Primitive::kPrimInt, false); + SideEffects volatile_read = + SideEffects::FieldReadOfType(Primitive::kPrimByte, true); + SideEffects any_read = + SideEffects::FieldReadOfType(Primitive::kPrimByte, false); + + EXPECT_FALSE(volatile_write.MayDependOn(any_read)); + EXPECT_TRUE(any_read.MayDependOn(volatile_write)); + EXPECT_TRUE(volatile_write.MayDependOn(any_write)); + EXPECT_FALSE(any_write.MayDependOn(volatile_write)); + + EXPECT_FALSE(volatile_read.MayDependOn(any_read)); + EXPECT_TRUE(any_read.MayDependOn(volatile_read)); + EXPECT_TRUE(volatile_read.MayDependOn(any_write)); + EXPECT_FALSE(any_write.MayDependOn(volatile_read)); +} + +TEST(SideEffectsTest, SameWidthTypes) { + // Type I/F. + testWriteAndReadDependence( + SideEffects::FieldWriteOfType(Primitive::kPrimInt, false), + SideEffects::FieldReadOfType(Primitive::kPrimFloat, false)); + testWriteAndReadDependence( + SideEffects::ArrayWriteOfType(Primitive::kPrimInt), + SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); + // Type L/D. 
+ testWriteAndReadDependence( + SideEffects::FieldWriteOfType(Primitive::kPrimLong, false), + SideEffects::FieldReadOfType(Primitive::kPrimDouble, false)); + testWriteAndReadDependence( + SideEffects::ArrayWriteOfType(Primitive::kPrimLong), + SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); +} + +TEST(SideEffectsTest, AllWritesAndReads) { + SideEffects s = SideEffects::None(); + // Keep taking the union of different writes and reads. + for (Primitive::Type type = Primitive::kPrimNot; + type < Primitive::kPrimVoid; + type = Primitive::Type(type + 1)) { + s = s.Union(SideEffects::FieldWriteOfType(type, false)); + s = s.Union(SideEffects::ArrayWriteOfType(type)); + s = s.Union(SideEffects::FieldReadOfType(type, false)); + s = s.Union(SideEffects::ArrayReadOfType(type)); + } + EXPECT_TRUE(s.DoesAllReadWrite()); +} + +TEST(SideEffectsTest, GC) { + SideEffects can_trigger_gc = SideEffects::CanTriggerGC(); + SideEffects depends_on_gc = SideEffects::DependsOnGC(); + SideEffects all_changes = SideEffects::AllChanges(); + SideEffects all_dependencies = SideEffects::AllDependencies(); + + EXPECT_TRUE(depends_on_gc.MayDependOn(can_trigger_gc)); + EXPECT_TRUE(depends_on_gc.Union(can_trigger_gc).MayDependOn(can_trigger_gc)); + EXPECT_FALSE(can_trigger_gc.MayDependOn(depends_on_gc)); + + EXPECT_TRUE(depends_on_gc.MayDependOn(all_changes)); + EXPECT_TRUE(depends_on_gc.Union(can_trigger_gc).MayDependOn(all_changes)); + EXPECT_FALSE(can_trigger_gc.MayDependOn(all_changes)); + + EXPECT_TRUE(all_changes.Includes(can_trigger_gc)); + EXPECT_FALSE(all_changes.Includes(depends_on_gc)); + EXPECT_TRUE(all_dependencies.Includes(depends_on_gc)); + EXPECT_FALSE(all_dependencies.Includes(can_trigger_gc)); +} + +TEST(SideEffectsTest, BitStrings) { + EXPECT_STREQ( + "|||||||", + SideEffects::None().ToString().c_str()); + EXPECT_STREQ( + "|GC|DFJISCBZL|DFJISCBZL|GC|DFJISCBZL|DFJISCBZL|", + SideEffects::All().ToString().c_str()); + EXPECT_STREQ( + "|||||DFJISCBZL|DFJISCBZL|", + SideEffects::AllWrites().ToString().c_str()); + EXPECT_STREQ( + "||DFJISCBZL|DFJISCBZL||||", + SideEffects::AllReads().ToString().c_str()); + EXPECT_STREQ( + "||||||L|", + SideEffects::FieldWriteOfType(Primitive::kPrimNot, false).ToString().c_str()); + EXPECT_STREQ( + "|||||Z||", + SideEffects::ArrayWriteOfType(Primitive::kPrimBoolean).ToString().c_str()); + EXPECT_STREQ( + "|||B||||", + SideEffects::FieldReadOfType(Primitive::kPrimByte, false).ToString().c_str()); + EXPECT_STREQ( + "||DJ|||||", // note: DJ alias + SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str()); + SideEffects s = SideEffects::None(); + s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, false)); + s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, false)); + s = s.Union(SideEffects::ArrayWriteOfType(Primitive::kPrimShort)); + s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, false)); + s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); + s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); + EXPECT_STREQ( + "||DFJI|FI||S|DJC|", // note: DJ/FI alias. 
+ s.ToString().c_str()); +} + +} // namespace art diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index c37b1995fa..2c34e4dd03 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -350,7 +350,9 @@ HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) { void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { current_locals_ = GetLocalsFor(block); - if (block->IsLoopHeader()) { + if (block->IsCatchBlock()) { + // Catch phis were already created and inputs collected from throwing sites. + } else if (block->IsLoopHeader()) { // If the block is a loop header, we know we only have visited the pre header // because we are visiting in reverse post order. We create phis for all initialized // locals from the pre header. Their inputs will be populated at the end of @@ -551,19 +553,34 @@ void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { } void SsaBuilder::VisitInstruction(HInstruction* instruction) { - if (!instruction->NeedsEnvironment()) { - return; + if (instruction->NeedsEnvironment()) { + HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( + GetGraph()->GetArena(), + current_locals_->Size(), + GetGraph()->GetDexFile(), + GetGraph()->GetMethodIdx(), + instruction->GetDexPc(), + GetGraph()->GetInvokeType(), + instruction); + environment->CopyFrom(*current_locals_); + instruction->SetRawEnvironment(environment); + } + + // If in a try block, propagate values of locals into catch blocks. + if (instruction->GetBlock()->IsInTry() && instruction->CanThrow()) { + HTryBoundary* try_block = instruction->GetBlock()->GetTryEntry(); + for (HExceptionHandlerIterator it(*try_block); !it.Done(); it.Advance()) { + HBasicBlock* handler = it.Current(); + handler->AddExceptionalPredecessor(instruction); + GrowableArray<HInstruction*>* handler_locals = GetLocalsFor(handler); + for (size_t i = 0, e = current_locals_->Size(); i < e; ++i) { + HInstruction* local_value = current_locals_->Get(i); + if (local_value != nullptr) { + handler_locals->Get(i)->AsPhi()->AddInput(local_value); + } + } + } } - HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( - GetGraph()->GetArena(), - current_locals_->Size(), - GetGraph()->GetDexFile(), - GetGraph()->GetMethodIdx(), - instruction->GetDexPc(), - GetGraph()->GetInvokeType(), - instruction); - environment->CopyFrom(*current_locals_); - instruction->SetRawEnvironment(environment); } void SsaBuilder::VisitTemporary(HTemporary* temp) { diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 1c83c4ba48..64600db648 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -61,9 +61,22 @@ class SsaBuilder : public HGraphVisitor { GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) { GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId()); if (locals == nullptr) { - locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>( - GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs()); - locals->SetSize(GetGraph()->GetNumberOfVRegs()); + const size_t vregs = GetGraph()->GetNumberOfVRegs(); + ArenaAllocator* arena = GetGraph()->GetArena(); + locals = new (arena) GrowableArray<HInstruction*>(arena, vregs); + locals->SetSize(vregs); + + if (block->IsCatchBlock()) { + // We record incoming inputs of catch phis at throwing instructions and + // must therefore eagerly create the phis. Unused phis will be removed + // in the dead phi analysis. 
+ for (size_t i = 0; i < vregs; ++i) { + HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid); + block->AddPhi(phi); + locals->Put(i, phi); + } + } + locals_for_.Put(block->GetBlockId(), locals); } return locals; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 701dbb019b..40502c173b 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -225,7 +225,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // SsaLivenessAnalysis. for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* instruction = environment->GetInstructionAt(i); - bool should_be_live = ShouldBeLiveForEnvironment(instruction); + bool should_be_live = ShouldBeLiveForEnvironment(current, instruction); if (should_be_live) { DCHECK(instruction->HasSsaIndex()); live_in->SetBit(instruction->GetSsaIndex()); diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 220ee6a8d0..a7044de850 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -1201,8 +1201,14 @@ class SsaLivenessAnalysis : public ValueObject { // Update the live_out set of the block and returns whether it has changed. bool UpdateLiveOut(const HBasicBlock& block); - static bool ShouldBeLiveForEnvironment(HInstruction* instruction) { + // Returns whether `instruction` in an HEnvironment held by `env_holder` + // should be kept live by the HEnvironment. + static bool ShouldBeLiveForEnvironment(HInstruction* env_holder, + HInstruction* instruction) { if (instruction == nullptr) return false; + // A value that's not live in compiled code may still be needed in interpreter, + // due to code motion, etc. + if (env_holder->IsDeoptimize()) return true; if (instruction->GetBlock()->GetGraph()->IsDebuggable()) return true; return instruction->GetType() == Primitive::kPrimNot; } diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index 2f2e2d1fab..917341a1e7 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -114,6 +114,12 @@ void SsaRedundantPhiElimination::Run() { continue; } + if (phi->InputCount() == 0) { + DCHECK(phi->IsCatchPhi()); + DCHECK(phi->IsDead()); + continue; + } + // Find if the inputs of the phi are the same instruction. HInstruction* candidate = phi->InputAt(0); // A loop phi cannot have itself as the first phi. Note that this @@ -137,6 +143,11 @@ void SsaRedundantPhiElimination::Run() { continue; } + // The candidate may not dominate a phi in a catch block. + if (phi->IsCatchPhi() && !candidate->StrictlyDominates(phi)) { + continue; + } + if (phi->IsInLoop()) { // Because we're updating the users of this phi, we may have new // phis candidate for elimination if this phi is in a loop. Add phis that diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 65610d54a6..1f1530fa1e 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -248,7 +248,7 @@ void StackMapStream::FillIn(MemoryRegion region) { DCHECK_EQ(code_info.GetStackMapsSize(code_info.ExtractEncoding()), stack_maps_size_); // Set the Dex register location catalog. 
- code_info.SetNumberOfDexRegisterLocationCatalogEntries(location_catalog_entries_.Size()); + code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.Size()); MemoryRegion dex_register_location_catalog_region = region.Subregion( dex_register_location_catalog_start_, dex_register_location_catalog_size_); DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index b4ac1b4d1a..33207d92d2 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -55,8 +55,7 @@ TEST(StackMapTest, Test1) { ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_location_catalog_entries = - code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(2u, number_of_location_catalog_entries); DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); // The Dex register location catalog contains: @@ -154,8 +153,7 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(2u, encoding.NumberOfBytesForStackMask()); ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_location_catalog_entries = - code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(4u, number_of_location_catalog_entries); DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); // The Dex register location catalog contains: @@ -304,8 +302,7 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_location_catalog_entries = - code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(1u, number_of_location_catalog_entries); DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); // The Dex register location catalog contains: @@ -398,8 +395,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { // The location catalog contains two entries (DexRegisterLocation(kConstant, 0) // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index // has a size of 1 bit. 
- uint32_t number_of_location_catalog_entries = - code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(2u, number_of_location_catalog_entries); ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_location_catalog_entries)); @@ -501,8 +497,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_location_catalog_entries = - code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(0u, number_of_location_catalog_entries); DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); ASSERT_EQ(0u, location_catalog.Size()); diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h index bdab2796d8..9fb22452ea 100644 --- a/compiler/trampolines/trampoline_compiler.h +++ b/compiler/trampolines/trampoline_compiler.h @@ -27,10 +27,10 @@ namespace art { // Create code that will invoke the function held in thread local storage. const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset<4> entry_point_offset) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset<8> entry_point_offset) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + SHARED_REQUIRES(Locks::mutator_lock_); } // namespace art diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 09d22703fe..0e3e08c2da 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -252,11 +252,11 @@ uint32_t Address::encodingThumbLdrdStrd() const { if (offset_ < 0) { int32_t off = -offset_; CHECK_LT(off, 1024); - CHECK_EQ((off & 3 /* 0b11 */), 0); // Must be multiple of 4. + CHECK_ALIGNED(off, 4); encoding = (am ^ (1 << kUShift)) | off >> 2; // Flip U to adjust sign. } else { CHECK_LT(offset_, 1024); - CHECK_EQ((offset_ & 3 /* 0b11 */), 0); // Must be multiple of 4. 
+ CHECK_ALIGNED(offset_, 4); encoding = am | offset_ >> 2; } encoding |= static_cast<uint32_t>(rn_) << 16; diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 5d85d11054..ef60fefe4d 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -888,7 +888,7 @@ class ArmAssembler : public Assembler { // Slowpath entered when Thread::Current()->_exception is non-null class ArmExceptionSlowPath FINAL : public SlowPath { public: - explicit ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust) + ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust) : scratch_(scratch), stack_adjust_(stack_adjust) { } void Emit(Assembler *sp_asm) OVERRIDE; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 88b2f2cc4d..b499dddb0c 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -25,6 +25,58 @@ namespace art { namespace arm { +void Thumb2Assembler::Fixup::PrepareDependents(Thumb2Assembler* assembler) { + // For each Fixup, it's easy to find the Fixups that it depends on as they are either + // the following or the preceding Fixups until we find the target. However, for fixup + // adjustment we need the reverse lookup, i.e. what Fixups depend on a given Fixup. + // This function creates a compact representation of this relationship, where we have + // all the dependents in a single array and Fixups reference their ranges by start + // index and count. (Instead of having a per-fixup vector.) + + // Count the number of dependents of each Fixup. + const FixupId end_id = assembler->fixups_.size(); + Fixup* fixups = assembler->fixups_.data(); + for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) { + uint32_t target = fixups[fixup_id].target_; + if (target > fixups[fixup_id].location_) { + for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) { + fixups[id].dependents_count_ += 1u; + } + } else { + for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) { + fixups[id - 1u].dependents_count_ += 1u; + } + } + } + // Assign index ranges in fixup_dependents_ to individual fixups. Record the end of the + // range in dependents_start_, we shall later decrement it as we fill in fixup_dependents_. + uint32_t number_of_dependents = 0u; + for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) { + number_of_dependents += fixups[fixup_id].dependents_count_; + fixups[fixup_id].dependents_start_ = number_of_dependents; + } + if (number_of_dependents == 0u) { + return; + } + // Create and fill in the fixup_dependents_. 
+ assembler->fixup_dependents_.reset(new FixupId[number_of_dependents]); + FixupId* dependents = assembler->fixup_dependents_.get(); + for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) { + uint32_t target = fixups[fixup_id].target_; + if (target > fixups[fixup_id].location_) { + for (FixupId id = fixup_id + 1u; id != end_id && fixups[id].location_ < target; ++id) { + fixups[id].dependents_start_ -= 1u; + dependents[fixups[id].dependents_start_] = fixup_id; + } + } else { + for (FixupId id = fixup_id; id != 0u && fixups[id - 1u].location_ >= target; --id) { + fixups[id - 1u].dependents_start_ -= 1u; + dependents[fixups[id - 1u].dependents_start_] = fixup_id; + } + } + } +} + void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) { CHECK(!label->IsBound()); @@ -32,10 +84,6 @@ void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) { FixupId fixup_id = label->Position(); // The id for linked Fixup. Fixup* fixup = GetFixup(fixup_id); // Get the Fixup at this id. fixup->Resolve(bound_pc); // Fixup can be resolved now. - // Add this fixup as a dependency of all later fixups. - for (FixupId id = fixup_id + 1u, end = fixups_.size(); id != end; ++id) { - GetFixup(id)->AddDependent(fixup_id); - } uint32_t fixup_location = fixup->GetLocation(); uint16_t next = buffer_.Load<uint16_t>(fixup_location); // Get next in chain. buffer_.Store<int16_t>(fixup_location, 0); @@ -59,7 +107,7 @@ void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_s uint32_t adjustment = fixup->AdjustSizeIfNeeded(*current_code_size); if (adjustment != 0u) { *current_code_size += adjustment; - for (FixupId dependent_id : fixup->Dependents()) { + for (FixupId dependent_id : fixup->Dependents(*this)) { Fixup* dependent = GetFixup(dependent_id); dependent->IncreaseAdjustment(adjustment); if (buffer_.Load<int16_t>(dependent->GetLocation()) == 0) { @@ -71,6 +119,7 @@ void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_s } uint32_t Thumb2Assembler::AdjustFixups() { + Fixup::PrepareDependents(this); uint32_t current_code_size = buffer_.Size(); std::deque<FixupId> fixups_to_recalculate; if (kIsDebugBuild) { @@ -84,14 +133,27 @@ uint32_t Thumb2Assembler::AdjustFixups() { AdjustFixupIfNeeded(&fixup, ¤t_code_size, &fixups_to_recalculate); } while (!fixups_to_recalculate.empty()) { - // Pop the fixup. - FixupId fixup_id = fixups_to_recalculate.front(); - fixups_to_recalculate.pop_front(); - Fixup* fixup = GetFixup(fixup_id); - DCHECK_NE(buffer_.Load<int16_t>(fixup->GetLocation()), 0); - buffer_.Store<int16_t>(fixup->GetLocation(), 0); - // See if it needs adjustment. - AdjustFixupIfNeeded(fixup, ¤t_code_size, &fixups_to_recalculate); + do { + // Pop the fixup. + FixupId fixup_id = fixups_to_recalculate.front(); + fixups_to_recalculate.pop_front(); + Fixup* fixup = GetFixup(fixup_id); + DCHECK_NE(buffer_.Load<int16_t>(fixup->GetLocation()), 0); + buffer_.Store<int16_t>(fixup->GetLocation(), 0); + // See if it needs adjustment. + AdjustFixupIfNeeded(fixup, ¤t_code_size, &fixups_to_recalculate); + } while (!fixups_to_recalculate.empty()); + + if ((current_code_size & 2) != 0 && !literals_.empty()) { + // If we need to add padding before literals, this may just push some out of range, + // so recalculate all load literals. This makes up for the fact that we don't mark + // load literal as a dependency of all previous Fixups even though it actually is. 
+ for (Fixup& fixup : fixups_) { + if (fixup.IsLoadLiteral()) { + AdjustFixupIfNeeded(&fixup, ¤t_code_size, &fixups_to_recalculate); + } + } + } } if (kIsDebugBuild) { // Check that no fixup is marked as being in fixups_to_recalculate anymore. @@ -101,7 +163,7 @@ uint32_t Thumb2Assembler::AdjustFixups() { } // Adjust literal pool labels for padding. - DCHECK_EQ(current_code_size & 1u, 0u); + DCHECK_ALIGNED(current_code_size, 2); uint32_t literals_adjustment = current_code_size + (current_code_size & 2) - buffer_.Size(); if (literals_adjustment != 0u) { for (Literal& literal : literals_) { @@ -152,7 +214,7 @@ void Thumb2Assembler::EmitLiterals() { // Load literal instructions (LDR, LDRD, VLDR) require 4-byte alignment. // We don't support byte and half-word literals. uint32_t code_size = buffer_.Size(); - DCHECK_EQ(code_size & 1u, 0u); + DCHECK_ALIGNED(code_size, 2); if ((code_size & 2u) != 0u) { Emit16(0); } @@ -168,7 +230,7 @@ void Thumb2Assembler::EmitLiterals() { } inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) { - DCHECK_EQ(offset & 1, 0); + DCHECK_ALIGNED(offset, 2); int16_t encoding = B15 | B14; if (cond != AL) { DCHECK(IsInt<9>(offset)); @@ -181,7 +243,7 @@ inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) { } inline int32_t Thumb2Assembler::BEncoding32(int32_t offset, Condition cond) { - DCHECK_EQ(offset & 1, 0); + DCHECK_ALIGNED(offset, 2); int32_t s = (offset >> 31) & 1; // Sign bit. int32_t encoding = B31 | B30 | B29 | B28 | B15 | (s << 26) | // Sign bit goes to bit 26. @@ -205,7 +267,7 @@ inline int32_t Thumb2Assembler::BEncoding32(int32_t offset, Condition cond) { inline int16_t Thumb2Assembler::CbxzEncoding16(Register rn, int32_t offset, Condition cond) { DCHECK(!IsHighRegister(rn)); - DCHECK_EQ(offset & 1, 0); + DCHECK_ALIGNED(offset, 2); DCHECK(IsUint<7>(offset)); DCHECK(cond == EQ || cond == NE); return B15 | B13 | B12 | B8 | (cond == NE ? 
B11 : 0) | static_cast<int32_t>(rn) | @@ -250,7 +312,7 @@ inline int32_t Thumb2Assembler::MovModImmEncoding32(Register rd, int32_t value) inline int16_t Thumb2Assembler::LdrLitEncoding16(Register rt, int32_t offset) { DCHECK(!IsHighRegister(rt)); - DCHECK_EQ(offset & 3, 0); + DCHECK_ALIGNED(offset, 4); DCHECK(IsUint<10>(offset)); return B14 | B11 | (static_cast<int32_t>(rt) << 8) | (offset >> 2); } @@ -261,7 +323,7 @@ inline int32_t Thumb2Assembler::LdrLitEncoding32(Register rt, int32_t offset) { } inline int32_t Thumb2Assembler::LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset) { - DCHECK_EQ(offset & 3, 0); + DCHECK_ALIGNED(offset, 4); CHECK(IsUint<10>(offset)); return B31 | B30 | B29 | B27 | B24 /* P = 1 */ | B23 /* U = 1 */ | B22 | 0 /* W = 0 */ | B20 | @@ -270,7 +332,7 @@ inline int32_t Thumb2Assembler::LdrdEncoding32(Register rt, Register rt2, Regist } inline int32_t Thumb2Assembler::VldrsEncoding32(SRegister sd, Register rn, int32_t offset) { - DCHECK_EQ(offset & 3, 0); + DCHECK_ALIGNED(offset, 4); CHECK(IsUint<10>(offset)); return B31 | B30 | B29 | B27 | B26 | B24 | B23 /* U = 1 */ | B20 | B11 | B9 | @@ -281,7 +343,7 @@ inline int32_t Thumb2Assembler::VldrsEncoding32(SRegister sd, Register rn, int32 } inline int32_t Thumb2Assembler::VldrdEncoding32(DRegister dd, Register rn, int32_t offset) { - DCHECK_EQ(offset & 3, 0); + DCHECK_ALIGNED(offset, 4); CHECK(IsUint<10>(offset)); return B31 | B30 | B29 | B27 | B26 | B24 | B23 /* U = 1 */ | B20 | B11 | B9 | B8 | @@ -294,7 +356,7 @@ inline int32_t Thumb2Assembler::VldrdEncoding32(DRegister dd, Register rn, int32 inline int16_t Thumb2Assembler::LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset) { DCHECK(!IsHighRegister(rt)); DCHECK(!IsHighRegister(rn)); - DCHECK_EQ(offset & 3, 0); + DCHECK_ALIGNED(offset, 4); DCHECK(IsUint<7>(offset)); return B14 | B13 | B11 | (static_cast<int32_t>(rn) << 3) | static_cast<int32_t>(rt) | @@ -1423,7 +1485,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, thumb_opcode = 3U /* 0b11 */; opcode_shift = 12; CHECK_LT(immediate, (1u << 9)); - CHECK_EQ((immediate & 3u /* 0b11 */), 0u); + CHECK_ALIGNED(immediate, 4); // Remove rd and rn from instruction by orring it with immed and clearing bits. rn = R0; @@ -1437,7 +1499,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, thumb_opcode = 5U /* 0b101 */; opcode_shift = 11; CHECK_LT(immediate, (1u << 10)); - CHECK_EQ((immediate & 3u /* 0b11 */), 0u); + CHECK_ALIGNED(immediate, 4); // Remove rn from instruction. rn = R0; @@ -1474,7 +1536,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, thumb_opcode = 0x61 /* 0b1100001 */; opcode_shift = 7; CHECK_LT(immediate, (1u << 9)); - CHECK_EQ((immediate & 3u /* 0b11 */), 0u); + CHECK_ALIGNED(immediate, 4); // Remove rd and rn from instruction by orring it with immed and clearing bits. rn = R0; @@ -1652,7 +1714,7 @@ inline uint32_t Thumb2Assembler::Fixup::GetSizeInBytes() const { inline size_t Thumb2Assembler::Fixup::LiteralPoolPaddingSize(uint32_t current_code_size) { // The code size must be a multiple of 2. - DCHECK_EQ(current_code_size & 1u, 0u); + DCHECK_ALIGNED(current_code_size, 2); // If it isn't a multiple of 4, we need to add a 2-byte padding before the literal pool. 
return current_code_size & 2; } @@ -1697,7 +1759,7 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con // Load literal instructions round down the PC+4 to a multiple of 4, so if the PC // isn't a multiple of 2, we need to adjust. Since we already adjusted for the target // being aligned, current PC alignment can be inferred from diff. - DCHECK_EQ(diff & 1, 0); + DCHECK_ALIGNED(diff, 2); diff = diff + (diff & 2); DCHECK_GE(diff, 0); break; @@ -2045,7 +2107,7 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, if (sp_relative) { // SP relative, 10 bit offset. CHECK_LT(offset, (1 << 10)); - CHECK_EQ((offset & 3 /* 0b11 */), 0); + CHECK_ALIGNED(offset, 4); encoding |= rd << 8 | offset >> 2; } else { // No SP relative. The offset is shifted right depending on @@ -2058,12 +2120,12 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, } else if (half) { // 6 bit offset, shifted by 1. CHECK_LT(offset, (1 << 6)); - CHECK_EQ((offset & 1 /* 0b1 */), 0); + CHECK_ALIGNED(offset, 2); offset >>= 1; } else { // 7 bit offset, shifted by 2. CHECK_LT(offset, (1 << 7)); - CHECK_EQ((offset & 3 /* 0b11 */), 0); + CHECK_ALIGNED(offset, 4); offset >>= 2; } encoding |= rn << 3 | offset << 6; @@ -2220,17 +2282,7 @@ void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x if (label->IsBound()) { // The branch is to a bound label which means that it's a backwards branch. - // Record this branch as a dependency of all Fixups between the label and the branch. GetFixup(branch_id)->Resolve(label->Position()); - for (FixupId fixup_id = branch_id; fixup_id != 0u; ) { - --fixup_id; - Fixup* fixup = GetFixup(fixup_id); - DCHECK_GE(label->Position(), 0); - if (fixup->GetLocation() < static_cast<uint32_t>(label->Position())) { - break; - } - fixup->AddDependent(branch_id); - } Emit16(0); } else { // Branch target is an unbound label. Add it to a singly-linked list maintained within diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 5e6969b4c2..41eb5d36f2 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -24,6 +24,7 @@ #include "constants_arm.h" #include "utils/arm/managed_register_arm.h" #include "utils/arm/assembler_arm.h" +#include "utils/array_ref.h" #include "offsets.h" namespace art { @@ -37,6 +38,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { it_cond_index_(kNoItCondition), next_condition_(AL), fixups_(), + fixup_dependents_(), literals_(), last_position_adjustment_(0u), last_old_position_(0u), @@ -487,6 +489,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { return type_; } + bool IsLoadLiteral() const { + return GetType() >= kLoadLiteralNarrow; + } + Size GetOriginalSize() const { return original_size_; } @@ -507,12 +513,12 @@ class Thumb2Assembler FINAL : public ArmAssembler { return adjustment_; } - const std::vector<FixupId>& Dependents() const { - return dependents_; - } + // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_. + static void PrepareDependents(Thumb2Assembler* assembler); - void AddDependent(FixupId dependent_id) { - dependents_.push_back(dependent_id); + ArrayRef<FixupId> Dependents(const Thumb2Assembler& assembler) const { + return ArrayRef<FixupId>(assembler.fixup_dependents_.get() + dependents_start_, + dependents_count_); } // Resolve a branch when the target is known. 
@@ -557,7 +563,8 @@ class Thumb2Assembler FINAL : public ArmAssembler { location_(location), target_(kUnresolved), adjustment_(0u), - dependents_() { + dependents_count_(0u), + dependents_start_(0u) { } static size_t SizeInBytes(Size size); @@ -584,7 +591,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { uint32_t location_; // Offset into assembler buffer in bytes. uint32_t target_; // Offset into assembler buffer in bytes. uint32_t adjustment_; // The number of extra bytes inserted between location_ and target_. - std::vector<FixupId> dependents_; // Fixups that require adjustment when current size changes. + // Fixups that require adjustment when current size changes are stored in a single + // array in the assembler and we store only the start index and count here. + uint32_t dependents_count_; + uint32_t dependents_start_; }; // Emit a single 32 or 16 bit data processing instruction. @@ -760,6 +770,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset); std::vector<Fixup> fixups_; + std::unique_ptr<FixupId[]> fixup_dependents_; // Use std::deque<> for literal labels to allow insertions at the end // without invalidating pointers and references to existing elements. diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 68b7931a0c..84f5cb16fb 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -950,4 +950,73 @@ TEST_F(AssemblerThumb2Test, LoadLiteralDoubleFar) { __ GetAdjustedPosition(label.Position())); } +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax1KiBDueToAlignmentOnSecondPass) { + // First part: as TwoCbzBeyondMaxOffset but add one 16-bit instruction to the end, + // so that the size is not Aligned<4>(.). On the first pass, the assembler resizes + // the second CBZ because it's out of range, then it will resize the first CBZ + // which has been pushed out of range. Thus, after the first pass, the code size + // will appear Aligned<4>(.) but the final size will not be. + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 63; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 65; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + __ ldr(arm::R0, arm::Address(arm::R0)); + + std::string expected_part1 = + "cmp r0, #0\n" // cbz r0, label1 + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.n 2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n" // Here the offset is Aligned<4>(.). + "ldr r0, [r0]\n"; // Make the first part + + // Second part: as LoadLiteralMax1KiB with the caveat that the offset of the load + // literal will not be Aligned<4>(.) but it will appear to be when we process the + // instruction during the first pass, so the literal will need a padding and it + // will push the literal out of range, so we shall end up with "ldr.w". 
+ arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R0, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 511; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + expected_part1 + + "1:\n" + "ldr.w r0, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, Clz) { + __ clz(arm::R0, arm::R1); + + const char* expected = "clz r0, r1\n"; + + DriverStr(expected, "clz"); +} + } // namespace art diff --git a/compiler/utils/arm/constants_arm.h b/compiler/utils/arm/constants_arm.h index 1513296c2d..6b4daed909 100644 --- a/compiler/utils/arm/constants_arm.h +++ b/compiler/utils/arm/constants_arm.h @@ -32,8 +32,9 @@ namespace arm { // Defines constants and accessor classes to assemble, disassemble and // simulate ARM instructions. // -// Section references in the code refer to the "ARM Architecture Reference -// Manual" from July 2005 (available at http://www.arm.com/miscPDFs/14128.pdf) +// Section references in the code refer to the "ARM Architecture +// Reference Manual ARMv7-A and ARMv7-R edition", issue C.b (24 July +// 2012). // // Constants for specific fields are defined in their respective named enums. // General constants are in an anonymous enum in class Instr. @@ -97,26 +98,32 @@ enum DRegister { // private marker to avoid generate-operator-out.py from proce std::ostream& operator<<(std::ostream& os, const DRegister& rhs); -// Values for the condition field as defined in section A3.2. +// Values for the condition field as defined in Table A8-1 "Condition +// codes" (refer to Section A8.3 "Conditional execution"). enum Condition { // private marker to avoid generate-operator-out.py from processing. kNoCondition = -1, - EQ = 0, // equal - NE = 1, // not equal - CS = 2, // carry set/unsigned higher or same - CC = 3, // carry clear/unsigned lower - MI = 4, // minus/negative - PL = 5, // plus/positive or zero - VS = 6, // overflow - VC = 7, // no overflow - HI = 8, // unsigned higher - LS = 9, // unsigned lower or same - GE = 10, // signed greater than or equal - LT = 11, // signed less than - GT = 12, // signed greater than - LE = 13, // signed less than or equal - AL = 14, // always (unconditional) - kSpecialCondition = 15, // special condition (refer to section A3.2.1) + // Meaning (integer) | Meaning (floating-point) + // ---------------------------------------+----------------------------------------- + EQ = 0, // Equal | Equal + NE = 1, // Not equal | Not equal, or unordered + CS = 2, // Carry set | Greater than, equal, or unordered + CC = 3, // Carry clear | Less than + MI = 4, // Minus, negative | Less than + PL = 5, // Plus, positive or zero | Greater than, equal, or unordered + VS = 6, // Overflow | Unordered (i.e. 
at least one NaN operand) + VC = 7, // No overflow | Not unordered + HI = 8, // Unsigned higher | Greater than, or unordered + LS = 9, // Unsigned lower or same | Less than or equal + GE = 10, // Signed greater than or equal | Greater than or equal + LT = 11, // Signed less than | Less than, or unordered + GT = 12, // Signed greater than | Greater than + LE = 13, // Signed less than or equal | Less than, equal, or unordered + AL = 14, // Always (unconditional) | Always (unconditional) + kSpecialCondition = 15, // Special condition (refer to Section A8.3 "Conditional execution"). kMaxCondition = 16, + + HS = CS, // HS (unsigned higher or same) is a synonym for CS. + LO = CC // LO (unsigned lower) is a synonym for CC. }; std::ostream& operator<<(std::ostream& os, const Condition& rhs); diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index 05882a30b0..8e85fe96ab 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -254,7 +254,7 @@ class Arm64Assembler FINAL : public Assembler { class Arm64Exception { private: - explicit Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust) + Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust) : scratch_(scratch), stack_adjust_(stack_adjust) { } diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h index ff5a77c97a..303e0d5ad4 100644 --- a/compiler/utils/array_ref.h +++ b/compiler/utils/array_ref.h @@ -62,14 +62,14 @@ class ArrayRef { } template <size_t size> - constexpr ArrayRef(T (&array)[size]) + explicit constexpr ArrayRef(T (&array)[size]) : array_(array), size_(size) { } template <typename U, size_t size> - constexpr ArrayRef(U (&array)[size], - typename std::enable_if<std::is_same<T, const U>::value, tag>::type - t ATTRIBUTE_UNUSED = tag()) + explicit constexpr ArrayRef(U (&array)[size], + typename std::enable_if<std::is_same<T, const U>::value, tag>::type + t ATTRIBUTE_UNUSED = tag()) : array_(array), size_(size) { } @@ -83,9 +83,9 @@ class ArrayRef { } template <typename U, typename Alloc> - ArrayRef(const std::vector<U, Alloc>& v, - typename std::enable_if<std::is_same<T, const U>::value, tag>::type - t ATTRIBUTE_UNUSED = tag()) + explicit ArrayRef(const std::vector<U, Alloc>& v, + typename std::enable_if<std::is_same<T, const U>::value, tag>::type + t ATTRIBUTE_UNUSED = tag()) : array_(v.data()), size_(v.size()) { } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 20f61f942b..cb01cea8ef 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -32,7 +32,7 @@ namespace arm { // Include results file (generated manually) #include "assembler_thumb_test_expected.cc.inc" -#ifndef HAVE_ANDROID_OS +#ifndef __ANDROID__ // This controls whether the results are printed to the // screen or compared against the expected output. // To generate new expected output, set this to true and @@ -72,7 +72,7 @@ void InitResults() { } std::string GetToolsDir() { -#ifndef HAVE_ANDROID_OS +#ifndef __ANDROID__ // This will only work on the host. There is no as, objcopy or objdump on the device. 
static std::string toolsdir; @@ -89,7 +89,7 @@ std::string GetToolsDir() { } void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* const* results) { -#ifndef HAVE_ANDROID_OS +#ifndef __ANDROID__ static std::string toolsdir = GetToolsDir(); ScratchFile file; diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h index 8cdb180740..2c4a689096 100644 --- a/compiler/utils/dedupe_set.h +++ b/compiler/utils/dedupe_set.h @@ -99,7 +99,7 @@ class DedupeSet { return hashed_key.store_ptr; } - explicit DedupeSet(const char* set_name, SwapAllocator<void>& alloc) + DedupeSet(const char* set_name, SwapAllocator<void>& alloc) : allocator_(alloc), hash_time_(0) { for (HashType i = 0; i < kShard; ++i) { std::ostringstream oss; diff --git a/compiler/utils/dex_cache_arrays_layout.h b/compiler/utils/dex_cache_arrays_layout.h index 8f98ea11ba..2a109bd11e 100644 --- a/compiler/utils/dex_cache_arrays_layout.h +++ b/compiler/utils/dex_cache_arrays_layout.h @@ -37,7 +37,7 @@ class DexCacheArraysLayout { } // Construct a layout for a particular dex file. - explicit DexCacheArraysLayout(size_t pointer_size, const DexFile* dex_file); + DexCacheArraysLayout(size_t pointer_size, const DexFile* dex_file); bool Valid() const { return Size() != 0u; diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h index bb62bca3b9..893daff719 100644 --- a/compiler/utils/managed_register.h +++ b/compiler/utils/managed_register.h @@ -95,7 +95,7 @@ class ManagedRegisterSpill : public ManagedRegister { explicit ManagedRegisterSpill(const ManagedRegister& other) : ManagedRegister(other), size_(-1), spill_offset_(-1) { } - explicit ManagedRegisterSpill(const ManagedRegister& other, int32_t size) + ManagedRegisterSpill(const ManagedRegister& other, int32_t size) : ManagedRegister(other), size_(size), spill_offset_(-1) { } int32_t getSpillOffset() { diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 0d1b82ce7b..df95daddc1 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -283,7 +283,7 @@ class MipsAssembler FINAL : public Assembler { // Slowpath entered when Thread::Current()->_exception is non-null class MipsExceptionSlowPath FINAL : public SlowPath { public: - explicit MipsExceptionSlowPath(MipsManagedRegister scratch, size_t stack_adjust) + MipsExceptionSlowPath(MipsManagedRegister scratch, size_t stack_adjust) : scratch_(scratch), stack_adjust_(stack_adjust) {} virtual void Emit(Assembler *sp_asm) OVERRIDE; private: diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 47b146a28c..31130ea43d 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -354,7 +354,7 @@ class Mips64Assembler FINAL : public Assembler { // Slowpath entered when Thread::Current()->_exception is non-null class Mips64ExceptionSlowPath FINAL : public SlowPath { public: - explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) + Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) : scratch_(scratch), stack_adjust_(stack_adjust) {} virtual void Emit(Assembler *sp_asm) OVERRIDE; private: diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc index 325ee4fa01..42ed8810f8 100644 --- a/compiler/utils/swap_space.cc +++ b/compiler/utils/swap_space.cc @@ -143,7 +143,6 @@ SpaceChunk SwapSpace::NewFileChunk(size_t min_size) { LOG(ERROR) << "Unable to mmap 
new swap file chunk."; LOG(ERROR) << "Current size: " << size_ << " requested: " << next_part << "/" << min_size; LOG(ERROR) << "Free list:"; - MutexLock lock(Thread::Current(), lock_); DumpFreeMap(free_by_size_); LOG(ERROR) << "In free list: " << CollectFree(free_by_start_, free_by_size_); LOG(FATAL) << "Aborting..."; diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h index 691df4a945..f7c772d673 100644 --- a/compiler/utils/swap_space.h +++ b/compiler/utils/swap_space.h @@ -60,15 +60,15 @@ class SwapSpace { public: SwapSpace(int fd, size_t initial_size); ~SwapSpace(); - void* Alloc(size_t size) LOCKS_EXCLUDED(lock_); - void Free(void* ptr, size_t size) LOCKS_EXCLUDED(lock_); + void* Alloc(size_t size) REQUIRES(!lock_); + void Free(void* ptr, size_t size) REQUIRES(!lock_); size_t GetSize() { return size_; } private: - SpaceChunk NewFileChunk(size_t min_size); + SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_); int fd_; size_t size_; diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index fa85ada864..914bd56683 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -145,6 +145,13 @@ void X86Assembler::movl(const Address& dst, Label* lbl) { EmitLabel(lbl, dst.length_ + 5); } +void X86Assembler::movntl(const Address& dst, Register src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xC3); + EmitOperand(src, dst); +} + void X86Assembler::bswapl(Register dst) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -1194,11 +1201,26 @@ void X86Assembler::imull(Register dst, Register src) { } -void X86Assembler::imull(Register reg, const Immediate& imm) { +void X86Assembler::imull(Register dst, Register src, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitUint8(0x69); - EmitOperand(reg, Operand(reg)); - EmitImmediate(imm); + // See whether imm can be represented as a sign-extended 8bit value. + int32_t v32 = static_cast<int32_t>(imm.value()); + if (IsInt<8>(v32)) { + // Sign-extension works. + EmitUint8(0x6B); + EmitOperand(dst, Operand(src)); + EmitUint8(static_cast<uint8_t>(v32 & 0xFF)); + } else { + // Not representable, use full immediate. 
+ EmitUint8(0x69); + EmitOperand(dst, Operand(src)); + EmitImmediate(imm); + } +} + + +void X86Assembler::imull(Register reg, const Immediate& imm) { + imull(reg, reg, imm); } @@ -1515,6 +1537,29 @@ void X86Assembler::repne_scasw() { } +void X86Assembler::repe_cmpsw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0xF3); + EmitUint8(0xA7); +} + + +void X86Assembler::repe_cmpsl() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitUint8(0xA7); +} + + +void X86Assembler::rep_movsw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0xF3); + EmitUint8(0xA5); +} + + X86Assembler* X86Assembler::lock() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF0); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index d1b4e1dc5f..850e1dab32 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -205,7 +205,7 @@ class Address : public Operand { class X86Assembler FINAL : public Assembler { public: - explicit X86Assembler() {} + X86Assembler() {} virtual ~X86Assembler() {} /* @@ -231,6 +231,8 @@ class X86Assembler FINAL : public Assembler { void movl(const Address& dst, const Immediate& imm); void movl(const Address& dst, Label* lbl); + void movntl(const Address& dst, Register src); + void bswapl(Register dst); void movzxb(Register dst, ByteRegister src); @@ -409,6 +411,7 @@ class X86Assembler FINAL : public Assembler { void imull(Register dst, Register src); void imull(Register reg, const Immediate& imm); + void imull(Register dst, Register src, const Immediate& imm); void imull(Register reg, const Address& address); void imull(Register reg); @@ -465,6 +468,9 @@ class X86Assembler FINAL : public Assembler { void jmp(Label* label); void repne_scasw(); + void repe_cmpsw(); + void repe_cmpsl(); + void rep_movsw(); X86Assembler* lock(); void cmpxchgl(const Address& address, Register reg); @@ -644,7 +650,6 @@ class X86Assembler FINAL : public Assembler { void EmitComplex(int rm, const Operand& operand, const Immediate& immediate); void EmitLabel(Label* label, int instruction_size); void EmitLabelLink(Label* label); - void EmitNearLabelLink(Label* label); void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm); void EmitGenericShift(int rm, const Operand& operand, Register shifter); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index aacc57bb0c..c257b789be 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -105,6 +105,16 @@ TEST_F(AssemblerX86Test, Movl) { DriverStr(expected, "movl"); } +TEST_F(AssemblerX86Test, Movntl) { + GetAssembler()->movntl(x86::Address(x86::EDI, x86::EBX, x86::TIMES_4, 12), x86::EAX); + GetAssembler()->movntl(x86::Address(x86::EDI, 0), x86::EAX); + const char* expected = + "movntil %EAX, 0xc(%EDI,%EBX,4)\n" + "movntil %EAX, (%EDI)\n"; + + DriverStr(expected, "movntl"); +} + TEST_F(AssemblerX86Test, psrlq) { GetAssembler()->psrlq(x86::XMM0, CreateImmediate(32)); const char* expected = "psrlq $0x20, %xmm0\n"; @@ -196,4 +206,28 @@ TEST_F(AssemblerX86Test, Repnescasw) { DriverStr(expected, "Repnescasw"); } +TEST_F(AssemblerX86Test, Repecmpsw) { + GetAssembler()->repe_cmpsw(); + const char* expected = "repe cmpsw\n"; + DriverStr(expected, "Repecmpsw"); +} + +TEST_F(AssemblerX86Test, Repecmpsl) { + GetAssembler()->repe_cmpsl(); + const char* expected = "repe cmpsl\n"; + 
DriverStr(expected, "Repecmpsl"); +} + +TEST_F(AssemblerX86Test, RepneScasw) { + GetAssembler()->repne_scasw(); + const char* expected = "repne scasw\n"; + DriverStr(expected, "repne_scasw"); +} + +TEST_F(AssemblerX86Test, RepMovsw) { + GetAssembler()->rep_movsw(); + const char* expected = "rep movsw\n"; + DriverStr(expected, "rep_movsw"); +} + } // namespace art diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index f35f51c494..38a9e72dae 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -194,6 +194,21 @@ void X86_64Assembler::movl(const Address& dst, const Immediate& imm) { EmitImmediate(imm); } +void X86_64Assembler::movntl(const Address& dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(src, dst); + EmitUint8(0x0F); + EmitUint8(0xC3); + EmitOperand(src.LowBits(), dst); +} + +void X86_64Assembler::movntq(const Address& dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(src, dst); + EmitUint8(0x0F); + EmitUint8(0xC3); + EmitOperand(src.LowBits(), dst); +} void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) { cmov(c, dst, src, true); @@ -1672,28 +1687,33 @@ void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) { EmitOperand(dst.LowBits(), Operand(src)); } -void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) { +void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK(imm.is_int32()); // imull only supports 32b immediate. - EmitOptionalRex32(reg, reg); + EmitOptionalRex32(dst, src); // See whether imm can be represented as a sign-extended 8bit value. int32_t v32 = static_cast<int32_t>(imm.value()); if (IsInt<8>(v32)) { // Sign-extension works. EmitUint8(0x6B); - EmitOperand(reg.LowBits(), Operand(reg)); + EmitOperand(dst.LowBits(), Operand(src)); EmitUint8(static_cast<uint8_t>(v32 & 0xFF)); } else { // Not representable, use full immediate. EmitUint8(0x69); - EmitOperand(reg.LowBits(), Operand(reg)); + EmitOperand(dst.LowBits(), Operand(src)); EmitImmediate(imm); } } +void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) { + imull(reg, reg, imm); +} + + void X86_64Assembler::imull(CpuRegister reg, const Address& address) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(reg, address); @@ -1986,6 +2006,14 @@ void X86_64Assembler::jmp(Label* label) { } +void X86_64Assembler::rep_movsw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0xF3); + EmitUint8(0xA5); +} + + X86_64Assembler* X86_64Assembler::lock() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF0); @@ -2073,6 +2101,29 @@ void X86_64Assembler::repne_scasw() { } +void X86_64Assembler::repe_cmpsw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0xF3); + EmitUint8(0xA7); +} + + +void X86_64Assembler::repe_cmpsl() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitUint8(0xA7); +} + + +void X86_64Assembler::repe_cmpsq() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitRex64(); + EmitUint8(0xA7); +} + + void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) { // TODO: Need to have a code constants table. 
int64_t constant = bit_cast<int64_t, double>(value); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 61ffeab1e8..67807850c6 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -326,10 +326,13 @@ class X86_64Assembler FINAL : public Assembler { void movq(CpuRegister dst, CpuRegister src); void movl(CpuRegister dst, CpuRegister src); + void movntl(const Address& dst, CpuRegister src); + void movntq(const Address& dst, CpuRegister src); + void movq(CpuRegister dst, const Address& src); void movl(CpuRegister dst, const Address& src); void movq(const Address& dst, CpuRegister src); - void movq(const Address& dst, const Immediate& src); + void movq(const Address& dst, const Immediate& imm); void movl(const Address& dst, CpuRegister src); void movl(const Address& dst, const Immediate& imm); @@ -539,6 +542,7 @@ class X86_64Assembler FINAL : public Assembler { void imull(CpuRegister dst, CpuRegister src); void imull(CpuRegister reg, const Immediate& imm); + void imull(CpuRegister dst, CpuRegister src, const Immediate& imm); void imull(CpuRegister reg, const Address& address); void imulq(CpuRegister src); @@ -603,6 +607,10 @@ class X86_64Assembler FINAL : public Assembler { void bswapq(CpuRegister dst); void repne_scasw(); + void repe_cmpsw(); + void repe_cmpsl(); + void repe_cmpsq(); + void rep_movsw(); // // Macros for High-level operations. @@ -796,7 +804,6 @@ class X86_64Assembler FINAL : public Assembler { void EmitComplex(uint8_t rm, const Operand& operand, const Immediate& immediate); void EmitLabel(Label* label, int instruction_size); void EmitLabelLink(Label* label); - void EmitNearLabelLink(Label* label); void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm); void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 6da5c35731..7e879e9ce1 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -35,7 +35,7 @@ TEST(AssemblerX86_64, CreateBuffer) { ASSERT_EQ(static_cast<size_t>(5), buffer.Size()); } -#ifdef HAVE_ANDROID_OS +#ifdef __ANDROID__ static constexpr size_t kRandomIterations = 1000; // Devices might be puny, don't stress them... #else static constexpr size_t kRandomIterations = 100000; // Hosts are pretty powerful. 
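Aside on the imull changes above: the new three-operand imull(dst, src, imm) in both the x86 and x86_64 assemblers first checks whether the immediate fits a sign-extended 8-bit value and, if so, emits the short IMUL encoding (opcode 0x6B plus imm8) rather than the full encoding (0x69 plus imm32). The following is a minimal standalone sketch of that opcode selection, assuming the register-register ModRM form only; IsInt8 and EncodeImulImm are made-up illustrative names, not ART's emitter API.

#include <cstdint>
#include <cstdio>
#include <vector>

// True if value fits in a sign-extended 8-bit immediate (simplified stand-in
// for art::IsInt<8>(); assumption, not the real helper).
static bool IsInt8(int32_t value) {
  return value >= -128 && value <= 127;
}

// Illustrative encoder for "imul r32, r/m32, imm" in the register-register
// form (ModRM = 0xC0 | dst_reg << 3 | src_rm). Not the real ART emitter.
static std::vector<uint8_t> EncodeImulImm(uint8_t dst_reg, uint8_t src_rm, int32_t imm) {
  std::vector<uint8_t> out;
  if (IsInt8(imm)) {
    out.push_back(0x6B);                              // IMUL r32, r/m32, imm8
    out.push_back(0xC0 | (dst_reg << 3) | src_rm);
    out.push_back(static_cast<uint8_t>(imm & 0xFF));  // Sign-extended at runtime.
  } else {
    out.push_back(0x69);                              // IMUL r32, r/m32, imm32
    out.push_back(0xC0 | (dst_reg << 3) | src_rm);
    for (int i = 0; i < 4; ++i) {
      out.push_back(static_cast<uint8_t>((imm >> (8 * i)) & 0xFF));
    }
  }
  return out;
}

int main() {
  // imull %eax, %eax, $12  -> short form, 3 bytes.
  // imull %eax, %eax, $300 -> long form, 6 bytes.
  printf("imm=12  -> %zu bytes\n", EncodeImulImm(0, 0, 12).size());
  printf("imm=300 -> %zu bytes\n", EncodeImulImm(0, 0, 300).size());
  return 0;
}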
@@ -674,6 +674,46 @@ TEST_F(AssemblerX86_64Test, MovqAddrImm) { DriverStr(expected, "movq"); } +TEST_F(AssemblerX86_64Test, Movntl) { + GetAssembler()->movntl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movntl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movntl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movntl(x86_64::Address(x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movntl(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), x86_64::CpuRegister(x86_64::R9)); + const char* expected = + "movntil %EAX, 0xc(%RDI,%RBX,4)\n" + "movntil %EAX, 0xc(%RDI,%R9,4)\n" + "movntil %EAX, 0xc(%RDI,%R9,4)\n" + "movntil %EAX, (%R13)\n" + "movntil %R9d, (%R13,%R9,1)\n"; + + DriverStr(expected, "movntl"); +} + +TEST_F(AssemblerX86_64Test, Movntq) { + GetAssembler()->movntq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movntq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movntq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movntq(x86_64::Address(x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RAX)); + GetAssembler()->movntq(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), x86_64::CpuRegister(x86_64::R9)); + const char* expected = + "movntiq %RAX, 0xc(%RDI,%RBX,4)\n" + "movntiq %RAX, 0xc(%RDI,%R9,4)\n" + "movntiq %RAX, 0xc(%RDI,%R9,4)\n" + "movntiq %RAX, (%R13)\n" + "movntiq %R9, (%R13,%R9,1)\n"; + + DriverStr(expected, "movntq"); +} + TEST_F(AssemblerX86_64Test, Cvtsi2ssAddr) { GetAssembler()->cvtsi2ss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0), @@ -796,6 +836,18 @@ TEST_F(AssemblerX86_64Test, Xorq) { DriverStr(expected, "xorq"); } +TEST_F(AssemblerX86_64Test, RepneScasw) { + GetAssembler()->repne_scasw(); + const char* expected = "repne scasw\n"; + DriverStr(expected, "repne_scasw"); +} + +TEST_F(AssemblerX86_64Test, RepMovsw) { + GetAssembler()->rep_movsw(); + const char* expected = "rep movsw\n"; + DriverStr(expected, "rep_movsw"); +} + TEST_F(AssemblerX86_64Test, Movsxd) { DriverStr(RepeatRr(&x86_64::X86_64Assembler::movsxd, "movsxd %{reg2}, %{reg1}"), "movsxd"); } @@ -1263,4 +1315,22 @@ TEST_F(AssemblerX86_64Test, Repnescasw) { DriverStr(expected, "Repnescasw"); } +TEST_F(AssemblerX86_64Test, Repecmpsw) { + GetAssembler()->repe_cmpsw(); + const char* expected = "repe cmpsw\n"; + DriverStr(expected, "Repecmpsw"); +} + +TEST_F(AssemblerX86_64Test, Repecmpsl) { + GetAssembler()->repe_cmpsl(); + const char* expected = "repe cmpsl\n"; + DriverStr(expected, "Repecmpsl"); +} + +TEST_F(AssemblerX86_64Test, Repecmpsq) { + GetAssembler()->repe_cmpsq(); + const char* expected = "repe cmpsq\n"; + DriverStr(expected, "Repecmpsq"); +} + } // namespace art |
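Closing aside on the Thumb2 assembler change above: Fixup::PrepareDependents() replaces per-Fixup dependent vectors with one shared array plus per-Fixup (dependents_start_, dependents_count_) ranges, built in three passes: count the dependents of each fixup, prefix-sum the counts into range end positions, then fill the array while decrementing each start index. The sketch below shows the same compact-layout technique on a generic dependency list; Edge, Ranges and BuildDependents are illustrative names and this is not ART code.

#include <cstdint>
#include <cstdio>
#include <vector>

// One "A must be re-checked when B grows" relation, analogous to a Fixup
// whose branch range covers another Fixup's location (illustrative names).
struct Edge {
  uint32_t dependency;  // B: the fixup being depended on.
  uint32_t dependent;   // A: the fixup to re-check when B changes size.
};

struct Ranges {
  std::vector<uint32_t> start;  // Per-node start index into `flat`.
  std::vector<uint32_t> count;  // Per-node number of dependents.
  std::vector<uint32_t> flat;   // All dependents stored in a single array.
};

// Count, prefix-sum into end positions, then fill while decrementing the
// start indices, mirroring the pattern used by PrepareDependents().
static Ranges BuildDependents(uint32_t num_nodes, const std::vector<Edge>& edges) {
  Ranges r;
  r.count.assign(num_nodes, 0u);
  for (const Edge& e : edges) {
    r.count[e.dependency] += 1u;
  }
  r.start.assign(num_nodes, 0u);
  uint32_t total = 0u;
  for (uint32_t i = 0; i < num_nodes; ++i) {
    total += r.count[i];
    r.start[i] = total;  // Temporarily holds the *end* of the range.
  }
  r.flat.resize(total);
  for (const Edge& e : edges) {
    r.start[e.dependency] -= 1u;  // Walk back towards the range start.
    r.flat[r.start[e.dependency]] = e.dependent;
  }
  return r;
}

int main() {
  // Fixup 0 has dependents {1, 2}; fixup 2 has dependent {1}.
  Ranges r = BuildDependents(3, {{0, 1}, {0, 2}, {2, 1}});
  for (uint32_t n = 0; n < 3; ++n) {
    printf("fixup %u:", n);
    for (uint32_t i = 0; i < r.count[n]; ++i) {
      printf(" %u", r.flat[r.start[n] + i]);
    }
    printf("\n");
  }
  return 0;
}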