51 files changed, 2684 insertions, 1054 deletions
diff --git a/Android.mk b/Android.mk index 7beb30f5d1..803ba502cb 100644 --- a/Android.mk +++ b/Android.mk @@ -123,7 +123,7 @@ ifneq ($(ART_TEST_NO_SYNC),true) ifeq ($(ART_TEST_ANDROID_ROOT),) test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS) $(TEST_ART_ADB_ROOT_AND_REMOUNT) - adb sync + adb sync system && adb sync data else test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS) $(TEST_ART_ADB_ROOT_AND_REMOUNT) diff --git a/compiler/Android.bp b/compiler/Android.bp index df896dc73c..307a42cbba 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -67,6 +67,7 @@ art_cc_defaults { "optimizing/intrinsics.cc", "optimizing/licm.cc", "optimizing/linear_order.cc", + "optimizing/load_store_analysis.cc", "optimizing/load_store_elimination.cc", "optimizing/locations.cc", "optimizing/loop_optimization.cc", @@ -374,6 +375,7 @@ art_cc_test { "jni/jni_cfi_test.cc", "optimizing/codegen_test.cc", + "optimizing/load_store_analysis_test.cc", "optimizing/optimizing_cfi_test.cc", "optimizing/scheduler_test.cc", ], diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index 912c96468d..0ca23a5c50 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -119,7 +119,7 @@ class LinkerPatch { // choose to squeeze the Type into fewer than 8 bits, we'll have to declare // patch_type_ as an uintN_t and do explicit static_cast<>s. enum class Type : uint8_t { - kMethod, + kMethodRelative, // NOTE: Actual patching is instruction_set-dependent. kCall, kCallRelative, // NOTE: Actual patching is instruction_set-dependent. kTypeRelative, // NOTE: Actual patching is instruction_set-dependent. @@ -130,11 +130,13 @@ class LinkerPatch { kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent. }; - static LinkerPatch MethodPatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t target_method_idx) { - LinkerPatch patch(literal_offset, Type::kMethod, target_dex_file); + static LinkerPatch RelativeMethodPatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t target_method_idx) { + LinkerPatch patch(literal_offset, Type::kMethodRelative, target_dex_file); patch.method_idx_ = target_method_idx; + patch.pc_insn_offset_ = pc_insn_offset; return patch; } @@ -226,6 +228,7 @@ class LinkerPatch { bool IsPcRelative() const { switch (GetType()) { + case Type::kMethodRelative: case Type::kCallRelative: case Type::kTypeRelative: case Type::kTypeBssEntry: @@ -240,7 +243,7 @@ class LinkerPatch { } MethodReference TargetMethod() const { - DCHECK(patch_type_ == Type::kMethod || + DCHECK(patch_type_ == Type::kMethodRelative || patch_type_ == Type::kCall || patch_type_ == Type::kCallRelative); return MethodReference(target_dex_file_, method_idx_); @@ -281,7 +284,8 @@ class LinkerPatch { } uint32_t PcInsnOffset() const { - DCHECK(patch_type_ == Type::kTypeRelative || + DCHECK(patch_type_ == Type::kMethodRelative || + patch_type_ == Type::kTypeRelative || patch_type_ == Type::kTypeBssEntry || patch_type_ == Type::kStringRelative || patch_type_ == Type::kStringBssEntry || diff --git a/compiler/compiled_method_test.cc b/compiler/compiled_method_test.cc index 81a2cde794..72b2282ade 100644 --- a/compiler/compiled_method_test.cc +++ b/compiler/compiled_method_test.cc @@ -50,10 +50,14 @@ TEST(CompiledMethod, LinkerPatchOperators) { const DexFile* dex_file1 = reinterpret_cast<const DexFile*>(1); const DexFile* dex_file2 = reinterpret_cast<const DexFile*>(2); LinkerPatch patches[] = { - LinkerPatch::MethodPatch(16u, dex_file1, 1000u), - 
LinkerPatch::MethodPatch(16u, dex_file1, 1001u), - LinkerPatch::MethodPatch(16u, dex_file2, 1000u), - LinkerPatch::MethodPatch(16u, dex_file2, 1001u), // Index 3. + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1000u), + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1000u), + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1001u), + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u), // Index 3. + LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1000u), + LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1000u), + LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1001u), + LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1001u), LinkerPatch::CodePatch(16u, dex_file1, 1000u), LinkerPatch::CodePatch(16u, dex_file1, 1001u), LinkerPatch::CodePatch(16u, dex_file2, 1000u), @@ -107,10 +111,14 @@ TEST(CompiledMethod, LinkerPatchOperators) { LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 0u), LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 1u), - LinkerPatch::MethodPatch(32u, dex_file1, 1000u), - LinkerPatch::MethodPatch(32u, dex_file1, 1001u), - LinkerPatch::MethodPatch(32u, dex_file2, 1000u), - LinkerPatch::MethodPatch(32u, dex_file2, 1001u), // Index 3. + LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1000u), + LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3001u, 1000u), + LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1001u), + LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3001u, 1001u), + LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3000u, 1000u), + LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1000u), + LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3000u, 1001u), + LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1001u), LinkerPatch::CodePatch(32u, dex_file1, 1000u), LinkerPatch::CodePatch(32u, dex_file1, 1001u), LinkerPatch::CodePatch(32u, dex_file2, 1000u), @@ -164,7 +172,7 @@ TEST(CompiledMethod, LinkerPatchOperators) { LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 0u), LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 1u), - LinkerPatch::MethodPatch(16u, dex_file2, 1001u), // identical with patch at index 3. + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u), // Same as patch at index 3. 
}; constexpr size_t last_index = arraysize(patches) - 1u; diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc index 6572d170e6..bbd28b2576 100644 --- a/compiler/driver/compiled_method_storage_test.cc +++ b/compiler/driver/compiled_method_storage_test.cc @@ -71,11 +71,11 @@ TEST(CompiledMethodStorage, Deduplicate) { }; const LinkerPatch raw_patches1[] = { LinkerPatch::CodePatch(0u, nullptr, 1u), - LinkerPatch::MethodPatch(4u, nullptr, 1u), + LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 1u), }; const LinkerPatch raw_patches2[] = { LinkerPatch::CodePatch(0u, nullptr, 1u), - LinkerPatch::MethodPatch(4u, nullptr, 2u), + LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 2u), }; ArrayRef<const LinkerPatch> patches[] = { ArrayRef<const LinkerPatch>(raw_patches1), diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index c033c2d043..6d2b243d07 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -54,7 +54,7 @@ constexpr uint32_t kAdrpThunkSize = 8u; inline bool IsAdrpPatch(const LinkerPatch& patch) { switch (patch.GetType()) { - case LinkerPatch::Type::kMethod: + case LinkerPatch::Type::kMethodRelative: case LinkerPatch::Type::kCall: case LinkerPatch::Type::kCallRelative: case LinkerPatch::Type::kBakerReadBarrierBranch: @@ -250,11 +250,13 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, if ((insn & 0xfffffc00) == 0x91000000) { // ADD immediate, 64-bit with imm12 == 0 (unset). if (!kEmitCompilerReadBarrier) { - DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative || + DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative || + patch.GetType() == LinkerPatch::Type::kStringRelative || patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType(); } else { // With the read barrier (non-Baker) enabled, it could be kStringBssEntry or kTypeBssEntry. 
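The new RelativeMethodPatch() factory above records two offsets: `literal_offset`, where the bits are rewritten, and `pc_insn_offset`, the instruction the displacement is computed against. A minimal sketch of constructing and inspecting one, mirroring compiled_method_test.cc above (the dummy DexFile pointer is taken from that test and is never dereferenced):

// Sketch only; requires the ART compiled_method.h declarations shown above.
const DexFile* dex_file = reinterpret_cast<const DexFile*>(1);  // dummy, as in the test
LinkerPatch patch = LinkerPatch::RelativeMethodPatch(/* literal_offset */ 16u,
                                                     dex_file,
                                                     /* pc_insn_offset */ 3000u,
                                                     /* target_method_idx */ 1000u);
CHECK(patch.IsPcRelative());                              // kMethodRelative is PC-relative.
CHECK_EQ(patch.PcInsnOffset(), 3000u);                    // Offset of the anchoring PC-relative insn.
CHECK_EQ(patch.TargetMethod().dex_method_index, 1000u);   // Target resolved via (dex file, index).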
- DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative || + DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative || + patch.GetType() == LinkerPatch::Type::kStringRelative || patch.GetType() == LinkerPatch::Type::kTypeRelative || patch.GetType() == LinkerPatch::Type::kStringBssEntry || patch.GetType() == LinkerPatch::Type::kTypeBssEntry) << patch.GetType(); diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 5091c0b35d..bc411dfc6d 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1331,9 +1331,12 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { PatchCodeAddress(&patched_code_, literal_offset, target_offset); break; } - case LinkerPatch::Type::kMethod: { - ArtMethod* method = GetTargetMethod(patch); - PatchMethodAddress(&patched_code_, literal_offset, method); + case LinkerPatch::Type::kMethodRelative: { + uint32_t target_offset = GetTargetMethodOffset(GetTargetMethod(patch)); + writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_, + patch, + offset_ + literal_offset, + target_offset); break; } case LinkerPatch::Type::kBakerReadBarrierBranch: { @@ -1457,6 +1460,15 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } } + uint32_t GetTargetMethodOffset(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(writer_->HasBootImage()); + method = writer_->image_writer_->GetImageMethodAddress(method); + size_t oat_index = writer_->image_writer_->GetOatIndexForDexFile(dex_file_); + uintptr_t oat_data_begin = writer_->image_writer_->GetOatDataBegin(oat_index); + // TODO: Clean up offset types. The target offset must be treated as signed. + return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(method) - oat_data_begin); + } + uint32_t GetTargetObjectOffset(mirror::Object* object) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(writer_->HasBootImage()); object = writer_->image_writer_->GetImageAddress(object); @@ -1486,34 +1498,6 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { data[3] = (address >> 24) & 0xffu; } - void PatchMethodAddress(std::vector<uint8_t>* code, uint32_t offset, ArtMethod* method) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (writer_->HasBootImage()) { - method = writer_->image_writer_->GetImageMethodAddress(method); - } else if (kIsDebugBuild) { - // NOTE: We're using linker patches for app->boot references when the image can - // be relocated and therefore we need to emit .oat_patches. We're not using this - // for app->app references, so check that the method is an image method. - std::vector<gc::space::ImageSpace*> image_spaces = - Runtime::Current()->GetHeap()->GetBootImageSpaces(); - bool contains_method = false; - for (gc::space::ImageSpace* image_space : image_spaces) { - size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin(); - contains_method |= - image_space->GetImageHeader().GetMethodsSection().Contains(method_offset); - } - CHECK(contains_method); - } - // Note: We only patch targeting ArtMethods in image which is in the low 4gb. 
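The TODO in GetTargetMethodOffset() above exists because a boot-image ArtMethod typically sits below the oat data section, so the stored uint32_t is really a negative displacement; a small standalone check with made-up addresses:

#include <cassert>
#include <cstdint>

int main() {
  uintptr_t method_address = 0x70001000u;  // ArtMethod inside the boot image (made up).
  uintptr_t oat_data_begin = 0x70400000u;  // Start of the oat data section (made up).
  uint32_t target_offset = static_cast<uint32_t>(method_address - oat_data_begin);
  // Only reinterpreting the offset as signed recovers the original address.
  uintptr_t recovered = oat_data_begin + static_cast<int32_t>(target_offset);
  assert(recovered == method_address);
  return 0;
}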
- uint32_t address = PointerToLowMemUInt32(method); - DCHECK_LE(offset + 4, code->size()); - uint8_t* data = &(*code)[offset]; - data[0] = address & 0xffu; - data[1] = (address >> 8) & 0xffu; - data[2] = (address >> 16) & 0xffu; - data[3] = (address >> 24) & 0xffu; - } - void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset) REQUIRES_SHARED(Locks::mutator_lock_) { uint32_t address = target_offset; diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index cb6e14b2bd..a949c33149 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -43,7 +43,7 @@ class BoundsCheckEliminationTest : public testing::Test { void RunBCE() { graph_->BuildDominatorTree(); - InstructionSimplifier(graph_, /* codegen */ nullptr).Run(); + InstructionSimplifier(graph_, /* codegen */ nullptr, /* driver */ nullptr).Run(); SideEffectsAnalysis side_effects(graph_); side_effects.Run(); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 713d370c87..c66bd77d6b 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1656,6 +1656,34 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) { } } +static int64_t AdjustConstantForCondition(int64_t value, + IfCondition* condition, + IfCondition* opposite) { + if (value == 1) { + if (*condition == kCondB) { + value = 0; + *condition = kCondEQ; + *opposite = kCondNE; + } else if (*condition == kCondAE) { + value = 0; + *condition = kCondNE; + *opposite = kCondEQ; + } + } else if (value == -1) { + if (*condition == kCondGT) { + value = 0; + *condition = kCondGE; + *opposite = kCondLT; + } else if (*condition == kCondLE) { + value = 0; + *condition = kCondLT; + *opposite = kCondGE; + } + } + + return value; +} + static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition, bool invert, CodeGeneratorARM* codegen) { @@ -1669,7 +1697,7 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond std::swap(cond, opposite); } - std::pair<Condition, Condition> ret; + std::pair<Condition, Condition> ret(EQ, NE); const Location left = locations->InAt(0); const Location right = locations->InAt(1); @@ -1677,7 +1705,38 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond const Register left_high = left.AsRegisterPairHigh<Register>(); const Register left_low = left.AsRegisterPairLow<Register>(); - int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); + int64_t value = AdjustConstantForCondition(right.GetConstant()->AsLongConstant()->GetValue(), + &cond, + &opposite); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (cond) { + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: + ret = std::make_pair(NE, EQ); + FALLTHROUGH_INTENDED; + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. + case kCondBE: + __ orrs(IP, left_low, ShifterOperand(left_high)); + return ret; + case kCondLT: + case kCondGE: + __ cmp(left_high, ShifterOperand(0)); + return std::make_pair(ARMCondition(cond), ARMCondition(opposite)); + // Trivially true or false. 
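AdjustConstantForCondition() above rewrites comparisons against +1 and -1 into comparisons against 0 so that the cheaper zero-test paths below apply; the rewrites are plain integer identities, verified here in a standalone sketch:

#include <cassert>
#include <cstdint>

int main() {
  const int64_t values[] = {INT64_MIN, -2, -1, 0, 1, 2, INT64_MAX};
  for (int64_t x : values) {
    assert((static_cast<uint64_t>(x) < 1u) == (x == 0));   // kCondB  1  ->  kCondEQ 0
    assert((static_cast<uint64_t>(x) >= 1u) == (x != 0));  // kCondAE 1  ->  kCondNE 0
    assert((x > -1) == (x >= 0));                          // kCondGT -1 ->  kCondGE 0
    assert((x <= -1) == (x < 0));                          // kCondLE -1 ->  kCondLT 0
  }
  return 0;
}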
+ case kCondB: + ret = std::make_pair(NE, EQ); + FALLTHROUGH_INTENDED; + case kCondAE: + __ cmp(left_low, ShifterOperand(left_low)); + return ret; + default: + break; + } + } switch (cond) { case kCondEQ: @@ -1837,10 +1896,14 @@ static std::pair<Condition, Condition> GenerateTest(HCondition* condition, static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { const LocationSummary* const locations = condition->GetLocations(); - const IfCondition c = condition->GetCondition(); if (locations->InAt(1).IsConstant()) { - const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); + IfCondition c = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + const int64_t value = AdjustConstantForCondition( + Int64FromConstant(locations->InAt(1).GetConstant()), + &c, + &opposite); ShifterOperand so; if (c < kCondLT || c > kCondGE) { @@ -1848,9 +1911,11 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { // we check that the least significant half of the first input to be compared // is in a low register (the other half is read outside an IT block), and // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used. - if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || - !IsUint<8>(Low32Bits(value))) { + // encoding can be used; 0 is always handled, no matter what registers are + // used by the first input. + if (value != 0 && + (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || + !IsUint<8>(Low32Bits(value)))) { return false; } } else if (c == kCondLE || c == kCondGT) { @@ -1877,6 +1942,329 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { return true; } +static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARM* codegen) { + DCHECK(CanGenerateTest(cond, codegen->GetAssembler())); + + const Register out = cond->GetLocations()->Out().AsRegister<Register>(); + const auto condition = GenerateTest(cond, false, codegen); + + __ mov(out, ShifterOperand(0), AL, kCcKeep); + + if (ArmAssembler::IsLowRegister(out)) { + __ it(condition.first); + __ mov(out, ShifterOperand(1), condition.first); + } else { + Label done_label; + Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + + __ b(final_label, condition.second); + __ LoadImmediate(out, 1); + + if (done_label.IsLinked()) { + __ Bind(&done_label); + } + } +} + +static void GenerateEqualLong(HCondition* cond, CodeGeneratorARM* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const Register out = locations->Out().AsRegister<Register>(); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + Register left_high = left.AsRegisterPairHigh<Register>(); + Register left_low = left.AsRegisterPairLow<Register>(); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + const int64_t value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), + &condition, + &opposite); + int32_t value_high = -High32Bits(value); + int32_t value_low = -Low32Bits(value); + + // The output uses Location::kNoOutputOverlap. 
+ if (out == left_high) { + std::swap(left_low, left_high); + std::swap(value_low, value_high); + } + + __ AddConstant(out, left_low, value_low); + __ AddConstant(IP, left_high, value_high); + } else { + DCHECK(right.IsRegisterPair()); + __ sub(IP, left_high, ShifterOperand(right.AsRegisterPairHigh<Register>())); + __ sub(out, left_low, ShifterOperand(right.AsRegisterPairLow<Register>())); + } + + // Need to check after calling AdjustConstantForCondition(). + DCHECK(condition == kCondEQ || condition == kCondNE) << condition; + + if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) { + __ orrs(out, out, ShifterOperand(IP)); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + } else { + __ orr(out, out, ShifterOperand(IP)); + codegen->GenerateConditionWithZero(condition, out, out, IP); + } +} + +static void GenerateLongComparesAndJumps(HCondition* cond, + Label* true_label, + Label* false_label, + CodeGeneratorARM* codegen) { + LocationSummary* locations = cond->GetLocations(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + IfCondition if_cond = cond->GetCondition(); + + Register left_high = left.AsRegisterPairHigh<Register>(); + Register left_low = left.AsRegisterPairLow<Register>(); + IfCondition true_high_cond = if_cond; + IfCondition false_high_cond = cond->GetOppositeCondition(); + Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part + + // Set the conditions for the test, remembering that == needs to be + // decided using the low words. + switch (if_cond) { + case kCondEQ: + case kCondNE: + // Nothing to do. + break; + case kCondLT: + false_high_cond = kCondGT; + break; + case kCondLE: + true_high_cond = kCondLT; + break; + case kCondGT: + false_high_cond = kCondLT; + break; + case kCondGE: + true_high_cond = kCondGT; + break; + case kCondB: + false_high_cond = kCondA; + break; + case kCondBE: + true_high_cond = kCondB; + break; + case kCondA: + false_high_cond = kCondB; + break; + case kCondAE: + true_high_cond = kCondA; + break; + } + if (right.IsConstant()) { + int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); + int32_t val_low = Low32Bits(value); + int32_t val_high = High32Bits(value); + + __ CmpConstant(left_high, val_high); + if (if_cond == kCondNE) { + __ b(true_label, ARMCondition(true_high_cond)); + } else if (if_cond == kCondEQ) { + __ b(false_label, ARMCondition(false_high_cond)); + } else { + __ b(true_label, ARMCondition(true_high_cond)); + __ b(false_label, ARMCondition(false_high_cond)); + } + // Must be equal high, so compare the lows. + __ CmpConstant(left_low, val_low); + } else { + Register right_high = right.AsRegisterPairHigh<Register>(); + Register right_low = right.AsRegisterPairLow<Register>(); + + __ cmp(left_high, ShifterOperand(right_high)); + if (if_cond == kCondNE) { + __ b(true_label, ARMCondition(true_high_cond)); + } else if (if_cond == kCondEQ) { + __ b(false_label, ARMCondition(false_high_cond)); + } else { + __ b(true_label, ARMCondition(true_high_cond)); + __ b(false_label, ARMCondition(false_high_cond)); + } + // Must be equal high, so compare the lows. + __ cmp(left_low, ShifterOperand(right_low)); + } + // The last comparison might be unsigned. 
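GenerateEqualLong() above may subtract the 32-bit halves independently, without propagating a borrow, because both differences are zero exactly when the 64-bit operands are equal; a standalone check of that identity (hypothetical helper name):

#include <cassert>
#include <cstdint>

// (a == b) tested per 32-bit half, with no borrow between the halves,
// exactly as GenerateEqualLong() does before ORR-ing the two results.
bool EqualViaHalves(uint64_t a, uint64_t b) {
  uint32_t lo = static_cast<uint32_t>(a) - static_cast<uint32_t>(b);
  uint32_t hi = static_cast<uint32_t>(a >> 32) - static_cast<uint32_t>(b >> 32);
  return (lo | hi) == 0;
}

int main() {
  assert(EqualViaHalves(0, 0));
  assert(EqualViaHalves(0x100000000ull, 0x100000000ull));
  assert(!EqualViaHalves(0x100000000ull, 0));   // Differs only in the high half.
  assert(!EqualViaHalves(1, 0x100000001ull));   // Differs only in the high half.
  assert(!EqualViaHalves(2, 3));                // Differs only in the low half.
  return 0;
}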
+ // TODO: optimize cases where this is always true/false + __ b(true_label, final_condition); +} + +static void GenerateConditionLong(HCondition* cond, CodeGeneratorARM* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const Register out = locations->Out().AsRegister<Register>(); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + // Comparisons against 0 are common enough to deserve special attention. + if (AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), + &condition, + &opposite) == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (ArmAssembler::IsLowRegister(out)) { + // We only care if both input registers are 0 or not. + __ orrs(out, + left.AsRegisterPairLow<Register>(), + ShifterOperand(left.AsRegisterPairHigh<Register>())); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + // We only care if both input registers are 0 or not. + __ orr(out, + left.AsRegisterPairLow<Register>(), + ShifterOperand(left.AsRegisterPairHigh<Register>())); + codegen->GenerateConditionWithZero(condition, out, out); + return; + case kCondLT: + case kCondGE: + // We only care about the sign bit. + FALLTHROUGH_INTENDED; + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, left.AsRegisterPairHigh<Register>()); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if ((condition == kCondEQ || condition == kCondNE) && + // If `out` is a low register, then the GenerateConditionGeneric() + // function generates a shorter code sequence that is still branchless. + (!ArmAssembler::IsLowRegister(out) || !CanGenerateTest(cond, codegen->GetAssembler()))) { + GenerateEqualLong(cond, codegen); + return; + } + + if (CanGenerateTest(cond, codegen->GetAssembler())) { + GenerateConditionGeneric(cond, codegen); + return; + } + + // Convert the jumps into the result. + Label done_label; + Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + Label true_label, false_label; + + GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen); + + // False case: result = 0. + __ Bind(&false_label); + __ mov(out, ShifterOperand(0)); + __ b(final_label); + + // True case: result = 1. 
+ __ Bind(&true_label); + __ mov(out, ShifterOperand(1)); + + if (done_label.IsLinked()) { + __ Bind(&done_label); + } +} + +static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARM* codegen) { + const Primitive::Type type = cond->GetLeft()->GetType(); + + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + + if (type == Primitive::kPrimLong) { + GenerateConditionLong(cond, codegen); + return; + } + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + Register in = locations->InAt(0).AsRegister<Register>(); + const Register out = locations->Out().AsRegister<Register>(); + const Location right = cond->GetLocations()->InAt(1); + int64_t value; + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), + &condition, + &opposite); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (ArmAssembler::IsLowRegister(out) && out == in) { + __ cmp(out, ShifterOperand(0)); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + case kCondLT: + case kCondGE: + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, in); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if (condition == kCondEQ || condition == kCondNE) { + ShifterOperand operand; + + if (right.IsConstant()) { + operand = ShifterOperand(value); + } else if (out == right.AsRegister<Register>()) { + // Avoid 32-bit instructions if possible. + operand = ShifterOperand(in); + in = right.AsRegister<Register>(); + } else { + operand = ShifterOperand(right.AsRegister<Register>()); + } + + if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) { + __ subs(out, in, operand); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + } else { + __ sub(out, in, operand); + codegen->GenerateConditionWithZero(condition, out, out); + } + + return; + } + + GenerateConditionGeneric(cond, codegen); +} + static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) { const Primitive::Type type = constant->GetType(); bool ret = false; @@ -2009,9 +2397,10 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -2479,89 +2868,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, - Label* true_label, - Label* false_label) { - LocationSummary* locations = cond->GetLocations(); - 
Location left = locations->InAt(0); - Location right = locations->InAt(1); - IfCondition if_cond = cond->GetCondition(); - - Register left_high = left.AsRegisterPairHigh<Register>(); - Register left_low = left.AsRegisterPairLow<Register>(); - IfCondition true_high_cond = if_cond; - IfCondition false_high_cond = cond->GetOppositeCondition(); - Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part - - // Set the conditions for the test, remembering that == needs to be - // decided using the low words. - switch (if_cond) { - case kCondEQ: - case kCondNE: - // Nothing to do. - break; - case kCondLT: - false_high_cond = kCondGT; - break; - case kCondLE: - true_high_cond = kCondLT; - break; - case kCondGT: - false_high_cond = kCondLT; - break; - case kCondGE: - true_high_cond = kCondGT; - break; - case kCondB: - false_high_cond = kCondA; - break; - case kCondBE: - true_high_cond = kCondB; - break; - case kCondA: - false_high_cond = kCondB; - break; - case kCondAE: - true_high_cond = kCondA; - break; - } - if (right.IsConstant()) { - int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); - int32_t val_low = Low32Bits(value); - int32_t val_high = High32Bits(value); - - __ CmpConstant(left_high, val_high); - if (if_cond == kCondNE) { - __ b(true_label, ARMCondition(true_high_cond)); - } else if (if_cond == kCondEQ) { - __ b(false_label, ARMCondition(false_high_cond)); - } else { - __ b(true_label, ARMCondition(true_high_cond)); - __ b(false_label, ARMCondition(false_high_cond)); - } - // Must be equal high, so compare the lows. - __ CmpConstant(left_low, val_low); - } else { - Register right_high = right.AsRegisterPairHigh<Register>(); - Register right_low = right.AsRegisterPairLow<Register>(); - - __ cmp(left_high, ShifterOperand(right_high)); - if (if_cond == kCondNE) { - __ b(true_label, ARMCondition(true_high_cond)); - } else if (if_cond == kCondEQ) { - __ b(false_label, ARMCondition(false_high_cond)); - } else { - __ b(true_label, ARMCondition(true_high_cond)); - __ b(false_label, ARMCondition(false_high_cond)); - } - // Must be equal high, so compare the lows. - __ cmp(left_low, ShifterOperand(right_low)); - } - // The last comparison might be unsigned. - // TODO: optimize cases where this is always true/false - __ b(true_label, final_condition); -} - void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition, Label* true_target_in, Label* false_target_in) { @@ -2596,7 +2902,7 @@ void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condi Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); - GenerateLongComparesAndJumps(condition, true_target, false_target); + GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_); if (false_target != &fallthrough_target) { __ b(false_target); @@ -2911,6 +3217,80 @@ void CodeGeneratorARM::GenerateNop() { __ nop(); } +// `temp` is an extra temporary register that is used for some conditions; +// callers may not specify it, in which case the method will use a scratch +// register instead. +void CodeGeneratorARM::GenerateConditionWithZero(IfCondition condition, + Register out, + Register in, + Register temp) { + switch (condition) { + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. 
+ case kCondBE: + if (temp == kNoRegister || (ArmAssembler::IsLowRegister(out) && out != in)) { + temp = out; + } + + // Avoid 32-bit instructions if possible; note that `in` and `temp` must be + // different as well. + if (ArmAssembler::IsLowRegister(in) && ArmAssembler::IsLowRegister(temp) && in != temp) { + // temp = - in; only 0 sets the carry flag. + __ rsbs(temp, in, ShifterOperand(0)); + + if (out == in) { + std::swap(in, temp); + } + + // out = - in + in + carry = carry + __ adc(out, temp, ShifterOperand(in)); + } else { + // If `in` is 0, then it has 32 leading zeros, and less than that otherwise. + __ clz(out, in); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); + } + + break; + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: + if (out == in) { + if (temp == kNoRegister || in == temp) { + temp = IP; + } + } else if (temp == kNoRegister || !ArmAssembler::IsLowRegister(temp)) { + temp = out; + } + + // temp = in - 1; only 0 does not set the carry flag. + __ subs(temp, in, ShifterOperand(1)); + // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry + __ sbc(out, in, ShifterOperand(temp)); + break; + case kCondGE: + __ mvn(out, ShifterOperand(in)); + in = out; + FALLTHROUGH_INTENDED; + case kCondLT: + // We only care about the sign bit. + __ Lsr(out, in, 31); + break; + case kCondAE: + // Trivially true. + __ mov(out, ShifterOperand(1)); + break; + case kCondB: + // Trivially false. + __ mov(out, ShifterOperand(0)); + break; + default: + LOG(FATAL) << "Unexpected condition " << condition; + UNREACHABLE(); + } +} + void LocationsBuilderARM::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); @@ -2947,48 +3327,48 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { return; } - const Register out = cond->GetLocations()->Out().AsRegister<Register>(); + const Primitive::Type type = cond->GetLeft()->GetType(); - if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) { - const auto condition = GenerateTest(cond, false, codegen_); - - __ it(condition.first); - __ mov(out, ShifterOperand(1), condition.first); - __ it(condition.second); - __ mov(out, ShifterOperand(0), condition.second); + if (Primitive::IsFloatingPointType(type)) { + GenerateConditionGeneric(cond, codegen_); return; } - // Convert the jumps into the result. - Label done_label; - Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { - Label true_label, false_label; + const IfCondition condition = cond->GetCondition(); - GenerateLongComparesAndJumps(cond, &true_label, &false_label); + // A condition with only one boolean input, or two boolean inputs without being equality or + // inequality results from transformations done by the instruction simplifier, and is handled + // as a regular condition with integral inputs. 
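The branchless zero tests in GenerateConditionWithZero() above rely on CLZ and on the carry flag left by a subtraction; a standalone sketch of the arithmetic, with the carry modeled explicitly (helper names and the explicit carry variables are illustrative only):

#include <cassert>
#include <cstdint>

// CLZ returns 32 only for an input of 0, so a logical shift right by 5 is (x == 0).
uint32_t IsZeroViaClz(uint32_t x) {
  uint32_t clz = (x == 0u) ? 32u : static_cast<uint32_t>(__builtin_clz(x));  // CLZ out, in
  return clz >> 5;                                                           // LSR out, out, #5
}

// RSBS temp, in, #0 sets the carry flag only when in == 0 (0 - in has no borrow);
// ADC out, temp, in is then (0 - in) + in + carry == carry.
uint32_t IsZeroViaCarry(uint32_t in) {
  uint32_t temp = 0u - in;
  uint32_t carry = (in == 0u) ? 1u : 0u;
  return temp + in + carry;
}

// SUBS temp, in, #1 sets the carry flag unless in == 0 (only 0 - 1 borrows);
// SBC out, in, temp is then in - temp - (1 - carry) == carry.
uint32_t IsNonZeroViaCarry(uint32_t in) {
  uint32_t temp = in - 1u;
  uint32_t carry = (in >= 1u) ? 1u : 0u;
  return in - temp - (1u - carry);
}

int main() {
  const uint32_t values[] = {0u, 1u, 2u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
  for (uint32_t x : values) {
    assert(IsZeroViaClz(x) == (x == 0u ? 1u : 0u));
    assert(IsZeroViaCarry(x) == (x == 0u ? 1u : 0u));
    assert(IsNonZeroViaCarry(x) == (x != 0u ? 1u : 0u));
  }
  return 0;
}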
+ if (type == Primitive::kPrimBoolean && + cond->GetRight()->GetType() == Primitive::kPrimBoolean && + (condition == kCondEQ || condition == kCondNE)) { + const LocationSummary* const locations = cond->GetLocations(); + Register left = locations->InAt(0).AsRegister<Register>(); + const Register out = locations->Out().AsRegister<Register>(); + const Location right_loc = locations->InAt(1); - // False case: result = 0. - __ Bind(&false_label); - __ LoadImmediate(out, 0); - __ b(final_label); + // The constant case is handled by the instruction simplifier. + DCHECK(!right_loc.IsConstant()); - // True case: result = 1. - __ Bind(&true_label); - __ LoadImmediate(out, 1); - } else { - DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); + Register right = right_loc.AsRegister<Register>(); - const auto condition = GenerateTest(cond, false, codegen_); + // Avoid 32-bit instructions if possible. + if (out == right) { + std::swap(left, right); + } - __ mov(out, ShifterOperand(0), AL, kCcKeep); - __ b(final_label, condition.second); - __ LoadImmediate(out, 1); - } + __ eor(out, left, ShifterOperand(right)); - if (done_label.IsLinked()) { - __ Bind(&done_label); + if (condition == kCondEQ) { + __ eor(out, out, ShifterOperand(1)); + } + + return; } + + GenerateConditionIntegralOrNonPrimitive(cond, codegen_); } void LocationsBuilderARM::VisitEqual(HEqual* comp) { @@ -8588,6 +8968,18 @@ Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + Register temp_reg = temp.AsRegister<Register>(); + PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + __ BindTrackedLabel(&labels->movw_label); + __ movw(temp_reg, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(temp_reg, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(temp_reg, temp_reg, ShifterOperand(PC)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; @@ -8680,9 +9072,11 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp __ blx(LR); } -CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch( @@ -8695,6 +9089,11 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewTypeBssEntryPatch( return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +} + CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return 
NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -8759,22 +9158,26 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche DCHECK(linker_patches->empty()); size_t size = /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 47e6be59bd..fa1c14dcda 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -299,7 +299,6 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, Label* false_target); - void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -482,10 +481,11 @@ class CodeGeneratorARM : public CodeGenerator { Label add_pc_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); @@ -623,6 +623,14 @@ class CodeGeneratorARM : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + // `temp` is an extra temporary register that is used for some conditions; + // callers may not specify it, in which case the method will use a scratch + // register instead. 
+ void GenerateConditionWithZero(IfCondition condition, + Register out, + Register in, + Register temp = kNoRegister); + private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); @@ -662,12 +670,14 @@ class CodeGeneratorARM : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // PC-relative patch info for each HArmDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7ff100d870..096eb07074 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1449,9 +1449,10 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -4510,6 +4511,17 @@ Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStati case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + // Add ADRP with its PC-relative method patch. + vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); + // Add ADD with its PC-relative method patch. + vixl::aarch64::Label* add_label = + NewPcRelativeMethodPatch(invoke->GetTargetMethod(), adrp_label); + EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: // Load method address from literal pool. 
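Both back-ends now materialize the target method's boot-image address from a linked-in displacement: MOVW/MOVT/ADD PC on ARM above, ADRP/ADD on ARM64 here. A hedged sketch of what the patched ARM sequence computes, assuming the linker wrote `target - pc` into the two immediates (the relative patcher also folds in the architectural offset between the ADD and the PC value it reads, which is glossed over here):

#include <cassert>
#include <cstdint>

uint32_t MaterializeBootImageAddress(uint32_t pc_when_read, uint32_t displacement) {
  uint32_t reg = displacement & 0xffffu;     // MOVW temp, #lo16 (placeholder 0 before linking)
  reg |= (displacement & 0xffff0000u);       // MOVT temp, #hi16 (placeholder 0 before linking)
  return reg + pc_when_read;                 // ADD temp, temp, PC
}

int main() {
  const uint32_t pc = 0x70123456u;      // Made-up PC value read by the ADD.
  const uint32_t target = 0x700a0b0cu;  // Made-up boot-image method address.
  assert(MaterializeBootImageAddress(pc, target - pc) == target);
  return 0;
}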
__ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); @@ -4633,12 +4645,13 @@ void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* i codegen_->GenerateInvokePolymorphicCall(invoke); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch( - const DexFile& dex_file, - dex::StringIndex string_index, +vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch( + MethodReference target_method, vixl::aarch64::Label* adrp_label) { - return - NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_); + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + adrp_label, + &pc_relative_method_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch( @@ -4655,6 +4668,14 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch( return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); } +vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch( + const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label) { + return + NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_); +} + vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset, @@ -4747,9 +4768,10 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - pc_relative_string_patches_.size() + + pc_relative_method_patches_.size() + pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + + pc_relative_string_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { @@ -4758,15 +4780,18 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc info.pc_insn_label->GetLocation(), info.offset_or_index)); } - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 56444dc0dc..71e221da22 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -549,12 +549,11 @@ class CodeGeneratorARM64 : public CodeGenerator { UNIMPLEMENTED(FATAL); } - // Add a new PC-relative string patch for an instruction and return the label + // Add a new PC-relative method patch for an instruction and return the label // to be bound before the instruction. 
The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). - vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, + vixl::aarch64::Label* NewPcRelativeMethodPatch(MethodReference target_method, vixl::aarch64::Label* adrp_label = nullptr); // Add a new PC-relative type patch for an instruction and return the label @@ -573,6 +572,14 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label = nullptr); + // Add a new PC-relative string patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label = nullptr); + // Add a new PC-relative dex cache array patch for an instruction and return // the label to be bound before the instruction. The instruction will be // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` @@ -787,12 +794,14 @@ class CodeGeneratorARM64 : public CodeGenerator { Uint64ToLiteralMap uint64_literals_; // PC-relative DexCache access info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Baker read barrier patch info. 
ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 015e6ddea3..34821f83cd 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -1775,6 +1775,34 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codege } } +static int64_t AdjustConstantForCondition(int64_t value, + IfCondition* condition, + IfCondition* opposite) { + if (value == 1) { + if (*condition == kCondB) { + value = 0; + *condition = kCondEQ; + *opposite = kCondNE; + } else if (*condition == kCondAE) { + value = 0; + *condition = kCondNE; + *opposite = kCondEQ; + } + } else if (value == -1) { + if (*condition == kCondGT) { + value = 0; + *condition = kCondGE; + *opposite = kCondLT; + } else if (*condition == kCondLE) { + value = 0; + *condition = kCondLT; + *opposite = kCondGE; + } + } + + return value; +} + static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( HCondition* condition, bool invert, @@ -1797,7 +1825,37 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( const vixl32::Register left_high = HighRegisterFrom(left); const vixl32::Register left_low = LowRegisterFrom(left); - int64_t value = Int64ConstantFrom(right); + int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite); + UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (cond) { + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: + ret = std::make_pair(ne, eq); + FALLTHROUGH_INTENDED; + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. + case kCondBE: + __ Orrs(temps.Acquire(), left_low, left_high); + return ret; + case kCondLT: + case kCondGE: + __ Cmp(left_high, 0); + return std::make_pair(ARMCondition(cond), ARMCondition(opposite)); + // Trivially true or false. 
+ case kCondB: + ret = std::make_pair(ne, eq); + FALLTHROUGH_INTENDED; + case kCondAE: + __ Cmp(left_low, left_low); + return ret; + default: + break; + } + } switch (cond) { case kCondEQ: @@ -1842,8 +1900,6 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( FALLTHROUGH_INTENDED; case kCondGE: case kCondLT: { - UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); - __ Cmp(left_low, Low32Bits(value)); __ Sbcs(temps.Acquire(), left_high, High32Bits(value)); ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); @@ -1961,18 +2017,22 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) { if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { const LocationSummary* const locations = condition->GetLocations(); - const IfCondition c = condition->GetCondition(); if (locations->InAt(1).IsConstant()) { - const int64_t value = Int64ConstantFrom(locations->InAt(1)); + IfCondition c = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + const int64_t value = + AdjustConstantForCondition(Int64ConstantFrom(locations->InAt(1)), &c, &opposite); if (c < kCondLT || c > kCondGE) { // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, // we check that the least significant half of the first input to be compared // is in a low register (the other half is read outside an IT block), and // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used. - if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) { + // encoding can be used; 0 is always handled, no matter what registers are + // used by the first input. + if (value != 0 && + (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value)))) { return false; } // TODO(VIXL): The rest of the checks are there to keep the backend in sync with @@ -1991,6 +2051,353 @@ static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) return true; } +static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + DCHECK(CanGenerateTest(cond, codegen->GetAssembler())); + + const vixl32::Register out = OutputRegister(cond); + const auto condition = GenerateTest(cond, false, codegen); + + __ Mov(LeaveFlags, out, 0); + + if (out.IsLow()) { + // We use the scope because of the IT block that follows. 
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(condition.first); + __ mov(condition.first, out, 1); + } else { + vixl32::Label done_label; + vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + + __ B(condition.second, final_label, /* far_target */ false); + __ Mov(out, 1); + + if (done_label.IsReferenced()) { + __ Bind(&done_label); + } + } +} + +static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const vixl32::Register out = OutputRegister(cond); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + vixl32::Register left_high = HighRegisterFrom(left); + vixl32::Register left_low = LowRegisterFrom(left); + vixl32::Register temp; + UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), + &condition, + &opposite); + Operand right_high = High32Bits(value); + Operand right_low = Low32Bits(value); + + // The output uses Location::kNoOutputOverlap. + if (out.Is(left_high)) { + std::swap(left_low, left_high); + std::swap(right_low, right_high); + } + + __ Sub(out, left_low, right_low); + temp = temps.Acquire(); + __ Sub(temp, left_high, right_high); + } else { + DCHECK(right.IsRegisterPair()); + temp = temps.Acquire(); + __ Sub(temp, left_high, HighRegisterFrom(right)); + __ Sub(out, left_low, LowRegisterFrom(right)); + } + + // Need to check after calling AdjustConstantForCondition(). + DCHECK(condition == kCondEQ || condition == kCondNE) << condition; + + if (condition == kCondNE && out.IsLow()) { + __ Orrs(out, out, temp); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + } else { + __ Orr(out, out, temp); + codegen->GenerateConditionWithZero(condition, out, out, temp); + } +} + +static void GenerateLongComparesAndJumps(HCondition* cond, + vixl32::Label* true_label, + vixl32::Label* false_label, + CodeGeneratorARMVIXL* codegen) { + LocationSummary* locations = cond->GetLocations(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + IfCondition if_cond = cond->GetCondition(); + + vixl32::Register left_high = HighRegisterFrom(left); + vixl32::Register left_low = LowRegisterFrom(left); + IfCondition true_high_cond = if_cond; + IfCondition false_high_cond = cond->GetOppositeCondition(); + vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part + + // Set the conditions for the test, remembering that == needs to be + // decided using the low words. + switch (if_cond) { + case kCondEQ: + case kCondNE: + // Nothing to do. 
+ break; + case kCondLT: + false_high_cond = kCondGT; + break; + case kCondLE: + true_high_cond = kCondLT; + break; + case kCondGT: + false_high_cond = kCondLT; + break; + case kCondGE: + true_high_cond = kCondGT; + break; + case kCondB: + false_high_cond = kCondA; + break; + case kCondBE: + true_high_cond = kCondB; + break; + case kCondA: + false_high_cond = kCondB; + break; + case kCondAE: + true_high_cond = kCondA; + break; + } + if (right.IsConstant()) { + int64_t value = Int64ConstantFrom(right); + int32_t val_low = Low32Bits(value); + int32_t val_high = High32Bits(value); + + __ Cmp(left_high, val_high); + if (if_cond == kCondNE) { + __ B(ARMCondition(true_high_cond), true_label); + } else if (if_cond == kCondEQ) { + __ B(ARMCondition(false_high_cond), false_label); + } else { + __ B(ARMCondition(true_high_cond), true_label); + __ B(ARMCondition(false_high_cond), false_label); + } + // Must be equal high, so compare the lows. + __ Cmp(left_low, val_low); + } else { + vixl32::Register right_high = HighRegisterFrom(right); + vixl32::Register right_low = LowRegisterFrom(right); + + __ Cmp(left_high, right_high); + if (if_cond == kCondNE) { + __ B(ARMCondition(true_high_cond), true_label); + } else if (if_cond == kCondEQ) { + __ B(ARMCondition(false_high_cond), false_label); + } else { + __ B(ARMCondition(true_high_cond), true_label); + __ B(ARMCondition(false_high_cond), false_label); + } + // Must be equal high, so compare the lows. + __ Cmp(left_low, right_low); + } + // The last comparison might be unsigned. + // TODO: optimize cases where this is always true/false + __ B(final_condition, true_label); +} + +static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const vixl32::Register out = OutputRegister(cond); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + // Comparisons against 0 are common enough to deserve special attention. + if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (out.IsLow()) { + // We only care if both input registers are 0 or not. + __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left)); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + // We only care if both input registers are 0 or not. + __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left)); + codegen->GenerateConditionWithZero(condition, out, out); + return; + case kCondLT: + case kCondGE: + // We only care about the sign bit. + FALLTHROUGH_INTENDED; + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left)); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if ((condition == kCondEQ || condition == kCondNE) && + // If `out` is a low register, then the GenerateConditionGeneric() + // function generates a shorter code sequence that is still branchless. 
+ (!out.IsLow() || !CanGenerateTest(cond, codegen->GetAssembler()))) { + GenerateEqualLong(cond, codegen); + return; + } + + if (CanGenerateTest(cond, codegen->GetAssembler())) { + GenerateConditionGeneric(cond, codegen); + return; + } + + // Convert the jumps into the result. + vixl32::Label done_label; + vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + vixl32::Label true_label, false_label; + + GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen); + + // False case: result = 0. + __ Bind(&false_label); + __ Mov(out, 0); + __ B(final_label); + + // True case: result = 1. + __ Bind(&true_label); + __ Mov(out, 1); + + if (done_label.IsReferenced()) { + __ Bind(&done_label); + } +} + +static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + const Primitive::Type type = cond->GetLeft()->GetType(); + + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + + if (type == Primitive::kPrimLong) { + GenerateConditionLong(cond, codegen); + return; + } + + IfCondition condition = cond->GetCondition(); + vixl32::Register in = InputRegisterAt(cond, 0); + const vixl32::Register out = OutputRegister(cond); + const Location right = cond->GetLocations()->InAt(1); + int64_t value; + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (out.IsLow() && out.Is(in)) { + __ Cmp(out, 0); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + case kCondLT: + case kCondGE: + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, in); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if (condition == kCondEQ || condition == kCondNE) { + Operand operand(0); + + if (right.IsConstant()) { + operand = Operand::From(value); + } else if (out.Is(RegisterFrom(right))) { + // Avoid 32-bit instructions if possible. + operand = InputOperandAt(cond, 0); + in = RegisterFrom(right); + } else { + operand = InputOperandAt(cond, 1); + } + + if (condition == kCondNE && out.IsLow()) { + __ Subs(out, in, operand); + + // We use the scope because of the IT block that follows. 
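// Illustrative sketch (not from the ART sources): the EQ/NE path above reduces
// `x == y` / `x != y` to a comparison of the difference `x - y` against zero,
// which GenerateConditionWithZero() then materializes without a branch. A
// portable analogue of the non-zero test on that difference (hypothetical
// helper, for illustration only):
#include <cstdint>

static uint32_t NotEqualBit(uint32_t x, uint32_t y) {
  const uint32_t diff = x - y;
  // The sign bit of (diff | -diff) is set exactly when diff != 0.
  return (diff | (0u - diff)) >> 31;
}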
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + } else { + __ Sub(out, in, operand); + codegen->GenerateConditionWithZero(condition, out, out); + } + + return; + } + + GenerateConditionGeneric(cond, codegen); +} + static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) { const Primitive::Type type = constant->GetType(); bool ret = false; @@ -2093,9 +2500,10 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -2547,89 +2955,6 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond, - vixl32::Label* true_label, - vixl32::Label* false_label) { - LocationSummary* locations = cond->GetLocations(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - IfCondition if_cond = cond->GetCondition(); - - vixl32::Register left_high = HighRegisterFrom(left); - vixl32::Register left_low = LowRegisterFrom(left); - IfCondition true_high_cond = if_cond; - IfCondition false_high_cond = cond->GetOppositeCondition(); - vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part - - // Set the conditions for the test, remembering that == needs to be - // decided using the low words. - switch (if_cond) { - case kCondEQ: - case kCondNE: - // Nothing to do. - break; - case kCondLT: - false_high_cond = kCondGT; - break; - case kCondLE: - true_high_cond = kCondLT; - break; - case kCondGT: - false_high_cond = kCondLT; - break; - case kCondGE: - true_high_cond = kCondGT; - break; - case kCondB: - false_high_cond = kCondA; - break; - case kCondBE: - true_high_cond = kCondB; - break; - case kCondA: - false_high_cond = kCondB; - break; - case kCondAE: - true_high_cond = kCondA; - break; - } - if (right.IsConstant()) { - int64_t value = Int64ConstantFrom(right); - int32_t val_low = Low32Bits(value); - int32_t val_high = High32Bits(value); - - __ Cmp(left_high, val_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label); - } else { - __ B(ARMCondition(true_high_cond), true_label); - __ B(ARMCondition(false_high_cond), false_label); - } - // Must be equal high, so compare the lows. 
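// Illustrative sketch (not from the ART sources): GenerateLongComparesAndJumps()
// orders two 64-bit values by comparing the high words with a signed comparison
// and, only when they are equal, comparing the low words with an unsigned
// comparison ("must be equal high, so compare the lows"). Hypothetical portable
// helper showing that decomposition:
#include <cstdint>

static int CompareInt64ViaHalves(int64_t a, int64_t b) {
  const int32_t a_high = static_cast<int32_t>(static_cast<uint64_t>(a) >> 32);
  const int32_t b_high = static_cast<int32_t>(static_cast<uint64_t>(b) >> 32);
  if (a_high != b_high) {
    return a_high < b_high ? -1 : 1;   // Signed compare on the high words.
  }
  const uint32_t a_low = static_cast<uint32_t>(a);
  const uint32_t b_low = static_cast<uint32_t>(b);
  if (a_low != b_low) {
    return a_low < b_low ? -1 : 1;     // Unsigned compare on the low words.
  }
  return 0;
}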
- __ Cmp(left_low, val_low); - } else { - vixl32::Register right_high = HighRegisterFrom(right); - vixl32::Register right_low = LowRegisterFrom(right); - - __ Cmp(left_high, right_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label); - } else { - __ B(ARMCondition(true_high_cond), true_label); - __ B(ARMCondition(false_high_cond), false_label); - } - // Must be equal high, so compare the lows. - __ Cmp(left_low, right_low); - } - // The last comparison might be unsigned. - // TODO: optimize cases where this is always true/false - __ B(final_condition, true_label); -} - void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, vixl32::Label* true_target_in, vixl32::Label* false_target_in) { @@ -2664,7 +2989,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* c vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in; DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); - GenerateLongComparesAndJumps(condition, true_target, false_target); + GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_); if (false_target != &fallthrough) { __ B(false_target); @@ -2975,6 +3300,83 @@ void CodeGeneratorARMVIXL::GenerateNop() { __ Nop(); } +// `temp` is an extra temporary register that is used for some conditions; +// callers may not specify it, in which case the method will use a scratch +// register instead. +void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition, + vixl32::Register out, + vixl32::Register in, + vixl32::Register temp) { + switch (condition) { + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. + case kCondBE: + if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) { + temp = out; + } + + // Avoid 32-bit instructions if possible; note that `in` and `temp` must be + // different as well. + if (in.IsLow() && temp.IsLow() && !in.Is(temp)) { + // temp = - in; only 0 sets the carry flag. + __ Rsbs(temp, in, 0); + + if (out.Is(in)) { + std::swap(in, temp); + } + + // out = - in + in + carry = carry + __ Adc(out, temp, in); + } else { + // If `in` is 0, then it has 32 leading zeros, and less than that otherwise. + __ Clz(out, in); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); + } + + break; + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: { + UseScratchRegisterScope temps(GetVIXLAssembler()); + + if (out.Is(in)) { + if (!temp.IsValid() || in.Is(temp)) { + temp = temps.Acquire(); + } + } else if (!temp.IsValid() || !temp.IsLow()) { + temp = out; + } + + // temp = in - 1; only 0 does not set the carry flag. + __ Subs(temp, in, 1); + // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry + __ Sbc(out, in, temp); + break; + } + case kCondGE: + __ Mvn(out, in); + in = out; + FALLTHROUGH_INTENDED; + case kCondLT: + // We only care about the sign bit. + __ Lsr(out, in, 31); + break; + case kCondAE: + // Trivially true. + __ Mov(out, 1); + break; + case kCondB: + // Trivially false. 
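// Illustrative sketch (not from the ART sources): two of the branchless zero
// tests used by GenerateConditionWithZero() above, rewritten in portable C++20.
// CLZ of 0 is 32 and CLZ of any non-zero 32-bit value is at most 31, so a
// logical shift right by 5 yields 1 for a zero input and 0 otherwise; the NE/A
// case relies on the carry flag of `in - 1`, which only a zero input fails to
// set. Helper names are hypothetical, for illustration only.
#include <bit>
#include <cstdint>

static uint32_t IsZeroBit(uint32_t in) {
  // Mirrors the CLZ + LSR #5 sequence.
  return static_cast<uint32_t>(std::countl_zero(in)) >> 5;
}

static uint32_t IsNonZeroBit(uint32_t in) {
  // Mirrors the SUBS/SBC carry trick: SUBS temp, in, #1 sets the carry flag
  // ("no borrow") for every input except 0, and SBC out, in, temp then computes
  // in - temp - (1 - carry), which collapses to the carry bit.
  const uint32_t temp = in - 1u;
  const uint32_t carry = (in >= 1u) ? 1u : 0u;  // ARM carry = "no borrow".
  return in - temp - (1u - carry);
}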
+ __ Mov(out, 0); + break; + default: + LOG(FATAL) << "Unexpected condition " << condition; + UNREACHABLE(); + } +} + void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); @@ -3011,52 +3413,47 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { return; } - const vixl32::Register out = OutputRegister(cond); - - if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) { - const auto condition = GenerateTest(cond, false, codegen_); - // We use the scope because of the IT block that follows. - ExactAssemblyScope guard(GetVIXLAssembler(), - 4 * vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); + const Primitive::Type type = cond->GetLeft()->GetType(); - __ it(condition.first); - __ mov(condition.first, out, 1); - __ it(condition.second); - __ mov(condition.second, out, 0); + if (Primitive::IsFloatingPointType(type)) { + GenerateConditionGeneric(cond, codegen_); return; } - // Convert the jumps into the result. - vixl32::Label done_label; - vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { - vixl32::Label true_label, false_label; + const IfCondition condition = cond->GetCondition(); - GenerateLongComparesAndJumps(cond, &true_label, &false_label); + // A condition with only one boolean input, or two boolean inputs without being equality or + // inequality results from transformations done by the instruction simplifier, and is handled + // as a regular condition with integral inputs. + if (type == Primitive::kPrimBoolean && + cond->GetRight()->GetType() == Primitive::kPrimBoolean && + (condition == kCondEQ || condition == kCondNE)) { + vixl32::Register left = InputRegisterAt(cond, 0); + const vixl32::Register out = OutputRegister(cond); + const Location right_loc = cond->GetLocations()->InAt(1); - // False case: result = 0. - __ Bind(&false_label); - __ Mov(out, 0); - __ B(final_label); + // The constant case is handled by the instruction simplifier. + DCHECK(!right_loc.IsConstant()); - // True case: result = 1. - __ Bind(&true_label); - __ Mov(out, 1); - } else { - DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); + vixl32::Register right = RegisterFrom(right_loc); - const auto condition = GenerateTest(cond, false, codegen_); + // Avoid 32-bit instructions if possible. 
+ if (out.Is(right)) { + std::swap(left, right); + } - __ Mov(LeaveFlags, out, 0); - __ B(condition.second, final_label, /* far_target */ false); - __ Mov(out, 1); - } + __ Eor(out, left, right); - if (done_label.IsReferenced()) { - __ Bind(&done_label); + if (condition == kCondEQ) { + __ Eor(out, out, 1); + } + + return; } + + GenerateConditionIntegralOrNonPrimitive(cond, codegen_); } void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) { @@ -8734,6 +9131,13 @@ Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + vixl32::Register temp_reg = RegisterFrom(temp); + EmitMovwMovtPlaceholder(labels, temp_reg); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); break; @@ -8850,9 +9254,11 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location __ blx(lr); } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch( @@ -8865,6 +9271,11 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntry return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +} + CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -8934,22 +9345,26 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa DCHECK(linker_patches->empty()); size_t size = /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + 
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index daba9bf060..91f7524c8e 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -401,9 +401,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateCompareTestAndBranch(HCondition* condition, vixl::aarch32::Label* true_target, vixl::aarch32::Label* false_target); - void GenerateLongComparesAndJumps(HCondition* cond, - vixl::aarch32::Label* true_label, - vixl::aarch32::Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -566,10 +563,11 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Label add_pc_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); @@ -716,6 +714,14 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo* labels, vixl::aarch32::Register out); + // `temp` is an extra temporary register that is used for some conditions; + // callers may not specify it, in which case the method will use a scratch + // register instead. + void GenerateConditionWithZero(IfCondition condition, + vixl::aarch32::Register out, + vixl::aarch32::Register in, + vixl::aarch32::Register temp = vixl32::Register()); + private: vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, vixl::aarch32::Register temp); @@ -760,12 +766,14 @@ class CodeGeneratorARMVIXL : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // PC-relative patch info for each HArmDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. 
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 95be3d7fd2..d8ac99a9a6 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1061,9 +1061,10 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), clobbered_ra_(false) { @@ -1602,30 +1603,36 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - pc_relative_string_patches_.size() + + pc_relative_method_patches_.size() + pc_relative_type_patches_.size() + - type_bss_entry_patches_.size(); + type_bss_entry_patches_.size() + + pc_relative_string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch( @@ -1638,6 +1645,11 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewTypeBssEntryPatch( return 
NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +} + CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -6947,7 +6959,7 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); - bool has_extra_input = invoke->HasPcRelativeDexCache() && !is_r6; + bool has_extra_input = invoke->HasPcRelativeMethodLoadKind() && !is_r6; IntrinsicLocationsBuilderMIPS intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -7084,6 +7096,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticO bool is_r6 = GetInstructionSetFeatures().IsR6(); bool fallback_load = has_irreducible_loops && !is_r6; switch (dispatch_info.method_load_kind) { + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: break; default: @@ -7103,7 +7116,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); bool is_r6 = GetInstructionSetFeatures().IsR6(); - Register base_reg = (invoke->HasPcRelativeDexCache() && !is_r6) + Register base_reg = (invoke->HasPcRelativeMethodLoadKind() && !is_r6) ? 
GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()) : ZERO; @@ -7121,6 +7134,16 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + PcRelativePatchInfo* info = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + bool reordering = __ SetReorder(false); + Register temp_reg = temp.AsRegister<Register>(); + EmitPcRelativeAddressPlaceholderHigh(info, temp_reg, base_reg); + __ Addiu(temp_reg, temp_reg, /* placeholder */ 0x5678); + __ SetReorder(reordering); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 449cb4c62b..ff1fde6489 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -582,10 +582,11 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsLabel pc_rel_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); Literal* DeduplicateBootImageAddressLiteral(uint32_t address); @@ -644,12 +645,15 @@ class CodeGeneratorMIPS : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // PC-relative patch info for each HMipsDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Patches for string root accesses in JIT compiled code. ArenaDeque<JitPatchInfo> jit_string_patches_; // Patches for class root accesses in JIT compiled code. 
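// Illustrative sketch (not from the ART sources): the MIPS path above pairs a
// "high" placeholder (EmitPcRelativeAddressPlaceholderHigh) with a low ADDIU
// whose dummy 0x5678 immediate is patched later. Because the low half is added
// as a sign-extended 16-bit value, the high half carries a +0x8000 rounding;
// the hypothetical helper below shows that the split reassembles the original
// 32-bit offset for every input.
#include <cstdint>

static uint32_t ReassembleHiLo(uint32_t offset) {
  const uint32_t hi = (offset + 0x8000u) >> 16;               // "%hi"-style half.
  const int32_t lo = static_cast<int16_t>(offset & 0xffffu);  // "%lo" half, sign-extended.
  return (hi << 16) + static_cast<uint32_t>(lo);              // == offset.
}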
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 5cdff5a7bc..096139191e 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -958,9 +958,10 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -1440,30 +1441,36 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - pc_relative_string_patches_.size() + + pc_relative_method_patches_.size() + pc_relative_type_patches_.size() + - type_bss_entry_patches_.size(); + type_bss_entry_patches_.size() + + pc_relative_string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch( @@ -1476,6 +1483,11 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewTypeBssEntryPa return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); 
+} + CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -4923,6 +4935,14 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + EmitPcRelativeAddressPlaceholderHigh(info, AT); + __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadLiteral(temp.AsRegister<GpuRegister>(), kLoadDoubleword, diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 1f34ced687..f49ad49fce 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -545,10 +545,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Mips64Label pc_rel_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file, @@ -605,12 +606,15 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Uint64ToLiteralMap uint64_literals_; // PC-relative patch info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Patches for string root accesses in JIT compiled code. StringToLiteralMap jit_string_patches_; // Patches for class root accesses in JIT compiled code. 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 4a279d8de1..f3ec112548 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1032,9 +1032,10 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, assembler_(graph->GetArena()), isa_features_(isa_features), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), constant_area_start_(-1), @@ -2167,7 +2168,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { - if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { + if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) { invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); } return; @@ -2176,7 +2177,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok HandleInvoke(invoke); // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. - if (invoke->HasPcRelativeDexCache()) { + if (invoke->HasPcRelativeMethodLoadKind()) { invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); } } @@ -4543,6 +4544,14 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset)); + RecordBootMethodPatch(invoke); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); break; @@ -4631,13 +4640,14 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value())); } -void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); - HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); - string_patches_.emplace_back(address, - load_string->GetDexFile(), - load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); + boot_image_method_patches_.emplace_back(address, + *invoke->GetTargetMethod().dex_file, + 
invoke->GetTargetMethod().dex_method_index); + __ Bind(&boot_image_method_patches_.back().label); } void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) { @@ -4656,6 +4666,15 @@ Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) { return &type_bss_entry_patches_.back().label; } +void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { + DCHECK(GetCompilerOptions().IsBootImage()); + HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); + string_patches_.emplace_back(address, + load_string->GetDexFile(), + load_string->GetStringIndex().index_); + __ Bind(&string_patches_.back().label); +} + Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); HX86ComputeBaseMethodAddress* address = @@ -4693,17 +4712,21 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - string_patches_.size() + + boot_image_method_patches_.size() + boot_image_type_patches_.size() + - type_bss_entry_patches_.size(); + type_bss_entry_patches_.size() + + string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } else { + DCHECK(boot_image_method_patches_.empty()); DCHECK(boot_image_type_patches_.empty()); EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f08d642f5e..21c527e8b0 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -413,9 +413,10 @@ class CodeGeneratorX86 : public CodeGenerator { // Generate a call to a virtual method. void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); void RecordBootTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); + void RecordBootStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(HX86ComputeBaseMethodAddress* method_address, const DexFile& dex_file, @@ -633,16 +634,17 @@ class CodeGeneratorX86 : public CodeGenerator { // PC-relative DexCache access info. ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // String patch locations; type depends on configuration (app .bss or boot image). - ArenaDeque<X86PcRelativePatchInfo> string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; + // String patch locations; type depends on configuration (app .bss or boot image). 
+ ArenaDeque<X86PcRelativePatchInfo> string_patches_; // Patches for string root accesses in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; - // Patches for class root accesses in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_class_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ac0f37b717..bf1c42ae8e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -991,6 +991,12 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().IsBootImage()); + __ leal(temp.AsRegister<CpuRegister>(), + Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); + RecordBootMethodPatch(invoke); + break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); break; @@ -1079,10 +1085,10 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t kX86_64PointerSize).SizeValue())); } -void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); - string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { + boot_image_method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); + __ Bind(&boot_image_method_patches_.back().label); } void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) { @@ -1096,6 +1102,12 @@ Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) { return &type_bss_entry_patches_.back().label; } +void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { + DCHECK(GetCompilerOptions().IsBootImage()); + string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); + __ Bind(&string_patches_.back().label); +} + Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); @@ -1128,17 +1140,21 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - string_patches_.size() + + boot_image_method_patches_.size() + boot_image_type_patches_.size() + - type_bss_entry_patches_.size(); + type_bss_entry_patches_.size() + + string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } else { + DCHECK(boot_image_method_patches_.empty()); DCHECK(boot_image_type_patches_.empty()); 
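// Illustrative sketch (not from the ART sources): the leal above deliberately
// encodes a dummy 32-bit displacement (kDummy32BitOffset) and binds a label at
// the patch site; RecordBootMethodPatch() remembers that position so the real
// offset can be substituted later. Hypothetical helper showing the general kind
// of fixup applied at such a recorded literal offset (assumes a little-endian
// encoding, as on x86); it is not ART's actual patcher code.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

static void PatchInt32(std::vector<uint8_t>* code, size_t literal_offset, int32_t displacement) {
  // Overwrite the 4 dummy bytes at `literal_offset` with the final displacement.
  std::memcpy(code->data() + literal_offset, &displacement, sizeof(displacement));
}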
EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); } @@ -1231,12 +1247,13 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, isa_features_(isa_features), constant_area_start_(0), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index d8005cc410..3039e0519c 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -408,9 +408,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); void RecordBootTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); + void RecordBootStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); Label* NewJitRootStringPatch(const DexFile& dex_file, @@ -603,22 +604,23 @@ class CodeGeneratorX86_64 : public CodeGenerator { // PC-relative DexCache access info. ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; - // String patch locations; type depends on configuration (app .bss or boot image). - ArenaDeque<PatchInfo<Label>> string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; - - // Fixups for jump tables need to be handled specially. - ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; + // String patch locations; type depends on configuration (app .bss or boot image). + ArenaDeque<PatchInfo<Label>> string_patches_; // Patches for string literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; - // Patches for class literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_class_patches_; + // Fixups for jump tables need to be handled specially. 
+ ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 8674e727bb..0ec6ee2fe2 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -1856,7 +1856,7 @@ void HInliner::RunOptimizations(HGraph* callee_graph, HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); HConstantFolding fold(callee_graph, "constant_folding$inliner"); HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_); - InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_); + InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_); IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_); HOptimization* optimizations[] = { diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2cedde900e..d14716601c 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -30,9 +30,11 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { public: InstructionSimplifierVisitor(HGraph* graph, CodeGenerator* codegen, + CompilerDriver* compiler_driver, OptimizingCompilerStats* stats) : HGraphDelegateVisitor(graph), codegen_(codegen), + compiler_driver_(compiler_driver), stats_(stats) {} void Run(); @@ -119,6 +121,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind); CodeGenerator* codegen_; + CompilerDriver* compiler_driver_; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; int simplifications_at_current_position_ = 0; @@ -130,7 +133,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { }; void InstructionSimplifier::Run() { - InstructionSimplifierVisitor visitor(graph_, codegen_, stats_); + InstructionSimplifierVisitor visitor(graph_, codegen_, compiler_driver_, stats_); visitor.Run(); } @@ -1896,7 +1899,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) // the invoke, as we would need to look it up in the current dex file, and it // is unlikely that it exists. The most usual situation for such typed // arraycopy methods is a direct pointer to the boot image. - HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_); + HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_, compiler_driver_); } } } diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index f7329a4a1f..5e2045580b 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -24,6 +24,7 @@ namespace art { class CodeGenerator; +class CompilerDriver; /** * Implements optimizations specific to each instruction. 
@@ -37,12 +38,14 @@ class CodeGenerator; */ class InstructionSimplifier : public HOptimization { public: - explicit InstructionSimplifier(HGraph* graph, - CodeGenerator* codegen, - OptimizingCompilerStats* stats = nullptr, - const char* name = kInstructionSimplifierPassName) + InstructionSimplifier(HGraph* graph, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + OptimizingCompilerStats* stats = nullptr, + const char* name = kInstructionSimplifierPassName) : HOptimization(graph, name, stats), - codegen_(codegen) {} + codegen_(codegen), + compiler_driver_(compiler_driver) {} static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; @@ -50,6 +53,7 @@ class InstructionSimplifier : public HOptimization { private: CodeGenerator* codegen_; + CompilerDriver* compiler_driver_; DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 1df884e551..e8a62aafae 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -2598,11 +2598,7 @@ void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) { // We don't care about the sign bit, so shift left. __ Lsl(out, out, 1); __ eor(out, out, ShifterOperand(infinity)); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. - __ clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -2625,11 +2621,7 @@ void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) { __ eor(out, out, ShifterOperand(infinity_high2)); // We don't care about the sign bit, so shift left. __ orr(out, IP, ShifterOperand(out, LSL, 1)); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. - __ clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 2d9781ade8..ce3ba52b34 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -2971,11 +2971,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { // We don't care about the sign bit, so shift left. __ Lsl(out, out, 1); __ Eor(out, out, infinity); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. - __ Clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -3001,11 +2997,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { __ Eor(out, out, infinity_high2); // We don't care about the sign bit, so shift left. __ Orr(out, temp, Operand(out, vixl32::LSL, 1)); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. 
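// Illustrative sketch (not from the ART sources): the IsInfinite intrinsics
// above shift the sign bit out of the raw float bits and XOR with the equally
// shifted infinity pattern, so the result is zero exactly for +/- infinity; the
// new code then hands that zero test to GenerateConditionWithZero(kCondEQ, ...).
// A portable version of the same bit trick (hypothetical helper):
#include <cstdint>
#include <cstring>

static bool IsFloatInfinity(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  const uint32_t kInfinityBits = 0x7f800000u;  // Bit pattern of +infinity.
  // Shifting left by one drops the sign bit from both operands.
  return ((bits << 1) ^ (kInfinityBits << 1)) == 0u;
}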
- __ Clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc new file mode 100644 index 0000000000..f2ee345c8c --- /dev/null +++ b/compiler/optimizing/load_store_analysis.cc @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "load_store_analysis.h" + +namespace art { + +// A cap for the number of heap locations to prevent pathological time/space consumption. +// The number of heap locations for most of the methods stays below this threshold. +constexpr size_t kMaxNumberOfHeapLocations = 32; + +void LoadStoreAnalysis::Run() { + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + heap_location_collector_.VisitBasicBlock(block); + } + + if (heap_location_collector_.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) { + // Bail out if there are too many heap locations to deal with. + heap_location_collector_.CleanUp(); + return; + } + if (!heap_location_collector_.HasHeapStores()) { + // Without heap stores, this pass would act mostly as GVN on heap accesses. + heap_location_collector_.CleanUp(); + return; + } + if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) { + // Don't do load/store elimination if the method has volatile field accesses or + // monitor operations, for now. + // TODO: do it right. + heap_location_collector_.CleanUp(); + return; + } + + heap_location_collector_.BuildAliasingMatrix(); +} + +} // namespace art diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h new file mode 100644 index 0000000000..4e940f30bf --- /dev/null +++ b/compiler/optimizing/load_store_analysis.h @@ -0,0 +1,518 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_ +#define ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_ + +#include "escape.h" +#include "nodes.h" +#include "optimization.h" + +namespace art { + +// A ReferenceInfo contains additional info about a reference such as +// whether it's a singleton, returned, etc. 
+class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { + public: + ReferenceInfo(HInstruction* reference, size_t pos) + : reference_(reference), + position_(pos), + is_singleton_(true), + is_singleton_and_not_returned_(true), + is_singleton_and_not_deopt_visible_(true), + has_index_aliasing_(false) { + CalculateEscape(reference_, + nullptr, + &is_singleton_, + &is_singleton_and_not_returned_, + &is_singleton_and_not_deopt_visible_); + } + + HInstruction* GetReference() const { + return reference_; + } + + size_t GetPosition() const { + return position_; + } + + // Returns true if reference_ is the only name that can refer to its value during + // the lifetime of the method. So it's guaranteed to not have any alias in + // the method (including its callees). + bool IsSingleton() const { + return is_singleton_; + } + + // Returns true if reference_ is a singleton and not returned to the caller or + // used as an environment local of an HDeoptimize instruction. + // The allocation and stores into reference_ may be eliminated for such cases. + bool IsSingletonAndRemovable() const { + return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_; + } + + // Returns true if reference_ is a singleton and returned to the caller or + // used as an environment local of an HDeoptimize instruction. + bool IsSingletonAndNonRemovable() const { + return is_singleton_ && + (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_); + } + + bool HasIndexAliasing() { + return has_index_aliasing_; + } + + void SetHasIndexAliasing(bool has_index_aliasing) { + // Only allow setting to true. + DCHECK(has_index_aliasing); + has_index_aliasing_ = has_index_aliasing; + } + + private: + HInstruction* const reference_; + const size_t position_; // position in HeapLocationCollector's ref_info_array_. + + // Can only be referred to by a single name in the method. + bool is_singleton_; + // Is singleton and not returned to caller. + bool is_singleton_and_not_returned_; + // Is singleton and not used as an environment local of HDeoptimize. + bool is_singleton_and_not_deopt_visible_; + // Some heap locations with reference_ have array index aliasing, + // e.g. arr[i] and arr[j] may be the same location. + bool has_index_aliasing_; + + DISALLOW_COPY_AND_ASSIGN(ReferenceInfo); +}; + +// A heap location is a reference-offset/index pair that a value can be loaded from +// or stored to. +class HeapLocation : public ArenaObject<kArenaAllocMisc> { + public: + static constexpr size_t kInvalidFieldOffset = -1; + + // TODO: more fine-grained array types. + static constexpr int16_t kDeclaringClassDefIndexForArrays = -1; + + HeapLocation(ReferenceInfo* ref_info, + size_t offset, + HInstruction* index, + int16_t declaring_class_def_index) + : ref_info_(ref_info), + offset_(offset), + index_(index), + declaring_class_def_index_(declaring_class_def_index), + value_killed_by_loop_side_effects_(true) { + DCHECK(ref_info != nullptr); + DCHECK((offset == kInvalidFieldOffset && index != nullptr) || + (offset != kInvalidFieldOffset && index == nullptr)); + if (ref_info->IsSingleton() && !IsArrayElement()) { + // Assume this location's value cannot be killed by loop side effects + // until proven otherwise. + value_killed_by_loop_side_effects_ = false; + } + } + + ReferenceInfo* GetReferenceInfo() const { return ref_info_; } + size_t GetOffset() const { return offset_; } + HInstruction* GetIndex() const { return index_; } + + // Returns the definition of declaring class' dex index. 
+ // It's kDeclaringClassDefIndexForArrays for an array element. + int16_t GetDeclaringClassDefIndex() const { + return declaring_class_def_index_; + } + + bool IsArrayElement() const { + return index_ != nullptr; + } + + bool IsValueKilledByLoopSideEffects() const { + return value_killed_by_loop_side_effects_; + } + + void SetValueKilledByLoopSideEffects(bool val) { + value_killed_by_loop_side_effects_ = val; + } + + private: + ReferenceInfo* const ref_info_; // reference for instance/static field or array access. + const size_t offset_; // offset of static/instance field. + HInstruction* const index_; // index of an array element. + const int16_t declaring_class_def_index_; // declaring class's def's dex index. + bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop + // side effects because this location is stored + // into inside a loop. This gives + // better info on whether a singleton's location + // value may be killed by loop side effects. + + DISALLOW_COPY_AND_ASSIGN(HeapLocation); +}; + +// A HeapLocationCollector collects all relevant heap locations and keeps +// an aliasing matrix for all locations. +class HeapLocationCollector : public HGraphVisitor { + public: + static constexpr size_t kHeapLocationNotFound = -1; + // Start with a single uint32_t word. That's enough bits for pair-wise + // aliasing matrix of 8 heap locations. + static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32; + + explicit HeapLocationCollector(HGraph* graph) + : HGraphVisitor(graph), + ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)), + heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)), + aliasing_matrix_(graph->GetArena(), + kInitialAliasingMatrixBitVectorSize, + true, + kArenaAllocLSE), + has_heap_stores_(false), + has_volatile_(false), + has_monitor_operations_(false) {} + + void CleanUp() { + heap_locations_.clear(); + ref_info_array_.clear(); + } + + size_t GetNumberOfHeapLocations() const { + return heap_locations_.size(); + } + + HeapLocation* GetHeapLocation(size_t index) const { + return heap_locations_[index]; + } + + HInstruction* HuntForOriginalReference(HInstruction* ref) const { + DCHECK(ref != nullptr); + while (ref->IsNullCheck() || ref->IsBoundType()) { + ref = ref->InputAt(0); + } + return ref; + } + + ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const { + for (size_t i = 0; i < ref_info_array_.size(); i++) { + ReferenceInfo* ref_info = ref_info_array_[i]; + if (ref_info->GetReference() == ref) { + DCHECK_EQ(i, ref_info->GetPosition()); + return ref_info; + } + } + return nullptr; + } + + bool HasHeapStores() const { + return has_heap_stores_; + } + + bool HasVolatile() const { + return has_volatile_; + } + + bool HasMonitorOps() const { + return has_monitor_operations_; + } + + // Find and return the heap location index in heap_locations_. + size_t FindHeapLocationIndex(ReferenceInfo* ref_info, + size_t offset, + HInstruction* index, + int16_t declaring_class_def_index) const { + for (size_t i = 0; i < heap_locations_.size(); i++) { + HeapLocation* loc = heap_locations_[i]; + if (loc->GetReferenceInfo() == ref_info && + loc->GetOffset() == offset && + loc->GetIndex() == index && + loc->GetDeclaringClassDefIndex() == declaring_class_def_index) { + return i; + } + } + return kHeapLocationNotFound; + } + + // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias. 
+ bool MayAlias(size_t index1, size_t index2) const { + if (index1 < index2) { + return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2)); + } else if (index1 > index2) { + return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1)); + } else { + DCHECK(false) << "index1 and index2 are expected to be different"; + return true; + } + } + + void BuildAliasingMatrix() { + const size_t number_of_locations = heap_locations_.size(); + if (number_of_locations == 0) { + return; + } + size_t pos = 0; + // Compute aliasing info between every pair of different heap locations. + // Save the result in a matrix represented as a BitVector. + for (size_t i = 0; i < number_of_locations - 1; i++) { + for (size_t j = i + 1; j < number_of_locations; j++) { + if (ComputeMayAlias(i, j)) { + aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos)); + } + pos++; + } + } + } + + private: + // An allocation cannot alias with a name which already exists at the point + // of the allocation, such as a parameter or a load happening before the allocation. + bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { + if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) { + // Any reference that can alias with the allocation must appear after it in the block/in + // the block's successors. In reverse post order, those instructions will be visited after + // the allocation. + return ref_info2->GetPosition() >= ref_info1->GetPosition(); + } + return true; + } + + bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { + if (ref_info1 == ref_info2) { + return true; + } else if (ref_info1->IsSingleton()) { + return false; + } else if (ref_info2->IsSingleton()) { + return false; + } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) || + !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) { + return false; + } + return true; + } + + // `index1` and `index2` are indices in the array of collected heap locations. + // Returns the position in the bit vector that tracks whether the two heap + // locations may alias. + size_t AliasingMatrixPosition(size_t index1, size_t index2) const { + DCHECK(index2 > index1); + const size_t number_of_locations = heap_locations_.size(); + // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1). + return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1)); + } + + // An additional position is passed in to make sure the calculated position is correct. + size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) { + size_t calculated_position = AliasingMatrixPosition(index1, index2); + DCHECK_EQ(calculated_position, position); + return calculated_position; + } + + // Compute if two locations may alias to each other. + bool ComputeMayAlias(size_t index1, size_t index2) const { + HeapLocation* loc1 = heap_locations_[index1]; + HeapLocation* loc2 = heap_locations_[index2]; + if (loc1->GetOffset() != loc2->GetOffset()) { + // Either two different instance fields, or one is an instance + // field and the other is an array element. + return false; + } + if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) { + // Different types. 
+ return false; + } + if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) { + return false; + } + if (loc1->IsArrayElement() && loc2->IsArrayElement()) { + HInstruction* array_index1 = loc1->GetIndex(); + HInstruction* array_index2 = loc2->GetIndex(); + DCHECK(array_index1 != nullptr); + DCHECK(array_index2 != nullptr); + if (array_index1->IsIntConstant() && + array_index2->IsIntConstant() && + array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) { + // Different constant indices do not alias. + return false; + } + ReferenceInfo* ref_info = loc1->GetReferenceInfo(); + ref_info->SetHasIndexAliasing(true); + } + return true; + } + + ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) { + ReferenceInfo* ref_info = FindReferenceInfoOf(instruction); + if (ref_info == nullptr) { + size_t pos = ref_info_array_.size(); + ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos); + ref_info_array_.push_back(ref_info); + } + return ref_info; + } + + void CreateReferenceInfoForReferenceType(HInstruction* instruction) { + if (instruction->GetType() != Primitive::kPrimNot) { + return; + } + DCHECK(FindReferenceInfoOf(instruction) == nullptr); + GetOrCreateReferenceInfo(instruction); + } + + HeapLocation* GetOrCreateHeapLocation(HInstruction* ref, + size_t offset, + HInstruction* index, + int16_t declaring_class_def_index) { + HInstruction* original_ref = HuntForOriginalReference(ref); + ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref); + size_t heap_location_idx = FindHeapLocationIndex( + ref_info, offset, index, declaring_class_def_index); + if (heap_location_idx == kHeapLocationNotFound) { + HeapLocation* heap_loc = new (GetGraph()->GetArena()) + HeapLocation(ref_info, offset, index, declaring_class_def_index); + heap_locations_.push_back(heap_loc); + return heap_loc; + } + return heap_locations_[heap_location_idx]; + } + + HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { + if (field_info.IsVolatile()) { + has_volatile_ = true; + } + const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); + const size_t offset = field_info.GetFieldOffset().SizeValue(); + return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); + } + + void VisitArrayAccess(HInstruction* array, HInstruction* index) { + GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset, + index, HeapLocation::kDeclaringClassDefIndexForArrays); + } + + void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { + HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + has_heap_stores_ = true; + if (location->GetReferenceInfo()->IsSingleton()) { + // A singleton's location value may be killed by loop side effects if it's + // defined before that loop, and it's stored into inside that loop. + HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); + if (loop_info != nullptr) { + HInstruction* ref = location->GetReferenceInfo()->GetReference(); + DCHECK(ref->IsNewInstance()); + if (loop_info->IsDefinedOutOfTheLoop(ref)) { + // ref's location value may be killed by this loop's side effects. 
+ location->SetValueKilledByLoopSideEffects(true); + } else { + // ref is defined inside this loop so this loop's side effects cannot + // kill its location value at the loop header since ref/its location doesn't + // exist yet at the loop header. + } + } + } else { + // For non-singletons, value_killed_by_loop_side_effects_ is initialized to + // true. + DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true); + } + } + + void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + has_heap_stores_ = true; + } + + // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses + // since we cannot accurately track the fields. + + void VisitArrayGet(HArrayGet* instruction) OVERRIDE { + VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitArraySet(HArraySet* instruction) OVERRIDE { + VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); + has_heap_stores_ = true; + } + + void VisitNewInstance(HNewInstance* new_instance) OVERRIDE { + // Any references appearing in the ref_info_array_ so far cannot alias with new_instance. + CreateReferenceInfoForReferenceType(new_instance); + } + + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitParameterValue(HParameterValue* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitSelect(HSelect* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE { + has_monitor_operations_ = true; + } + + ArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses. + ArenaVector<HeapLocation*> heap_locations_; // All heap locations. + ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations. + bool has_heap_stores_; // If there are no heap stores, LSE acts as GVN with better + // alias analysis and won't be as effective. + bool has_volatile_; // If there are volatile field accesses. + bool has_monitor_operations_; // If there are monitor operations.
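One detail worth spelling out about the aliasing matrix above: each unordered pair (i, j) of heap locations with i < j owns one bit, so n locations need n*(n-1)/2 bits (28 bits for 8 locations, hence the initial 32-bit vector; 496 bits at the kMaxNumberOfHeapLocations cap of 32). The closed form used by AliasingMatrixPosition() can be checked with this small standalone sketch (plain C++, no ART types; PairToBit is a made-up illustrative name):

#include <cassert>
#include <cstddef>
#include <iostream>

// Same closed form as HeapLocationCollector::AliasingMatrixPosition(): rows
// 0..i-1 contribute (n-1) + (n-2) + ... + (n-i) = n*i - i*(i+1)/2 bits, then
// (j - i - 1) more within row i.
size_t PairToBit(size_t n, size_t i, size_t j) {
  assert(i < j && j < n);
  return n * i - (1 + i) * i / 2 + (j - i - 1);
}

int main() {
  const size_t n = 4;  // Four heap locations -> 6 pair bits.
  size_t pos = 0;      // Running counter, in BuildAliasingMatrix() visit order.
  for (size_t i = 0; i + 1 < n; ++i) {
    for (size_t j = i + 1; j < n; ++j) {
      assert(PairToBit(n, i, j) == pos);  // e.g. (1, 3) -> bit 4.
      std::cout << "(" << i << ", " << j << ") -> bit " << pos << "\n";
      ++pos;
    }
  }
  return 0;
}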
+ + DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector); +}; + +class LoadStoreAnalysis : public HOptimization { + public: + explicit LoadStoreAnalysis(HGraph* graph) + : HOptimization(graph, kLoadStoreAnalysisPassName), + heap_location_collector_(graph) {} + + const HeapLocationCollector& GetHeapLocationCollector() const { + return heap_location_collector_; + } + + void Run() OVERRIDE; + + static constexpr const char* kLoadStoreAnalysisPassName = "load_store_analysis"; + + private: + HeapLocationCollector heap_location_collector_; + + DISALLOW_COPY_AND_ASSIGN(LoadStoreAnalysis); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_ diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc new file mode 100644 index 0000000000..24187777f6 --- /dev/null +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "load_store_analysis.h" +#include "nodes.h" +#include "optimizing_unit_test.h" + +#include "gtest/gtest.h" + +namespace art { + +class LoadStoreAnalysisTest : public CommonCompilerTest { + public: + LoadStoreAnalysisTest() : pool_(), allocator_(&pool_) { + graph_ = CreateGraph(&allocator_); + } + + ArenaPool pool_; + ArenaAllocator allocator_; + HGraph* graph_; +}; + +TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { + HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->SetEntryBlock(entry); + + // entry: + // array ParameterValue + // index ParameterValue + // c1 IntConstant + // c2 IntConstant + // c3 IntConstant + // array_get1 ArrayGet [array, c1] + // array_get2 ArrayGet [array, c2] + // array_set1 ArraySet [array, c1, c3] + // array_set2 ArraySet [array, index, c3] + HInstruction* array = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* index = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* c3 = graph_->GetIntConstant(3); + HInstruction* array_get1 = new (&allocator_) HArrayGet(array, c1, Primitive::kPrimInt, 0); + HInstruction* array_get2 = new (&allocator_) HArrayGet(array, c2, Primitive::kPrimInt, 0); + HInstruction* array_set1 = new (&allocator_) HArraySet(array, c1, c3, Primitive::kPrimInt, 0); + HInstruction* array_set2 = new (&allocator_) HArraySet(array, index, c3, Primitive::kPrimInt, 0); + entry->AddInstruction(array); + entry->AddInstruction(index); + entry->AddInstruction(array_get1); + entry->AddInstruction(array_get2); + entry->AddInstruction(array_set1); + entry->AddInstruction(array_set2); + + // Test HeapLocationCollector initialization. + // Should be no heap locations, no operations on the heap. 
+ HeapLocationCollector heap_location_collector(graph_); + ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U); + ASSERT_FALSE(heap_location_collector.HasHeapStores()); + + // Test that after visiting the graph_, it must see following heap locations + // array[c1], array[c2], array[index]; and it should see heap stores. + heap_location_collector.VisitBasicBlock(entry); + ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 3U); + ASSERT_TRUE(heap_location_collector.HasHeapStores()); + + // Test queries on HeapLocationCollector's ref info and index records. + ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(array); + size_t field_off = HeapLocation::kInvalidFieldOffset; + size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays; + size_t loc1 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c1, class_def); + size_t loc2 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c2, class_def); + size_t loc3 = heap_location_collector.FindHeapLocationIndex(ref, field_off, index, class_def); + // must find this reference info for array in HeapLocationCollector. + ASSERT_TRUE(ref != nullptr); + // must find these heap locations; + // and array[c1], array[c2], array[index] should be different heap locations. + ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound); + ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound); + ASSERT_TRUE(loc3 != HeapLocationCollector::kHeapLocationNotFound); + ASSERT_TRUE(loc1 != loc2); + ASSERT_TRUE(loc2 != loc3); + ASSERT_TRUE(loc1 != loc3); + + // Test alias relationships after building aliasing matrix. + // array[1] and array[2] clearly should not alias; + // array[index] should alias with the others, because index is an unknown value. + heap_location_collector.BuildAliasingMatrix(); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3)); + ASSERT_TRUE(heap_location_collector.MayAlias(loc2, loc3)); +} + +TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { + HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->SetEntryBlock(entry); + + // entry: + // object ParameterValue + // c1 IntConstant + // set_field10 InstanceFieldSet [object, c1, 10] + // get_field10 InstanceFieldGet [object, 10] + // get_field20 InstanceFieldGet [object, 20] + + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* object = new (&allocator_) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + Primitive::kPrimNot); + HInstanceFieldSet* set_field10 = new (&allocator_) HInstanceFieldSet(object, + c1, + nullptr, + Primitive::kPrimInt, + MemberOffset(10), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); + HInstanceFieldGet* get_field10 = new (&allocator_) HInstanceFieldGet(object, + nullptr, + Primitive::kPrimInt, + MemberOffset(10), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); + HInstanceFieldGet* get_field20 = new (&allocator_) HInstanceFieldGet(object, + nullptr, + Primitive::kPrimInt, + MemberOffset(20), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); + entry->AddInstruction(object); + entry->AddInstruction(set_field10); + entry->AddInstruction(get_field10); + entry->AddInstruction(get_field20); + + // Test HeapLocationCollector initialization. + // Should be no heap locations, no operations on the heap.
+ HeapLocationCollector heap_location_collector(graph_); + ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U); + ASSERT_FALSE(heap_location_collector.HasHeapStores()); + + // Test that after visiting the graph, it must see following heap locations + // object.field10, object.field20 and it should see heap stores. + heap_location_collector.VisitBasicBlock(entry); + ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 2U); + ASSERT_TRUE(heap_location_collector.HasHeapStores()); + + // Test queries on HeapLocationCollector's ref info and index records. + ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(object); + size_t loc1 = heap_location_collector.FindHeapLocationIndex( + ref, 10, nullptr, kUnknownClassDefIndex); + size_t loc2 = heap_location_collector.FindHeapLocationIndex( + ref, 20, nullptr, kUnknownClassDefIndex); + // must find references info for object and in HeapLocationCollector. + ASSERT_TRUE(ref != nullptr); + // must find these heap locations. + ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound); + ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound); + // different fields of same object. + ASSERT_TRUE(loc1 != loc2); + // accesses to different fields of the same object should not alias. + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); +} + +} // namespace art diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 76c9d2324b..211528b4bd 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "load_store_analysis.h" #include "load_store_elimination.h" #include "escape.h" @@ -23,477 +24,6 @@ namespace art { -class ReferenceInfo; - -// A cap for the number of heap locations to prevent pathological time/space consumption. -// The number of heap locations for most of the methods stays below this threshold. -constexpr size_t kMaxNumberOfHeapLocations = 32; - -// A ReferenceInfo contains additional info about a reference such as -// whether it's a singleton, returned, etc. -class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { - public: - ReferenceInfo(HInstruction* reference, size_t pos) - : reference_(reference), - position_(pos), - is_singleton_(true), - is_singleton_and_not_returned_(true), - is_singleton_and_not_deopt_visible_(true), - has_index_aliasing_(false) { - CalculateEscape(reference_, - nullptr, - &is_singleton_, - &is_singleton_and_not_returned_, - &is_singleton_and_not_deopt_visible_); - } - - HInstruction* GetReference() const { - return reference_; - } - - size_t GetPosition() const { - return position_; - } - - // Returns true if reference_ is the only name that can refer to its value during - // the lifetime of the method. So it's guaranteed to not have any alias in - // the method (including its callees). - bool IsSingleton() const { - return is_singleton_; - } - - // Returns true if reference_ is a singleton and not returned to the caller or - // used as an environment local of an HDeoptimize instruction. - // The allocation and stores into reference_ may be eliminated for such cases. - bool IsSingletonAndRemovable() const { - return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_; - } - - // Returns true if reference_ is a singleton and returned to the caller or - // used as an environment local of an HDeoptimize instruction. 
- bool IsSingletonAndNonRemovable() const { - return is_singleton_ && - (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_); - } - - bool HasIndexAliasing() { - return has_index_aliasing_; - } - - void SetHasIndexAliasing(bool has_index_aliasing) { - // Only allow setting to true. - DCHECK(has_index_aliasing); - has_index_aliasing_ = has_index_aliasing; - } - - private: - HInstruction* const reference_; - const size_t position_; // position in HeapLocationCollector's ref_info_array_. - - // Can only be referred to by a single name in the method. - bool is_singleton_; - // Is singleton and not returned to caller. - bool is_singleton_and_not_returned_; - // Is singleton and not used as an environment local of HDeoptimize. - bool is_singleton_and_not_deopt_visible_; - // Some heap locations with reference_ have array index aliasing, - // e.g. arr[i] and arr[j] may be the same location. - bool has_index_aliasing_; - - DISALLOW_COPY_AND_ASSIGN(ReferenceInfo); -}; - -// A heap location is a reference-offset/index pair that a value can be loaded from -// or stored to. -class HeapLocation : public ArenaObject<kArenaAllocMisc> { - public: - static constexpr size_t kInvalidFieldOffset = -1; - - // TODO: more fine-grained array types. - static constexpr int16_t kDeclaringClassDefIndexForArrays = -1; - - HeapLocation(ReferenceInfo* ref_info, - size_t offset, - HInstruction* index, - int16_t declaring_class_def_index) - : ref_info_(ref_info), - offset_(offset), - index_(index), - declaring_class_def_index_(declaring_class_def_index), - value_killed_by_loop_side_effects_(true) { - DCHECK(ref_info != nullptr); - DCHECK((offset == kInvalidFieldOffset && index != nullptr) || - (offset != kInvalidFieldOffset && index == nullptr)); - if (ref_info->IsSingleton() && !IsArrayElement()) { - // Assume this location's value cannot be killed by loop side effects - // until proven otherwise. - value_killed_by_loop_side_effects_ = false; - } - } - - ReferenceInfo* GetReferenceInfo() const { return ref_info_; } - size_t GetOffset() const { return offset_; } - HInstruction* GetIndex() const { return index_; } - - // Returns the definition of declaring class' dex index. - // It's kDeclaringClassDefIndexForArrays for an array element. - int16_t GetDeclaringClassDefIndex() const { - return declaring_class_def_index_; - } - - bool IsArrayElement() const { - return index_ != nullptr; - } - - bool IsValueKilledByLoopSideEffects() const { - return value_killed_by_loop_side_effects_; - } - - void SetValueKilledByLoopSideEffects(bool val) { - value_killed_by_loop_side_effects_ = val; - } - - private: - ReferenceInfo* const ref_info_; // reference for instance/static field or array access. - const size_t offset_; // offset of static/instance field. - HInstruction* const index_; // index of an array element. - const int16_t declaring_class_def_index_; // declaring class's def's dex index. - bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop - // side effects because this location is stored - // into inside a loop. This gives - // better info on whether a singleton's location - // value may be killed by loop side effects. 
- - DISALLOW_COPY_AND_ASSIGN(HeapLocation); -}; - -static HInstruction* HuntForOriginalReference(HInstruction* ref) { - DCHECK(ref != nullptr); - while (ref->IsNullCheck() || ref->IsBoundType()) { - ref = ref->InputAt(0); - } - return ref; -} - -// A HeapLocationCollector collects all relevant heap locations and keeps -// an aliasing matrix for all locations. -class HeapLocationCollector : public HGraphVisitor { - public: - static constexpr size_t kHeapLocationNotFound = -1; - // Start with a single uint32_t word. That's enough bits for pair-wise - // aliasing matrix of 8 heap locations. - static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32; - - explicit HeapLocationCollector(HGraph* graph) - : HGraphVisitor(graph), - ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)), - heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)), - aliasing_matrix_(graph->GetArena(), - kInitialAliasingMatrixBitVectorSize, - true, - kArenaAllocLSE), - has_heap_stores_(false), - has_volatile_(false), - has_monitor_operations_(false) {} - - size_t GetNumberOfHeapLocations() const { - return heap_locations_.size(); - } - - HeapLocation* GetHeapLocation(size_t index) const { - return heap_locations_[index]; - } - - ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const { - for (size_t i = 0; i < ref_info_array_.size(); i++) { - ReferenceInfo* ref_info = ref_info_array_[i]; - if (ref_info->GetReference() == ref) { - DCHECK_EQ(i, ref_info->GetPosition()); - return ref_info; - } - } - return nullptr; - } - - bool HasHeapStores() const { - return has_heap_stores_; - } - - bool HasVolatile() const { - return has_volatile_; - } - - bool HasMonitorOps() const { - return has_monitor_operations_; - } - - // Find and return the heap location index in heap_locations_. - size_t FindHeapLocationIndex(ReferenceInfo* ref_info, - size_t offset, - HInstruction* index, - int16_t declaring_class_def_index) const { - for (size_t i = 0; i < heap_locations_.size(); i++) { - HeapLocation* loc = heap_locations_[i]; - if (loc->GetReferenceInfo() == ref_info && - loc->GetOffset() == offset && - loc->GetIndex() == index && - loc->GetDeclaringClassDefIndex() == declaring_class_def_index) { - return i; - } - } - return kHeapLocationNotFound; - } - - // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias. - bool MayAlias(size_t index1, size_t index2) const { - if (index1 < index2) { - return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2)); - } else if (index1 > index2) { - return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1)); - } else { - DCHECK(false) << "index1 and index2 are expected to be different"; - return true; - } - } - - void BuildAliasingMatrix() { - const size_t number_of_locations = heap_locations_.size(); - if (number_of_locations == 0) { - return; - } - size_t pos = 0; - // Compute aliasing info between every pair of different heap locations. - // Save the result in a matrix represented as a BitVector. - for (size_t i = 0; i < number_of_locations - 1; i++) { - for (size_t j = i + 1; j < number_of_locations; j++) { - if (ComputeMayAlias(i, j)) { - aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos)); - } - pos++; - } - } - } - - private: - // An allocation cannot alias with a name which already exists at the point - // of the allocation, such as a parameter or a load happening before the allocation. 
- bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { - if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) { - // Any reference that can alias with the allocation must appear after it in the block/in - // the block's successors. In reverse post order, those instructions will be visited after - // the allocation. - return ref_info2->GetPosition() >= ref_info1->GetPosition(); - } - return true; - } - - bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { - if (ref_info1 == ref_info2) { - return true; - } else if (ref_info1->IsSingleton()) { - return false; - } else if (ref_info2->IsSingleton()) { - return false; - } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) || - !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) { - return false; - } - return true; - } - - // `index1` and `index2` are indices in the array of collected heap locations. - // Returns the position in the bit vector that tracks whether the two heap - // locations may alias. - size_t AliasingMatrixPosition(size_t index1, size_t index2) const { - DCHECK(index2 > index1); - const size_t number_of_locations = heap_locations_.size(); - // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1). - return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1)); - } - - // An additional position is passed in to make sure the calculated position is correct. - size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) { - size_t calculated_position = AliasingMatrixPosition(index1, index2); - DCHECK_EQ(calculated_position, position); - return calculated_position; - } - - // Compute if two locations may alias to each other. - bool ComputeMayAlias(size_t index1, size_t index2) const { - HeapLocation* loc1 = heap_locations_[index1]; - HeapLocation* loc2 = heap_locations_[index2]; - if (loc1->GetOffset() != loc2->GetOffset()) { - // Either two different instance fields, or one is an instance - // field and the other is an array element. - return false; - } - if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) { - // Different types. - return false; - } - if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) { - return false; - } - if (loc1->IsArrayElement() && loc2->IsArrayElement()) { - HInstruction* array_index1 = loc1->GetIndex(); - HInstruction* array_index2 = loc2->GetIndex(); - DCHECK(array_index1 != nullptr); - DCHECK(array_index2 != nullptr); - if (array_index1->IsIntConstant() && - array_index2->IsIntConstant() && - array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) { - // Different constant indices do not alias. 
- return false; - } - ReferenceInfo* ref_info = loc1->GetReferenceInfo(); - ref_info->SetHasIndexAliasing(true); - } - return true; - } - - ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) { - ReferenceInfo* ref_info = FindReferenceInfoOf(instruction); - if (ref_info == nullptr) { - size_t pos = ref_info_array_.size(); - ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos); - ref_info_array_.push_back(ref_info); - } - return ref_info; - } - - void CreateReferenceInfoForReferenceType(HInstruction* instruction) { - if (instruction->GetType() != Primitive::kPrimNot) { - return; - } - DCHECK(FindReferenceInfoOf(instruction) == nullptr); - GetOrCreateReferenceInfo(instruction); - } - - HeapLocation* GetOrCreateHeapLocation(HInstruction* ref, - size_t offset, - HInstruction* index, - int16_t declaring_class_def_index) { - HInstruction* original_ref = HuntForOriginalReference(ref); - ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref); - size_t heap_location_idx = FindHeapLocationIndex( - ref_info, offset, index, declaring_class_def_index); - if (heap_location_idx == kHeapLocationNotFound) { - HeapLocation* heap_loc = new (GetGraph()->GetArena()) - HeapLocation(ref_info, offset, index, declaring_class_def_index); - heap_locations_.push_back(heap_loc); - return heap_loc; - } - return heap_locations_[heap_location_idx]; - } - - HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { - if (field_info.IsVolatile()) { - has_volatile_ = true; - } - const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); - const size_t offset = field_info.GetFieldOffset().SizeValue(); - return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); - } - - void VisitArrayAccess(HInstruction* array, HInstruction* index) { - GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset, - index, HeapLocation::kDeclaringClassDefIndexForArrays); - } - - void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { - HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - has_heap_stores_ = true; - if (location->GetReferenceInfo()->IsSingleton()) { - // A singleton's location value may be killed by loop side effects if it's - // defined before that loop, and it's stored into inside that loop. - HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); - if (loop_info != nullptr) { - HInstruction* ref = location->GetReferenceInfo()->GetReference(); - DCHECK(ref->IsNewInstance()); - if (loop_info->IsDefinedOutOfTheLoop(ref)) { - // ref's location value may be killed by this loop's side effects. - location->SetValueKilledByLoopSideEffects(true); - } else { - // ref is defined inside this loop so this loop's side effects cannot - // kill its location value at the loop header since ref/its location doesn't - // exist yet at the loop header. - } - } - } else { - // For non-singletons, value_killed_by_loop_side_effects_ is inited to - // true. 
- DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true); - } - } - - void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - has_heap_stores_ = true; - } - - // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses - // since we cannot accurately track the fields. - - void VisitArrayGet(HArrayGet* instruction) OVERRIDE { - VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitArraySet(HArraySet* instruction) OVERRIDE { - VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); - has_heap_stores_ = true; - } - - void VisitNewInstance(HNewInstance* new_instance) OVERRIDE { - // Any references appearing in the ref_info_array_ so far cannot alias with new_instance. - CreateReferenceInfoForReferenceType(new_instance); - } - - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitParameterValue(HParameterValue* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitSelect(HSelect* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE { - has_monitor_operations_ = true; - } - - ArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses. - ArenaVector<HeapLocation*> heap_locations_; // All heap locations. - ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations. - bool has_heap_stores_; // If there is no heap stores, LSE acts as GVN with better - // alias analysis and won't be as effective. - bool has_volatile_; // If there are volatile field accesses. - bool has_monitor_operations_; // If there are monitor operations. - - DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector); -}; - // An unknown heap value. Loads with such a value in the heap location cannot be eliminated. // A heap location can be set to kUnknownHeapValue when: // - initially set a value. @@ -516,7 +46,7 @@ class LSEVisitor : public HGraphVisitor { side_effects_(side_effects), heap_values_for_(graph->GetBlocks().size(), ArenaVector<HInstruction*>(heap_locations_collector. 
- GetNumberOfHeapLocations(), + GetNumberOfHeapLocations(), kUnknownHeapValue, graph->GetArena()->Adapter(kArenaAllocLSE)), graph->GetArena()->Adapter(kArenaAllocLSE)), @@ -760,7 +290,7 @@ class LSEVisitor : public HGraphVisitor { size_t offset, HInstruction* index, int16_t declaring_class_def_index) { - HInstruction* original_ref = HuntForOriginalReference(ref); + HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); size_t idx = heap_location_collector_.FindHeapLocationIndex( ref_info, offset, index, declaring_class_def_index); @@ -827,7 +357,7 @@ class LSEVisitor : public HGraphVisitor { HInstruction* index, int16_t declaring_class_def_index, HInstruction* value) { - HInstruction* original_ref = HuntForOriginalReference(ref); + HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); size_t idx = heap_location_collector_.FindHeapLocationIndex( ref_info, offset, index, declaring_class_def_index); @@ -1127,25 +657,12 @@ void LoadStoreElimination::Run() { // Skip this optimization. return; } - HeapLocationCollector heap_location_collector(graph_); - for (HBasicBlock* block : graph_->GetReversePostOrder()) { - heap_location_collector.VisitBasicBlock(block); - } - if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) { - // Bail out if there are too many heap locations to deal with. - return; - } - if (!heap_location_collector.HasHeapStores()) { - // Without heap stores, this pass would act mostly as GVN on heap accesses. + const HeapLocationCollector& heap_location_collector = lsa_.GetHeapLocationCollector(); + if (heap_location_collector.GetNumberOfHeapLocations() == 0) { + // No HeapLocation information from LSA, skip this optimization. return; } - if (heap_location_collector.HasVolatile() || heap_location_collector.HasMonitorOps()) { - // Don't do load/store elimination if the method has volatile field accesses or - // monitor operations, for now. - // TODO: do it right. 
- return; - } - heap_location_collector.BuildAliasingMatrix(); + LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_); for (HBasicBlock* block : graph_->GetReversePostOrder()) { lse_visitor.VisitBasicBlock(block); diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h index 1d9e5c8da6..efe71c733a 100644 --- a/compiler/optimizing/load_store_elimination.h +++ b/compiler/optimizing/load_store_elimination.h @@ -22,12 +22,16 @@ namespace art { class SideEffectsAnalysis; +class LoadStoreAnalysis; class LoadStoreElimination : public HOptimization { public: - LoadStoreElimination(HGraph* graph, const SideEffectsAnalysis& side_effects) + LoadStoreElimination(HGraph* graph, + const SideEffectsAnalysis& side_effects, + const LoadStoreAnalysis& lsa) : HOptimization(graph, kLoadStoreEliminationPassName), - side_effects_(side_effects) {} + side_effects_(side_effects), + lsa_(lsa) {} void Run() OVERRIDE; @@ -35,6 +39,7 @@ class LoadStoreElimination : public HOptimization { private: const SideEffectsAnalysis& side_effects_; + const LoadStoreAnalysis& lsa_; DISALLOW_COPY_AND_ASSIGN(LoadStoreElimination); }; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 833f32b282..bde7f2c1e0 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2636,15 +2636,17 @@ bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const { std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) { switch (rhs) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: - return os << "string_init"; + return os << "StringInit"; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - return os << "recursive"; + return os << "Recursive"; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: + return os << "BootImageLinkTimePcRelative"; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - return os << "direct"; + return os << "Direct"; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - return os << "dex_cache_pc_relative"; + return os << "DexCachePcRelative"; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: - return os << "dex_cache_via_method"; + return os << "DexCacheViaMethod"; default: LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs); UNREACHABLE(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 72774da1d1..4d96fbe24c 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -4153,6 +4153,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Use the method's own ArtMethod* loaded by the register allocator. kRecursive, + // Use PC-relative boot image ArtMethod* address that will be known at link time. + // Used for boot image methods referenced by boot image code. + kBootImageLinkTimePcRelative, + // Use ArtMethod* at a known address, embed the direct address in the code. // Used for app->boot calls with non-relocatable image and for JIT-compiled calls. 
kDirectAddress, @@ -4292,6 +4296,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { bool HasPcRelativeDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; } + bool HasPcRelativeMethodLoadKind() const { + return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative || + GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; + } bool HasCurrentMethodInput() const { // This function can be called only after the invoke has been fully initialized by the builder. if (NeedsCurrentMethodInput(GetMethodLoadKind())) { diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index f928f71209..e5ab00bce3 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -83,6 +83,7 @@ #include "jit/jit_code_cache.h" #include "jni/quick/jni_compiler.h" #include "licm.h" +#include "load_store_analysis.h" #include "load_store_elimination.h" #include "loop_optimization.h" #include "nodes.h" @@ -465,7 +466,8 @@ static HOptimization* BuildOptimization( const DexCompilationUnit& dex_compilation_unit, VariableSizedHandleScope* handles, SideEffectsAnalysis* most_recent_side_effects, - HInductionVarAnalysis* most_recent_induction) { + HInductionVarAnalysis* most_recent_induction, + LoadStoreAnalysis* most_recent_lsa) { std::string opt_name = ConvertPassNameToOptimizationName(pass_name); if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) { CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); @@ -499,15 +501,18 @@ static HOptimization* BuildOptimization( } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { return new (arena) HInductionVarAnalysis(graph); } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) { - return new (arena) InstructionSimplifier(graph, codegen, stats, pass_name.c_str()); + return new (arena) InstructionSimplifier(graph, codegen, driver, stats, pass_name.c_str()); } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) { return new (arena) IntrinsicsRecognizer(graph, stats); } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) { CHECK(most_recent_side_effects != nullptr); return new (arena) LICM(graph, *most_recent_side_effects, stats); + } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) { + return new (arena) LoadStoreAnalysis(graph); } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) { CHECK(most_recent_side_effects != nullptr); - return new (arena) LoadStoreElimination(graph, *most_recent_side_effects); + CHECK(most_recent_lsa != nullptr); + return new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa); } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) { return new (arena) SideEffectsAnalysis(graph); } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) { @@ -556,6 +561,7 @@ static ArenaVector<HOptimization*> BuildOptimizations( // in the pass name list. 
SideEffectsAnalysis* most_recent_side_effects = nullptr; HInductionVarAnalysis* most_recent_induction = nullptr; + LoadStoreAnalysis* most_recent_lsa = nullptr; ArenaVector<HOptimization*> ret(arena->Adapter()); for (const std::string& pass_name : pass_names) { HOptimization* opt = BuildOptimization( @@ -568,7 +574,8 @@ static ArenaVector<HOptimization*> BuildOptimizations( dex_compilation_unit, handles, most_recent_side_effects, - most_recent_induction); + most_recent_induction, + most_recent_lsa); CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\""; ret.push_back(opt); @@ -577,6 +584,8 @@ static ArenaVector<HOptimization*> BuildOptimizations( most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt); } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { most_recent_induction = down_cast<HInductionVarAnalysis*>(opt); + } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) { + most_recent_lsa = down_cast<LoadStoreAnalysis*>(opt); } } return ret; @@ -763,7 +772,8 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination( graph, stats, "dead_code_elimination$final"); HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding"); - InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, codegen, stats); + InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier( + graph, codegen, driver, stats); HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats); HConstantFolding* fold2 = new (arena) HConstantFolding( graph, "constant_folding$after_inlining"); @@ -777,15 +787,16 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction); HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction); - LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2); + LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph); + LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa); HSharpening* sharpening = new (arena) HSharpening( graph, codegen, dex_compilation_unit, driver, handles); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( - graph, codegen, stats, "instruction_simplifier$after_inlining"); + graph, codegen, driver, stats, "instruction_simplifier$after_inlining"); InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( - graph, codegen, stats, "instruction_simplifier$after_bce"); + graph, codegen, driver, stats, "instruction_simplifier$after_bce"); InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( - graph, codegen, stats, "instruction_simplifier$before_codegen"); + graph, codegen, driver, stats, "instruction_simplifier$before_codegen"); IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats); CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph); CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats); @@ -817,6 +828,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, fold3, // evaluates code generated by dynamic bce simplify3, side_effects2, + lsa, lse, cha_guard, dce3, diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index ef2c432086..bce54bf49a 100644 --- 
a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -58,6 +58,19 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { DCHECK(base_ != nullptr); } + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + // If this is an invoke with PC-relative pointer to a method, + // we need to add the base as the special input. + if (invoke->GetMethodLoadKind() == + HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative && + !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) { + InitializePCRelativeBasePointer(); + // Add the special argument base to the method. + DCHECK(!invoke->HasCurrentMethodInput()); + invoke->AddSpecialInput(base_); + } + } + void VisitLoadClass(HLoadClass* load_class) OVERRIDE { HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); switch (load_kind) { diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index a1c916f43a..2743df9dcf 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -205,13 +205,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // method pointer from the invoke. if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasCurrentMethodInput()) { - DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache()); + DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind()); return; } bool base_added = false; if (invoke_static_or_direct != nullptr && - invoke_static_or_direct->HasPcRelativeDexCache() && + invoke_static_or_direct->HasPcRelativeMethodLoadKind() && !IsCallFreeIntrinsic<IntrinsicLocationsBuilderX86>(invoke, codegen_)) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke); // Add the extra parameter. 
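Aside: both fixup passes above now key off a single predicate that covers every PC-relative method load kind, instead of checking the dex-cache kind alone. A small sketch of that enum/predicate grouping (hypothetical definitions that only mirror the kinds named in this patch, not the ART declarations):

    enum class MethodLoadKind {
      kStringInit,
      kRecursive,
      kDirectAddress,
      kBootImageLinkTimePcRelative,  // needs the PC-relative base as an extra input
      kDexCachePcRelative,           // needs the PC-relative base as an extra input
      kDexCacheViaMethod,
    };

    // Grouping the two PC-relative kinds keeps back ends from having to
    // enumerate them one by one when deciding whether to add the base input.
    constexpr bool HasPcRelativeMethodLoadKind(MethodLoadKind kind) {
      return kind == MethodLoadKind::kBootImageLinkTimePcRelative ||
             kind == MethodLoadKind::kDexCachePcRelative;
    }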
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 9a0316330d..7b8104b8ca 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -16,6 +16,7 @@ #include "sharpening.h" +#include "art_method-inl.h" #include "base/casts.h" #include "base/enums.h" #include "class_linker.h" @@ -41,7 +42,9 @@ void HSharpening::Run() { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); if (instruction->IsInvokeStaticOrDirect()) { - SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_); + SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), + codegen_, + compiler_driver_); } else if (instruction->IsLoadString()) { ProcessLoadString(instruction->AsLoadString()); } @@ -68,9 +71,21 @@ static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options) return IsInBootImage(method) && !options.GetCompilePic(); } +static bool BootImageAOTCanEmbedMethod(ArtMethod* method, CompilerDriver* compiler_driver) { + DCHECK(compiler_driver->GetCompilerOptions().IsBootImage()); + if (!compiler_driver->GetSupportBootImageFixup()) { + return false; + } + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> klass = method->GetDeclaringClass(); + DCHECK(klass != nullptr); + const DexFile& dex_file = klass->GetDexFile(); + return compiler_driver->IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex())); +} void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen) { + CodeGenerator* codegen, + CompilerDriver* compiler_driver) { if (invoke->IsStringInit()) { // Not using the dex cache arrays. But we could still try to use a better dispatch... // TODO: Use direct_method and direct_code for the appropriate StringFactory method. @@ -108,6 +123,10 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress; method_load_data = reinterpret_cast<uintptr_t>(callee); code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + } else if (codegen->GetCompilerOptions().IsBootImage() && + BootImageAOTCanEmbedMethod(callee, compiler_driver)) { + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else { // Use PC-relative access to the dex cache arrays. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative; @@ -167,8 +186,8 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, if (!compiler_driver->GetSupportBootImageFixup()) { // compiler_driver_test. Do not sharpen. 
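Aside: the sharpening change above inserts one more rung into the ladder that picks how a direct call loads its target ArtMethod. A simplified sketch of that decision order, with hypothetical boolean parameters standing in for the real ART queries; this is only an illustration of the ordering, not the actual implementation:

    #include <cstdio>

    // Loosely mirrors the order in SharpenInvokeStaticOrDirect; not ART code.
    const char* ChooseMethodLoadKind(bool calls_self,
                                     bool can_embed_direct_address,
                                     bool compiling_boot_image,
                                     bool callee_class_is_image_class) {
      if (calls_self) {
        return "Recursive";                    // target is the method being compiled
      } else if (can_embed_direct_address) {
        return "DirectAddress";                // JIT, or non-PIC callee in the boot image
      } else if (compiling_boot_image && callee_class_is_image_class) {
        return "BootImageLinkTimePcRelative";  // the new case added above
      } else {
        return "DexCachePcRelative";           // fall back to the dex cache arrays
      }
    }

    int main() {
      std::printf("%s\n", ChooseMethodLoadKind(false, false, true, true));
      return 0;
    }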
desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; - } else if ((klass != nullptr) && compiler_driver->IsImageClass( - dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) { + } else if ((klass != nullptr) && + compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) { is_in_boot_image = true; desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative; } else { diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 10707c796f..f74b0afdbf 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -55,7 +55,9 @@ class HSharpening : public HOptimization { REQUIRES_SHARED(Locks::mutator_lock_); // Used by Sharpening and InstructionSimplifier. - static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, CodeGenerator* codegen); + static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, + CodeGenerator* codegen, + CompilerDriver* compiler_driver); private: void ProcessLoadString(HLoadString* load_string); diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index e628a9f40d..d1da67f26e 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -1252,7 +1252,39 @@ END art_quick_handle_fill_data .extern artLockObjectFromCode ENTRY art_quick_lock_object beqz $a0, art_quick_throw_null_pointer_exception + li $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE + li $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED +.Lretry_lock: + lw $t0, THREAD_ID_OFFSET(rSELF) # TODO: Can the thread ID really change during the loop? + ll $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) + and $t2, $t1, $t3 # zero the gc bits + bnez $t2, .Lnot_unlocked # already thin locked + # Unlocked case - $t1: original lock word that's zero except for the read barrier bits. + or $t2, $t1, $t0 # $t2 holds thread id with count of 0 with preserved read barrier bits + sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) + beqz $t2, .Lretry_lock # store failed, retry nop + jalr $zero, $ra + sync # full (LoadLoad|LoadStore) memory barrier +.Lnot_unlocked: + # $t1: original lock word, $t0: thread_id with count of 0 and zero read barrier bits + srl $t2, $t1, LOCK_WORD_STATE_SHIFT + bnez $t2, .Lslow_lock # if either of the top two bits are set, go slow path + xor $t2, $t1, $t0 # lock_word.ThreadId() ^ self->ThreadId() + andi $t2, $t2, 0xFFFF # zero top 16 bits + bnez $t2, .Lslow_lock # lock word and self thread id's match -> recursive lock + # otherwise contention, go to slow path + and $t2, $t1, $t3 # zero the gc bits + addu $t2, $t2, $t8 # increment count in lock word + srl $t2, $t2, LOCK_WORD_STATE_SHIFT # if the first gc state bit is set, we overflowed. 
+ bnez $t2, .Lslow_lock # if we overflow the count go slow path + addu $t2, $t1, $t8 # increment count for real + sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) + beqz $t2, .Lretry_lock # store failed, retry + nop + jalr $zero, $ra + nop +.Lslow_lock: SETUP_SAVE_REFS_ONLY_FRAME # save callee saves in case we block la $t9, artLockObjectFromCode jalr $t9 # (Object* obj, Thread*) @@ -1276,11 +1308,55 @@ END art_quick_lock_object_no_inline .extern artUnlockObjectFromCode ENTRY art_quick_unlock_object beqz $a0, art_quick_throw_null_pointer_exception + li $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE + li $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED +.Lretry_unlock: +#ifndef USE_READ_BARRIER + lw $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) +#else + ll $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) # Need to use atomic read-modify-write for read barrier +#endif + srl $t2, $t1, LOCK_WORD_STATE_SHIFT + bnez $t2, .Lslow_unlock # if either of the top two bits are set, go slow path + lw $t0, THREAD_ID_OFFSET(rSELF) + and $t2, $t1, $t3 # zero the gc bits + xor $t2, $t2, $t0 # lock_word.ThreadId() ^ self->ThreadId() + andi $t2, $t2, 0xFFFF # zero top 16 bits + bnez $t2, .Lslow_unlock # do lock word and self thread id's match? + and $t2, $t1, $t3 # zero the gc bits + bgeu $t2, $t8, .Lrecursive_thin_unlock + # transition to unlocked + nor $t2, $zero, $t3 # $t2 = LOCK_WORD_GC_STATE_MASK_SHIFTED + and $t2, $t1, $t2 # $t2: zero except for the preserved gc bits + sync # full (LoadStore|StoreStore) memory barrier +#ifndef USE_READ_BARRIER + jalr $zero, $ra + sw $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) +#else + sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) + beqz $t2, .Lretry_unlock # store failed, retry nop - SETUP_SAVE_REFS_ONLY_FRAME # save callee saves in case exception allocation triggers GC + jalr $zero, $ra + nop +#endif +.Lrecursive_thin_unlock: + # t1: original lock word + subu $t2, $t1, $t8 # decrement count +#ifndef USE_READ_BARRIER + jalr $zero, $ra + sw $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) +#else + sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) + beqz $t2, .Lretry_unlock # store failed, retry + nop + jalr $zero, $ra + nop +#endif +.Lslow_unlock: + SETUP_SAVE_REFS_ONLY_FRAME # save callee saves in case exception allocation triggers GC la $t9, artUnlockObjectFromCode - jalr $t9 # (Object* obj, Thread*) - move $a1, rSELF # pass Thread::Current + jalr $t9 # (Object* obj, Thread*) + move $a1, rSELF # pass Thread::Current RETURN_IF_ZERO END art_quick_unlock_object diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index 40bad16291..c9eeb7ca65 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -1222,8 +1222,38 @@ END art_quick_handle_fill_data */ .extern artLockObjectFromCode ENTRY_NO_GP art_quick_lock_object - beq $a0, $zero, art_quick_throw_null_pointer_exception + beqzc $a0, art_quick_throw_null_pointer_exception + li $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE + li $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED +.Lretry_lock: + lw $t0, THREAD_ID_OFFSET(rSELF) # TODO: Can the thread ID really change during the loop? + ll $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) + and $t2, $t1, $t3 # zero the gc bits + bnezc $t2, .Lnot_unlocked # already thin locked + # Unlocked case - $t1: original lock word that's zero except for the read barrier bits. 
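Aside: the new MIPS lock/unlock fast paths above (and the MIPS64 versions that follow) implement ART's thin-lock scheme with an ll/sc retry loop. A compact C++ analogue of the acquire side is sketched below; the bit layout is simplified (the real lock word also carries GC and read-barrier bits that the assembly preserves), so treat it only as an illustration of the retry structure.

    #include <atomic>
    #include <cstdint>

    // Simplified thin-lock word: top two bits = state, low 16 bits = owner
    // thread id, bits 16..29 = recursion count.  Not the exact ART layout.
    constexpr uint32_t kOwnerMask = 0xFFFFu;
    constexpr uint32_t kCountOne  = 1u << 16;
    constexpr uint32_t kStateMask = 3u << 30;

    // Returns true on fast-path success; false means take the slow path
    // (inflated lock, hash code, contention, or recursion-count overflow).
    bool ThinLockFastPath(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
      while (true) {
        uint32_t old_word = lock_word.load(std::memory_order_relaxed);
        if ((old_word & kStateMask) != 0) {
          return false;                       // no longer a thin lock
        }
        if (old_word == 0) {
          // Unlocked: install our thread id with a count of zero.
          if (lock_word.compare_exchange_weak(old_word, thread_id,
                                              std::memory_order_acquire)) {
            return true;                      // corresponds to sc + sync above
          }
          continue;                           // store failed, retry
        }
        if ((old_word & kOwnerMask) != thread_id) {
          return false;                       // owned by another thread: contention
        }
        uint32_t new_word = old_word + kCountOne;
        if ((new_word & kStateMask) != 0) {
          return false;                       // count would overflow into the state bits
        }
        if (lock_word.compare_exchange_weak(old_word, new_word,
                                            std::memory_order_relaxed)) {
          return true;                        // recursive acquisition
        }
      }
    }

The beqz/beqzc after sc plays the role of the compare_exchange failure branch, and the trailing sync provides the acquire barrier that the C++ memory order expresses.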
+ or $t2, $t1, $t0 # $t2 holds thread id with count of 0 with preserved read barrier bits + sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) + beqzc $t2, .Lretry_lock # store failed, retry + sync # full (LoadLoad|LoadStore) memory barrier + jic $ra, 0 +.Lnot_unlocked: + # $t1: original lock word, $t0: thread_id with count of 0 and zero read barrier bits + srl $t2, $t1, LOCK_WORD_STATE_SHIFT + bnezc $t2, .Lslow_lock # if either of the top two bits are set, go slow path + xor $t2, $t1, $t0 # lock_word.ThreadId() ^ self->ThreadId() + andi $t2, $t2, 0xFFFF # zero top 16 bits + bnezc $t2, .Lslow_lock # lock word and self thread id's match -> recursive lock + # otherwise contention, go to slow path + and $t2, $t1, $t3 # zero the gc bits + addu $t2, $t2, $t8 # increment count in lock word + srl $t2, $t2, LOCK_WORD_STATE_SHIFT # if the first gc state bit is set, we overflowed. + bnezc $t2, .Lslow_lock # if we overflow the count go slow path + addu $t2, $t1, $t8 # increment count for real + sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) + beqzc $t2, .Lretry_lock # store failed, retry nop + jic $ra, 0 +.Lslow_lock: .cpsetup $t9, $t8, art_quick_lock_object SETUP_SAVE_REFS_ONLY_FRAME # save callee saves in case we block jal artLockObjectFromCode # (Object* obj, Thread*) @@ -1246,8 +1276,48 @@ END art_quick_lock_object_no_inline */ .extern artUnlockObjectFromCode ENTRY_NO_GP art_quick_unlock_object - beq $a0, $zero, art_quick_throw_null_pointer_exception + beqzc $a0, art_quick_throw_null_pointer_exception + li $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE + li $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED +.Lretry_unlock: +#ifndef USE_READ_BARRIER + lw $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) +#else + ll $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) # Need to use atomic read-modify-write for read barrier +#endif + srl $t2, $t1, LOCK_WORD_STATE_SHIFT + bnezc $t2, .Lslow_unlock # if either of the top two bits are set, go slow path + lw $t0, THREAD_ID_OFFSET(rSELF) + and $t2, $t1, $t3 # zero the gc bits + xor $t2, $t2, $t0 # lock_word.ThreadId() ^ self->ThreadId() + andi $t2, $t2, 0xFFFF # zero top 16 bits + bnezc $t2, .Lslow_unlock # do lock word and self thread id's match? 
+ and $t2, $t1, $t3 # zero the gc bits + bgeuc $t2, $t8, .Lrecursive_thin_unlock + # transition to unlocked + nor $t2, $zero, $t3 # $t2 = LOCK_WORD_GC_STATE_MASK_SHIFTED + and $t2, $t1, $t2 # $t2: zero except for the preserved gc bits + sync # full (LoadStore|StoreStore) memory barrier +#ifndef USE_READ_BARRIER + sw $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) +#else + sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) + beqzc $t2, .Lretry_unlock # store failed, retry nop +#endif + jic $ra, 0 +.Lrecursive_thin_unlock: + # t1: original lock word + subu $t2, $t1, $t8 # decrement count +#ifndef USE_READ_BARRIER + sw $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) +#else + sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) + beqzc $t2, .Lretry_unlock # store failed, retry + nop +#endif + jic $ra, 0 +.Lslow_unlock: .cpsetup $t9, $t8, art_quick_unlock_object SETUP_SAVE_REFS_ONLY_FRAME # save callee saves in case exception allocation triggers GC jal artUnlockObjectFromCode # (Object* obj, Thread*) diff --git a/runtime/openjdkjvmti/ti_class.cc b/runtime/openjdkjvmti/ti_class.cc index e0af6e8dee..dd90a71240 100644 --- a/runtime/openjdkjvmti/ti_class.cc +++ b/runtime/openjdkjvmti/ti_class.cc @@ -129,6 +129,25 @@ static std::unique_ptr<const art::DexFile> MakeSingleDexFile(art::Thread* self, return dex_file; } +// A deleter that acts like the jvmtiEnv->Deallocate so that asan does not get tripped up. +// TODO We should everything use the actual jvmtiEnv->Allocate/Deallocate functions once we can +// figure out which env to use. +template <typename T> +class FakeJvmtiDeleter { + public: + FakeJvmtiDeleter() {} + + FakeJvmtiDeleter(FakeJvmtiDeleter&) = default; + FakeJvmtiDeleter(FakeJvmtiDeleter&&) = default; + FakeJvmtiDeleter& operator=(const FakeJvmtiDeleter&) = default; + + template <typename U> void operator()(const U* ptr) const { + if (ptr != nullptr) { + free(const_cast<U*>(ptr)); + } + } +}; + struct ClassCallback : public art::ClassLoadCallback { void ClassPreDefine(const char* descriptor, art::Handle<art::mirror::Class> klass, @@ -173,7 +192,8 @@ struct ClassCallback : public art::ClassLoadCallback { // Call all Non-retransformable agents. jint post_no_redefine_len = 0; unsigned char* post_no_redefine_dex_data = nullptr; - std::unique_ptr<const unsigned char> post_no_redefine_unique_ptr(nullptr); + std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>> + post_no_redefine_unique_ptr(nullptr, FakeJvmtiDeleter<const unsigned char>()); event_handler->DispatchEvent<ArtJvmtiEvent::kClassFileLoadHookNonRetransformable>( self, static_cast<JNIEnv*>(env), @@ -190,13 +210,16 @@ struct ClassCallback : public art::ClassLoadCallback { post_no_redefine_dex_data = const_cast<unsigned char*>(dex_file_copy->Begin()); post_no_redefine_len = dex_file_copy->Size(); } else { - post_no_redefine_unique_ptr = std::unique_ptr<const unsigned char>(post_no_redefine_dex_data); + post_no_redefine_unique_ptr = + std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>>( + post_no_redefine_dex_data, FakeJvmtiDeleter<const unsigned char>()); DCHECK_GT(post_no_redefine_len, 0); } // Call all retransformable agents. 
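Aside: the FakeJvmtiDeleter introduced above is an instance of a general technique, giving std::unique_ptr a custom deleter type so that memory obtained from a malloc-style allocator is released with the matching free-style call rather than delete, which is the malloc/delete mismatch ASan was reporting (see the run-test-jar change later in this patch). A generic, self-contained sketch, not the ART class itself:

    #include <cstdlib>
    #include <memory>

    struct FreeDeleter {
      template <typename U>
      void operator()(const U* ptr) const {
        if (ptr != nullptr) {
          std::free(const_cast<U*>(ptr));     // matches a malloc-style allocation
        }
      }
    };

    template <typename T>
    using MallocedPtr = std::unique_ptr<T, FreeDeleter>;

    int main() {
      // Stands in for a buffer handed out by a C-style allocator such as
      // jvmtiEnv->Allocate.
      auto* raw = static_cast<unsigned char*>(std::malloc(16));
      MallocedPtr<const unsigned char> owned(raw, FreeDeleter());
      // ... use owned.get() ...
      return 0;                               // released via std::free, not delete[]
    }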
jint final_len = 0; unsigned char* final_dex_data = nullptr; - std::unique_ptr<const unsigned char> final_dex_unique_ptr(nullptr); + std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>> + final_dex_unique_ptr(nullptr, FakeJvmtiDeleter<const unsigned char>()); event_handler->DispatchEvent<ArtJvmtiEvent::kClassFileLoadHookRetransformable>( self, static_cast<JNIEnv*>(env), @@ -213,7 +236,9 @@ struct ClassCallback : public art::ClassLoadCallback { final_dex_data = post_no_redefine_dex_data; final_len = post_no_redefine_len; } else { - final_dex_unique_ptr = std::unique_ptr<const unsigned char>(final_dex_data); + final_dex_unique_ptr = + std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>>( + final_dex_data, FakeJvmtiDeleter<const unsigned char>()); DCHECK_GT(final_len, 0); } diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 3697f2176c..c46bd8d2b9 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -1017,6 +1017,30 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { MemMap::Init(); + // Try to reserve a dedicated fault page. This is allocated for clobbered registers and sentinels. + // If we cannot reserve it, log a warning. + // Note: We allocate this first to have a good chance of grabbing the page. The address (0xebad..) + // is out-of-the-way enough that it should not collide with boot image mapping. + // Note: Don't request an error message. That will lead to a maps dump in the case of failure, + // leading to logspam. + { + constexpr uintptr_t kSentinelAddr = + RoundDown(static_cast<uintptr_t>(Context::kBadGprBase), kPageSize); + protected_fault_page_.reset(MemMap::MapAnonymous("Sentinel fault page", + reinterpret_cast<uint8_t*>(kSentinelAddr), + kPageSize, + PROT_NONE, + /* low_4g */ true, + /* reuse */ false, + /* error_msg */ nullptr)); + if (protected_fault_page_ == nullptr) { + LOG(WARNING) << "Could not reserve sentinel fault page"; + } else if (reinterpret_cast<uintptr_t>(protected_fault_page_->Begin()) != kSentinelAddr) { + LOG(WARNING) << "Could not reserve sentinel fault page at the right address."; + protected_fault_page_.reset(); + } + } + using Opt = RuntimeArgumentMap; VLOG(startup) << "Runtime::Init -verbose:startup enabled"; @@ -1401,27 +1425,6 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { callbacks_->NextRuntimePhase(RuntimePhaseCallback::RuntimePhase::kInitialAgents); } - // Try to reserve a dedicated fault page. This is allocated for clobbered registers and sentinels. - // If we cannot reserve it, log a warning. - // Note: This is allocated last so that the heap and other things have priority, if necessary. 
- { - constexpr uintptr_t kSentinelAddr = - RoundDown(static_cast<uintptr_t>(Context::kBadGprBase), kPageSize); - protected_fault_page_.reset(MemMap::MapAnonymous("Sentinel fault page", - reinterpret_cast<uint8_t*>(kSentinelAddr), - kPageSize, - PROT_NONE, - true, - false, - &error_msg)); - if (protected_fault_page_ == nullptr) { - LOG(WARNING) << "Could not reserve sentinel fault page: " << error_msg; - } else if (reinterpret_cast<uintptr_t>(protected_fault_page_->Begin()) != kSentinelAddr) { - LOG(WARNING) << "Could not reserve sentinel fault page at the right address."; - protected_fault_page_.reset(); - } - } - VLOG(startup) << "Runtime::Init exiting"; return true; diff --git a/test/409-materialized-condition/src/Main.java b/test/409-materialized-condition/src/Main.java index 0c179a99de..5f21bc3fbd 100644 --- a/test/409-materialized-condition/src/Main.java +++ b/test/409-materialized-condition/src/Main.java @@ -50,6 +50,49 @@ public class Main { return b; } + public static boolean $noinline$intEq0(int x) { + return x == 0; + } + + public static boolean $noinline$intNe0(int x) { + return x != 0; + } + + public static boolean $noinline$longEq0(long x) { + return x == 0; + } + + public static boolean $noinline$longNe0(long x) { + return x != 0; + } + + public static boolean $noinline$longEqCst(long x) { + return x == 0x0123456789ABCDEFL; + } + + public static boolean $noinline$longNeCst(long x) { + return x != 0x0123456789ABCDEFL; + } + + public static void assertEqual(boolean expected, boolean actual) { + if (expected != actual) { + throw new Error("Assertion failed: " + expected + " != " + actual); + } + } + + // The purpose of this method is to test code generation for a materialized + // HCondition that is not equality or inequality, and that has one boolean + // input. That can't be done directly, so we have to rely on the instruction + // simplifier to transform the control-flow graph appropriately. + public static boolean $noinline$booleanCondition(boolean in) { + int value = in ? 1 : 0; + + // Calling a non-inlineable method that uses `value` as well prevents a + // transformation of the return value into `false`. 
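Aside: the runtime.cc change above (the addition early in Runtime::Init together with the block removed here) reserves a PROT_NONE sentinel page so that accesses through the "bad register" sentinel values fault predictably; doing it before other mappings improves the chance of getting the preferred address. A bare mmap sketch of the same idea, assuming a 4 KiB page and an illustrative address (ART derives the real one from Context::kBadGprBase):

    #include <sys/mman.h>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const uintptr_t kSentinelAddr = 0xebad0000u;   // page-aligned hint, illustrative only
      void* page = mmap(reinterpret_cast<void*>(kSentinelAddr),
                        4096,
                        PROT_NONE,                   // any access faults
                        MAP_PRIVATE | MAP_ANONYMOUS,
                        -1,
                        0);
      if (page == MAP_FAILED) {
        std::fprintf(stderr, "could not reserve sentinel fault page\n");
      } else if (reinterpret_cast<uintptr_t>(page) != kSentinelAddr) {
        // Without MAP_FIXED the kernel may place the mapping elsewhere,
        // mirroring the "right address" check in Runtime::Init.
        std::fprintf(stderr, "sentinel fault page landed at a different address\n");
        munmap(page, 4096);
      }
      return 0;
    }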
+ $noinline$intNe0(value); + return value > 127; + } + public static void main(String[] args) { System.out.println("foo1"); int res = foo1(); @@ -62,5 +105,49 @@ public class Main { if (res != 42) { throw new Error("Unexpected return value for foo2: " + res + ", expected 42."); } + + assertEqual($noinline$booleanCondition(false), false); + assertEqual($noinline$booleanCondition(true), false); + + int[] int_inputs = {0, 1, -1, Integer.MIN_VALUE, Integer.MAX_VALUE, 42, -9000}; + long[] long_inputs = { + 0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE, 0x100000000L, + 0x100000001L, -9000L, 0x0123456789ABCDEFL}; + + boolean[] int_eq_0_expected = {true, false, false, false, false, false, false}; + + for (int i = 0; i < int_inputs.length; i++) { + assertEqual(int_eq_0_expected[i], $noinline$intEq0(int_inputs[i])); + } + + boolean[] int_ne_0_expected = {false, true, true, true, true, true, true}; + + for (int i = 0; i < int_inputs.length; i++) { + assertEqual(int_ne_0_expected[i], $noinline$intNe0(int_inputs[i])); + } + + boolean[] long_eq_0_expected = {true, false, false, false, false, false, false, false, false}; + + for (int i = 0; i < long_inputs.length; i++) { + assertEqual(long_eq_0_expected[i], $noinline$longEq0(long_inputs[i])); + } + + boolean[] long_ne_0_expected = {false, true, true, true, true, true, true, true, true}; + + for (int i = 0; i < long_inputs.length; i++) { + assertEqual(long_ne_0_expected[i], $noinline$longNe0(long_inputs[i])); + } + + boolean[] long_eq_cst_expected = {false, false, false, false, false, false, false, false, true}; + + for (int i = 0; i < long_inputs.length; i++) { + assertEqual(long_eq_cst_expected[i], $noinline$longEqCst(long_inputs[i])); + } + + boolean[] long_ne_cst_expected = {true, true, true, true, true, true, true, true, false}; + + for (int i = 0; i < long_inputs.length; i++) { + assertEqual(long_ne_cst_expected[i], $noinline$longNeCst(long_inputs[i])); + } } } diff --git a/test/476-checker-ctor-memory-barrier/src/Main.java b/test/476-checker-ctor-memory-barrier/src/Main.java index 70c5121a30..e887cd32a0 100644 --- a/test/476-checker-ctor-memory-barrier/src/Main.java +++ b/test/476-checker-ctor-memory-barrier/src/Main.java @@ -261,7 +261,7 @@ public class Main { /// CHECK-START: void Main.testNewString() inliner (after) /// CHECK-NOT: ConstructorFence - /// CHECK: InvokeStaticOrDirect method_load_kind:string_init + /// CHECK: InvokeStaticOrDirect method_load_kind:StringInit /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InvokeStaticOrDirect public static void testNewString() { diff --git a/test/488-checker-inline-recursive-calls/src/Main.java b/test/488-checker-inline-recursive-calls/src/Main.java index 87ff3f7b5a..441dbbfcb6 100644 --- a/test/488-checker-inline-recursive-calls/src/Main.java +++ b/test/488-checker-inline-recursive-calls/src/Main.java @@ -25,10 +25,10 @@ public class Main { } /// CHECK-START: void Main.doTopCall(boolean) inliner (before) - /// CHECK-NOT: InvokeStaticOrDirect method_load_kind:recursive + /// CHECK-NOT: InvokeStaticOrDirect method_load_kind:Recursive /// CHECK-START: void Main.doTopCall(boolean) inliner (after) - /// CHECK: InvokeStaticOrDirect method_load_kind:recursive + /// CHECK: InvokeStaticOrDirect method_load_kind:Recursive public static void doTopCall(boolean first_call) { if (first_call) { inline1(); diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java index dd77423870..3f81fd644a 100644 --- a/test/552-checker-sharpening/src/Main.java +++ 
b/test/552-checker-sharpening/src/Main.java @@ -42,27 +42,27 @@ public class Main { } /// CHECK-START: int Main.testSimple(int) sharpening (before) - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_via_method + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCacheViaMethod /// CHECK-START-ARM: int Main.testSimple(int) sharpening (after) /// CHECK-NOT: ArmDexCacheArraysBase - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after) - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-MIPS: int Main.testSimple(int) sharpening (after) - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-MIPS64: int Main.testSimple(int) sharpening (after) - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-X86: int Main.testSimple(int) sharpening (after) /// CHECK-NOT: X86ComputeBaseMethodAddress - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-X86_64: int Main.testSimple(int) sharpening (after) - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-ARM: int Main.testSimple(int) dex_cache_array_fixups_arm (after) /// CHECK: ArmDexCacheArraysBase @@ -78,33 +78,33 @@ public class Main { } /// CHECK-START: int Main.testDiamond(boolean, int) sharpening (before) - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_via_method + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCacheViaMethod /// CHECK-START-ARM: int Main.testDiamond(boolean, int) sharpening (after) /// CHECK-NOT: ArmDexCacheArraysBase - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-ARM64: int Main.testDiamond(boolean, int) sharpening (after) - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) sharpening (after) - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-MIPS64: int Main.testDiamond(boolean, int) sharpening (after) - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening 
(after) /// CHECK-NOT: X86ComputeBaseMethodAddress - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-X86_64: int Main.testDiamond(boolean, int) sharpening (after) - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after) /// CHECK: ArmDexCacheArraysBase @@ -148,7 +148,7 @@ public class Main { /// CHECK-NEXT: X86ComputeBaseMethodAddress /// CHECK-NEXT: Goto /// CHECK: begin_block - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (before) /// CHECK-NOT: ArmDexCacheArraysBase @@ -166,7 +166,7 @@ public class Main { /// CHECK-NEXT: ArmDexCacheArraysBase /// CHECK-NEXT: Goto /// CHECK: begin_block - /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:DexCachePcRelative public static int testLoop(int[] array, int x) { // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop. @@ -216,33 +216,27 @@ public class Main { /// CHECK-START-X86: java.lang.String Main.$noinline$getBootImageString() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. - /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} + /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry}} /// CHECK-START-X86_64: java.lang.String Main.$noinline$getBootImageString() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. - /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} + /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry}} /// CHECK-START-ARM: java.lang.String Main.$noinline$getBootImageString() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. - /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} + /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry}} /// CHECK-START-ARM64: java.lang.String Main.$noinline$getBootImageString() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. - /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} + /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry}} /// CHECK-START-MIPS: java.lang.String Main.$noinline$getBootImageString() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. 
- /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} + /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry}} /// CHECK-START-MIPS64: java.lang.String Main.$noinline$getBootImageString() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. - /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} + /// CHECK: LoadString load_kind:{{BootImageAddress|BssEntry}} public static String $noinline$getBootImageString() { // Prevent inlining to avoid the string comparison being optimized away. @@ -285,33 +279,27 @@ public class Main { /// CHECK-START-X86: java.lang.Class Main.$noinline$getStringClass() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. - /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String + /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getStringClass() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. - /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String + /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String /// CHECK-START-ARM: java.lang.Class Main.$noinline$getStringClass() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. - /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String + /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getStringClass() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. - /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String + /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getStringClass() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. - /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String + /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String /// CHECK-START-MIPS64: java.lang.Class Main.$noinline$getStringClass() sharpening (after) // Note: load kind depends on PIC/non-PIC - // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. - /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String + /// CHECK: LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String public static Class<?> $noinline$getStringClass() { // Prevent inlining to avoid the string comparison being optimized away. 
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java index 3ac6f89c5f..2dad14ce31 100644 --- a/test/570-checker-select/src/Main.java +++ b/test/570-checker-select/src/Main.java @@ -414,6 +414,46 @@ public class Main { return a > 0x7FFFFFFFFFFFFFFFL ? x : y; } + /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar4(long, long, long) disassembly (after) + /// CHECK: Select + /// CHECK-NEXT: orrs ip, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: cmp + /// CHECK-NOT: sbcs + + public static long $noinline$LongNonmatCondCst_LongVarVar4(long a, long x, long y) { + return a == 0 ? x : y; + } + + /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar5(long, long, long) disassembly (after) + /// CHECK: Select + /// CHECK-NEXT: orrs ip, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: cmp + /// CHECK-NOT: sbcs + + public static long $noinline$LongNonmatCondCst_LongVarVar5(long a, long x, long y) { + return a != 0 ? x : y; + } + + /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar6(long, long, long) disassembly (after) + /// CHECK: Select + /// CHECK-NEXT: cmp {{r\d+}}, #0 + /// CHECK-NOT: cmp + /// CHECK-NOT: sbcs + + public static long $noinline$LongNonmatCondCst_LongVarVar6(long a, long x, long y) { + return a >= 0 ? x : y; + } + + /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar7(long, long, long) disassembly (after) + /// CHECK: Select + /// CHECK-NEXT: cmp {{r\d+}}, #0 + /// CHECK-NOT: cmp + /// CHECK-NOT: sbcs + + public static long $noinline$LongNonmatCondCst_LongVarVar7(long a, long x, long y) { + return a < 0 ? x : y; + } + /// CHECK-START: long Main.LongMatCond_LongVarVar(long, long, long, long) register (after) /// CHECK: <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}] /// CHECK: <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>] @@ -688,6 +728,37 @@ public class Main { assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar3(2L, 5L, 7L)); + long[] long_inputs = { + 0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE, 2L, 0x100000000L, 0xFFFFFFFF00000000L, -9000L}; + + long[] expected_1 = {5L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L}; + + for (int i = 0; i < long_inputs.length; i++) { + assertEqual(expected_1[i], $noinline$LongNonmatCondCst_LongVarVar4(long_inputs[i], 5L, 7L)); + } + + long[] expected_2 = {7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L}; + + for (int i = 0; i < long_inputs.length; i++) { + assertEqual(expected_2[i], $noinline$LongNonmatCondCst_LongVarVar5(long_inputs[i], 5L, 7L)); + } + + long[] expected_3 = {5L, 5L, 7L, 7L, 5L, 5L, 5L, 7L, 7L}; + + for (int i = 0; i < long_inputs.length; i++) { + assertEqual(expected_3[i], $noinline$LongNonmatCondCst_LongVarVar6(long_inputs[i], 5L, 7L)); + } + + long[] expected_4 = {7L, 7L, 5L, 5L, 7L, 7L, 7L, 5L, 5L}; + + for (int i = 0; i < long_inputs.length; i++) { + assertEqual(expected_4[i], $noinline$LongNonmatCondCst_LongVarVar7(long_inputs[i], 5L, 7L)); + } + + assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar7(0L, 5L, 7L)); + assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar7(2L, 5L, 7L)); + assertEqual(5L, $noinline$LongNonmatCondCst_LongVarVar7(-9000L, 5L, 7L)); + assertEqual(5, FloatLtNonmatCond_IntVarVar(3, 2, 5, 7)); assertEqual(7, FloatLtNonmatCond_IntVarVar(2, 3, 5, 7)); assertEqual(7, FloatLtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7)); diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar index a89fe5b0d4..ca52a996ac 100755 --- a/test/etc/run-test-jar +++ b/test/etc/run-test-jar @@ -668,12 +668,6 @@ profman_cmdline=$(echo $profman_cmdline) # Note: 
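Aside: the new Select tests above check that, on 32-bit ARM, a 64-bit comparison against zero needs no full cmp/sbcs pair; equality folds to a single orrs of the two register halves, and a signed >= 0 or < 0 test only needs the high half. The underlying identity, written out plainly under the assumption that the long is split into 32-bit halves as the ARM back end does:

    #include <cstdint>

    bool LongEqZero(uint32_t lo, uint32_t hi) {
      return (lo | hi) == 0;                         // one orrs sets the flags
    }

    bool LongGeZero(uint32_t lo, uint32_t hi) {
      (void)lo;                                      // low half is irrelevant to the sign
      return static_cast<int32_t>(hi) >= 0;          // cmp high-half, #0
    }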
this is required as envsetup right now exports detect_leaks=0. RUN_TEST_ASAN_OPTIONS="" -# JVMTI has a mismatch of malloc with delete. b/38322765 -if [ "x$RUN_TEST_ASAN_OPTIONS" != "x" ] ; then - RUN_TEST_ASAN_OPTIONS="${RUN_TEST_ASAN_OPTIONS}:" -fi -RUN_TEST_ASAN_OPTIONS="${RUN_TEST_ASAN_OPTIONS}alloc_dealloc_mismatch=0" - # Multiple shutdown leaks. b/38341789 if [ "x$RUN_TEST_ASAN_OPTIONS" != "x" ] ; then RUN_TEST_ASAN_OPTIONS="${RUN_TEST_ASAN_OPTIONS}:" diff --git a/tools/ahat/src/heapdump/AhatClassInstance.java b/tools/ahat/src/heapdump/AhatClassInstance.java index 273530af64..c10d604630 100644 --- a/tools/ahat/src/heapdump/AhatClassInstance.java +++ b/tools/ahat/src/heapdump/AhatClassInstance.java @@ -154,10 +154,7 @@ public class AhatClassInstance extends AhatInstance { } @Override public AhatInstance getAssociatedBitmapInstance() { - if (isInstanceOfClass("android.graphics.Bitmap")) { - return this; - } - return null; + return getBitmapInfo() == null ? null : this; } @Override public boolean isClassInstance() { @@ -178,14 +175,27 @@ public class AhatClassInstance extends AhatInstance { * Returns null if the field value is null, not a byte[] or could not be read. */ private byte[] getByteArrayField(String fieldName) { - Value value = getField(fieldName); - if (!value.isAhatInstance()) { - return null; + AhatInstance field = getRefField(fieldName); + return field == null ? null : field.asByteArray(); + } + + private static class BitmapInfo { + public final int width; + public final int height; + public final byte[] buffer; + + public BitmapInfo(int width, int height, byte[] buffer) { + this.width = width; + this.height = height; + this.buffer = buffer; } - return value.asAhatInstance().asByteArray(); } - public BufferedImage asBitmap() { + /** + * Return bitmap info for this object, or null if no appropriate bitmap + * info is available. + */ + private BitmapInfo getBitmapInfo() { if (!isInstanceOfClass("android.graphics.Bitmap")) { return null; } @@ -205,20 +215,34 @@ public class AhatClassInstance extends AhatInstance { return null; } + if (buffer.length < 4 * height * width) { + return null; + } + + return new BitmapInfo(width, height, buffer); + + } + + public BufferedImage asBitmap() { + BitmapInfo info = getBitmapInfo(); + if (info == null) { + return null; + } + // Convert the raw data to an image // Convert BGRA to ABGR - int[] abgr = new int[height * width]; + int[] abgr = new int[info.height * info.width]; for (int i = 0; i < abgr.length; i++) { abgr[i] = ( - (((int) buffer[i * 4 + 3] & 0xFF) << 24) - + (((int) buffer[i * 4 + 0] & 0xFF) << 16) - + (((int) buffer[i * 4 + 1] & 0xFF) << 8) - + ((int) buffer[i * 4 + 2] & 0xFF)); + (((int) info.buffer[i * 4 + 3] & 0xFF) << 24) + + (((int) info.buffer[i * 4 + 0] & 0xFF) << 16) + + (((int) info.buffer[i * 4 + 1] & 0xFF) << 8) + + ((int) info.buffer[i * 4 + 2] & 0xFF)); } BufferedImage bitmap = new BufferedImage( - width, height, BufferedImage.TYPE_4BYTE_ABGR); - bitmap.setRGB(0, 0, width, height, abgr, 0, width); + info.width, info.height, BufferedImage.TYPE_4BYTE_ABGR); + bitmap.setRGB(0, 0, info.width, info.height, abgr, 0, info.width); return bitmap; } } diff --git a/tools/setup-buildbot-device.sh b/tools/setup-buildbot-device.sh index 6c2c07213f..546a6bf55b 100755 --- a/tools/setup-buildbot-device.sh +++ b/tools/setup-buildbot-device.sh @@ -38,6 +38,11 @@ fi seconds_per_hour=3600 +# Kill logd first, so that when we set the adb buffer size later in this file, +# it is brought up again. 
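Aside: the ahat refactoring above centralizes the width/height/buffer validation in getBitmapInfo() and keeps the BGRA-to-ABGR repacking in asBitmap(). The repacking itself, shown standalone as a rough C++ equivalent of the Java loop (same byte offsets; the length check mirrors the new buffer.length guard):

    #include <cstdint>
    #include <vector>

    std::vector<uint32_t> BgraToAbgr(const std::vector<uint8_t>& buffer,
                                     int width, int height) {
      const size_t pixels = static_cast<size_t>(width) * static_cast<size_t>(height);
      if (buffer.size() < 4 * pixels) {
        return {};                                   // corresponds to the new size guard
      }
      std::vector<uint32_t> abgr(pixels);
      for (size_t i = 0; i < pixels; ++i) {
        abgr[i] = (static_cast<uint32_t>(buffer[i * 4 + 3]) << 24)
                | (static_cast<uint32_t>(buffer[i * 4 + 0]) << 16)
                | (static_cast<uint32_t>(buffer[i * 4 + 1]) << 8)
                |  static_cast<uint32_t>(buffer[i * 4 + 2]);
      }
      return abgr;
    }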
+echo -e "${green}Killing logd, seen leaking on fugu/N${nc}" +adb shell killall -9 /system/bin/logd + # Update date on device if the difference with host is more than one hour. if [ $abs_time_difference_in_seconds -gt $seconds_per_hour ]; then echo -e "${green}Update date on device${nc}" @@ -61,9 +66,6 @@ adb shell uptime echo -e "${green}Battery info${nc}" adb shell dumpsys battery -echo -e "${green}Killing logd, seen leaking on fugu/N${nc}" -adb shell killall -9 /system/bin/logd - echo -e "${green}Setting adb buffer size to 32MB${nc}" adb logcat -G 32M adb logcat -g |