Diffstat (limited to 'compiler/optimizing')
32 files changed, 2187 insertions, 897 deletions
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index cb6e14b2bd..a949c33149 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -43,7 +43,7 @@ class BoundsCheckEliminationTest : public testing::Test { void RunBCE() { graph_->BuildDominatorTree(); - InstructionSimplifier(graph_, /* codegen */ nullptr).Run(); + InstructionSimplifier(graph_, /* codegen */ nullptr, /* driver */ nullptr).Run(); SideEffectsAnalysis side_effects(graph_); side_effects.Run(); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 713d370c87..c66bd77d6b 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1656,6 +1656,34 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) { } } +static int64_t AdjustConstantForCondition(int64_t value, + IfCondition* condition, + IfCondition* opposite) { + if (value == 1) { + if (*condition == kCondB) { + value = 0; + *condition = kCondEQ; + *opposite = kCondNE; + } else if (*condition == kCondAE) { + value = 0; + *condition = kCondNE; + *opposite = kCondEQ; + } + } else if (value == -1) { + if (*condition == kCondGT) { + value = 0; + *condition = kCondGE; + *opposite = kCondLT; + } else if (*condition == kCondLE) { + value = 0; + *condition = kCondLT; + *opposite = kCondGE; + } + } + + return value; +} + static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition, bool invert, CodeGeneratorARM* codegen) { @@ -1669,7 +1697,7 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond std::swap(cond, opposite); } - std::pair<Condition, Condition> ret; + std::pair<Condition, Condition> ret(EQ, NE); const Location left = locations->InAt(0); const Location right = locations->InAt(1); @@ -1677,7 +1705,38 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond const Register left_high = left.AsRegisterPairHigh<Register>(); const Register left_low = left.AsRegisterPairLow<Register>(); - int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); + int64_t value = AdjustConstantForCondition(right.GetConstant()->AsLongConstant()->GetValue(), + &cond, + &opposite); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (cond) { + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: + ret = std::make_pair(NE, EQ); + FALLTHROUGH_INTENDED; + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. + case kCondBE: + __ orrs(IP, left_low, ShifterOperand(left_high)); + return ret; + case kCondLT: + case kCondGE: + __ cmp(left_high, ShifterOperand(0)); + return std::make_pair(ARMCondition(cond), ARMCondition(opposite)); + // Trivially true or false. 
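+      // For an unsigned comparison against 0, x < 0 can never hold and
+      // x >= 0 always holds. The self-comparison below merely sets the Z
+      // flag, so the returned pair, (EQ, NE) for kCondAE or (NE, EQ) for
+      // kCondB, evaluates to that constant without a branch.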
+ case kCondB: + ret = std::make_pair(NE, EQ); + FALLTHROUGH_INTENDED; + case kCondAE: + __ cmp(left_low, ShifterOperand(left_low)); + return ret; + default: + break; + } + } switch (cond) { case kCondEQ: @@ -1837,10 +1896,14 @@ static std::pair<Condition, Condition> GenerateTest(HCondition* condition, static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { const LocationSummary* const locations = condition->GetLocations(); - const IfCondition c = condition->GetCondition(); if (locations->InAt(1).IsConstant()) { - const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); + IfCondition c = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + const int64_t value = AdjustConstantForCondition( + Int64FromConstant(locations->InAt(1).GetConstant()), + &c, + &opposite); ShifterOperand so; if (c < kCondLT || c > kCondGE) { @@ -1848,9 +1911,11 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { // we check that the least significant half of the first input to be compared // is in a low register (the other half is read outside an IT block), and // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used. - if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || - !IsUint<8>(Low32Bits(value))) { + // encoding can be used; 0 is always handled, no matter what registers are + // used by the first input. + if (value != 0 && + (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || + !IsUint<8>(Low32Bits(value)))) { return false; } } else if (c == kCondLE || c == kCondGT) { @@ -1877,6 +1942,329 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { return true; } +static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARM* codegen) { + DCHECK(CanGenerateTest(cond, codegen->GetAssembler())); + + const Register out = cond->GetLocations()->Out().AsRegister<Register>(); + const auto condition = GenerateTest(cond, false, codegen); + + __ mov(out, ShifterOperand(0), AL, kCcKeep); + + if (ArmAssembler::IsLowRegister(out)) { + __ it(condition.first); + __ mov(out, ShifterOperand(1), condition.first); + } else { + Label done_label; + Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + + __ b(final_label, condition.second); + __ LoadImmediate(out, 1); + + if (done_label.IsLinked()) { + __ Bind(&done_label); + } + } +} + +static void GenerateEqualLong(HCondition* cond, CodeGeneratorARM* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const Register out = locations->Out().AsRegister<Register>(); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + Register left_high = left.AsRegisterPairHigh<Register>(); + Register left_low = left.AsRegisterPairLow<Register>(); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + const int64_t value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), + &condition, + &opposite); + int32_t value_high = -High32Bits(value); + int32_t value_low = -Low32Bits(value); + + // The output uses Location::kNoOutputOverlap. 
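+    // `out` may therefore alias one of the input halves; if it aliases the
+    // high half, swap the halves so that the first AddConstant below does
+    // not clobber left_high before the second AddConstant reads it.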
+ if (out == left_high) { + std::swap(left_low, left_high); + std::swap(value_low, value_high); + } + + __ AddConstant(out, left_low, value_low); + __ AddConstant(IP, left_high, value_high); + } else { + DCHECK(right.IsRegisterPair()); + __ sub(IP, left_high, ShifterOperand(right.AsRegisterPairHigh<Register>())); + __ sub(out, left_low, ShifterOperand(right.AsRegisterPairLow<Register>())); + } + + // Need to check after calling AdjustConstantForCondition(). + DCHECK(condition == kCondEQ || condition == kCondNE) << condition; + + if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) { + __ orrs(out, out, ShifterOperand(IP)); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + } else { + __ orr(out, out, ShifterOperand(IP)); + codegen->GenerateConditionWithZero(condition, out, out, IP); + } +} + +static void GenerateLongComparesAndJumps(HCondition* cond, + Label* true_label, + Label* false_label, + CodeGeneratorARM* codegen) { + LocationSummary* locations = cond->GetLocations(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + IfCondition if_cond = cond->GetCondition(); + + Register left_high = left.AsRegisterPairHigh<Register>(); + Register left_low = left.AsRegisterPairLow<Register>(); + IfCondition true_high_cond = if_cond; + IfCondition false_high_cond = cond->GetOppositeCondition(); + Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part + + // Set the conditions for the test, remembering that == needs to be + // decided using the low words. + switch (if_cond) { + case kCondEQ: + case kCondNE: + // Nothing to do. + break; + case kCondLT: + false_high_cond = kCondGT; + break; + case kCondLE: + true_high_cond = kCondLT; + break; + case kCondGT: + false_high_cond = kCondLT; + break; + case kCondGE: + true_high_cond = kCondGT; + break; + case kCondB: + false_high_cond = kCondA; + break; + case kCondBE: + true_high_cond = kCondB; + break; + case kCondA: + false_high_cond = kCondB; + break; + case kCondAE: + true_high_cond = kCondA; + break; + } + if (right.IsConstant()) { + int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); + int32_t val_low = Low32Bits(value); + int32_t val_high = High32Bits(value); + + __ CmpConstant(left_high, val_high); + if (if_cond == kCondNE) { + __ b(true_label, ARMCondition(true_high_cond)); + } else if (if_cond == kCondEQ) { + __ b(false_label, ARMCondition(false_high_cond)); + } else { + __ b(true_label, ARMCondition(true_high_cond)); + __ b(false_label, ARMCondition(false_high_cond)); + } + // Must be equal high, so compare the lows. + __ CmpConstant(left_low, val_low); + } else { + Register right_high = right.AsRegisterPairHigh<Register>(); + Register right_low = right.AsRegisterPairLow<Register>(); + + __ cmp(left_high, ShifterOperand(right_high)); + if (if_cond == kCondNE) { + __ b(true_label, ARMCondition(true_high_cond)); + } else if (if_cond == kCondEQ) { + __ b(false_label, ARMCondition(false_high_cond)); + } else { + __ b(true_label, ARMCondition(true_high_cond)); + __ b(false_label, ARMCondition(false_high_cond)); + } + // Must be equal high, so compare the lows. + __ cmp(left_low, ShifterOperand(right_low)); + } + // The last comparison might be unsigned. 
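+  // (Once the high words are known to be equal, the low words must be
+  // compared as unsigned 32-bit magnitudes, even for a signed 64-bit
+  // comparison; that is why final_condition uses ARMUnsignedCondition.)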
+ // TODO: optimize cases where this is always true/false + __ b(true_label, final_condition); +} + +static void GenerateConditionLong(HCondition* cond, CodeGeneratorARM* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const Register out = locations->Out().AsRegister<Register>(); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + // Comparisons against 0 are common enough to deserve special attention. + if (AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), + &condition, + &opposite) == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (ArmAssembler::IsLowRegister(out)) { + // We only care if both input registers are 0 or not. + __ orrs(out, + left.AsRegisterPairLow<Register>(), + ShifterOperand(left.AsRegisterPairHigh<Register>())); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + // We only care if both input registers are 0 or not. + __ orr(out, + left.AsRegisterPairLow<Register>(), + ShifterOperand(left.AsRegisterPairHigh<Register>())); + codegen->GenerateConditionWithZero(condition, out, out); + return; + case kCondLT: + case kCondGE: + // We only care about the sign bit. + FALLTHROUGH_INTENDED; + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, left.AsRegisterPairHigh<Register>()); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if ((condition == kCondEQ || condition == kCondNE) && + // If `out` is a low register, then the GenerateConditionGeneric() + // function generates a shorter code sequence that is still branchless. + (!ArmAssembler::IsLowRegister(out) || !CanGenerateTest(cond, codegen->GetAssembler()))) { + GenerateEqualLong(cond, codegen); + return; + } + + if (CanGenerateTest(cond, codegen->GetAssembler())) { + GenerateConditionGeneric(cond, codegen); + return; + } + + // Convert the jumps into the result. + Label done_label; + Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + Label true_label, false_label; + + GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen); + + // False case: result = 0. + __ Bind(&false_label); + __ mov(out, ShifterOperand(0)); + __ b(final_label); + + // True case: result = 1. 
+ __ Bind(&true_label); + __ mov(out, ShifterOperand(1)); + + if (done_label.IsLinked()) { + __ Bind(&done_label); + } +} + +static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARM* codegen) { + const Primitive::Type type = cond->GetLeft()->GetType(); + + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + + if (type == Primitive::kPrimLong) { + GenerateConditionLong(cond, codegen); + return; + } + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + Register in = locations->InAt(0).AsRegister<Register>(); + const Register out = locations->Out().AsRegister<Register>(); + const Location right = cond->GetLocations()->InAt(1); + int64_t value; + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), + &condition, + &opposite); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (ArmAssembler::IsLowRegister(out) && out == in) { + __ cmp(out, ShifterOperand(0)); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + case kCondLT: + case kCondGE: + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, in); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if (condition == kCondEQ || condition == kCondNE) { + ShifterOperand operand; + + if (right.IsConstant()) { + operand = ShifterOperand(value); + } else if (out == right.AsRegister<Register>()) { + // Avoid 32-bit instructions if possible. + operand = ShifterOperand(in); + in = right.AsRegister<Register>(); + } else { + operand = ShifterOperand(right.AsRegister<Register>()); + } + + if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) { + __ subs(out, in, operand); + __ it(NE); + __ mov(out, ShifterOperand(1), NE); + } else { + __ sub(out, in, operand); + codegen->GenerateConditionWithZero(condition, out, out); + } + + return; + } + + GenerateConditionGeneric(cond, codegen); +} + static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) { const Primitive::Type type = constant->GetType(); bool ret = false; @@ -2009,9 +2397,10 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -2479,89 +2868,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, - Label* true_label, - Label* false_label) { - LocationSummary* locations = cond->GetLocations(); - 
Location left = locations->InAt(0); - Location right = locations->InAt(1); - IfCondition if_cond = cond->GetCondition(); - - Register left_high = left.AsRegisterPairHigh<Register>(); - Register left_low = left.AsRegisterPairLow<Register>(); - IfCondition true_high_cond = if_cond; - IfCondition false_high_cond = cond->GetOppositeCondition(); - Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part - - // Set the conditions for the test, remembering that == needs to be - // decided using the low words. - switch (if_cond) { - case kCondEQ: - case kCondNE: - // Nothing to do. - break; - case kCondLT: - false_high_cond = kCondGT; - break; - case kCondLE: - true_high_cond = kCondLT; - break; - case kCondGT: - false_high_cond = kCondLT; - break; - case kCondGE: - true_high_cond = kCondGT; - break; - case kCondB: - false_high_cond = kCondA; - break; - case kCondBE: - true_high_cond = kCondB; - break; - case kCondA: - false_high_cond = kCondB; - break; - case kCondAE: - true_high_cond = kCondA; - break; - } - if (right.IsConstant()) { - int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); - int32_t val_low = Low32Bits(value); - int32_t val_high = High32Bits(value); - - __ CmpConstant(left_high, val_high); - if (if_cond == kCondNE) { - __ b(true_label, ARMCondition(true_high_cond)); - } else if (if_cond == kCondEQ) { - __ b(false_label, ARMCondition(false_high_cond)); - } else { - __ b(true_label, ARMCondition(true_high_cond)); - __ b(false_label, ARMCondition(false_high_cond)); - } - // Must be equal high, so compare the lows. - __ CmpConstant(left_low, val_low); - } else { - Register right_high = right.AsRegisterPairHigh<Register>(); - Register right_low = right.AsRegisterPairLow<Register>(); - - __ cmp(left_high, ShifterOperand(right_high)); - if (if_cond == kCondNE) { - __ b(true_label, ARMCondition(true_high_cond)); - } else if (if_cond == kCondEQ) { - __ b(false_label, ARMCondition(false_high_cond)); - } else { - __ b(true_label, ARMCondition(true_high_cond)); - __ b(false_label, ARMCondition(false_high_cond)); - } - // Must be equal high, so compare the lows. - __ cmp(left_low, ShifterOperand(right_low)); - } - // The last comparison might be unsigned. - // TODO: optimize cases where this is always true/false - __ b(true_label, final_condition); -} - void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition, Label* true_target_in, Label* false_target_in) { @@ -2596,7 +2902,7 @@ void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condi Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); - GenerateLongComparesAndJumps(condition, true_target, false_target); + GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_); if (false_target != &fallthrough_target) { __ b(false_target); @@ -2911,6 +3217,80 @@ void CodeGeneratorARM::GenerateNop() { __ nop(); } +// `temp` is an extra temporary register that is used for some conditions; +// callers may not specify it, in which case the method will use a scratch +// register instead. +void CodeGeneratorARM::GenerateConditionWithZero(IfCondition condition, + Register out, + Register in, + Register temp) { + switch (condition) { + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. 
+ case kCondBE: + if (temp == kNoRegister || (ArmAssembler::IsLowRegister(out) && out != in)) { + temp = out; + } + + // Avoid 32-bit instructions if possible; note that `in` and `temp` must be + // different as well. + if (ArmAssembler::IsLowRegister(in) && ArmAssembler::IsLowRegister(temp) && in != temp) { + // temp = - in; only 0 sets the carry flag. + __ rsbs(temp, in, ShifterOperand(0)); + + if (out == in) { + std::swap(in, temp); + } + + // out = - in + in + carry = carry + __ adc(out, temp, ShifterOperand(in)); + } else { + // If `in` is 0, then it has 32 leading zeros, and less than that otherwise. + __ clz(out, in); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); + } + + break; + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: + if (out == in) { + if (temp == kNoRegister || in == temp) { + temp = IP; + } + } else if (temp == kNoRegister || !ArmAssembler::IsLowRegister(temp)) { + temp = out; + } + + // temp = in - 1; only 0 does not set the carry flag. + __ subs(temp, in, ShifterOperand(1)); + // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry + __ sbc(out, in, ShifterOperand(temp)); + break; + case kCondGE: + __ mvn(out, ShifterOperand(in)); + in = out; + FALLTHROUGH_INTENDED; + case kCondLT: + // We only care about the sign bit. + __ Lsr(out, in, 31); + break; + case kCondAE: + // Trivially true. + __ mov(out, ShifterOperand(1)); + break; + case kCondB: + // Trivially false. + __ mov(out, ShifterOperand(0)); + break; + default: + LOG(FATAL) << "Unexpected condition " << condition; + UNREACHABLE(); + } +} + void LocationsBuilderARM::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); @@ -2947,48 +3327,48 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { return; } - const Register out = cond->GetLocations()->Out().AsRegister<Register>(); + const Primitive::Type type = cond->GetLeft()->GetType(); - if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) { - const auto condition = GenerateTest(cond, false, codegen_); - - __ it(condition.first); - __ mov(out, ShifterOperand(1), condition.first); - __ it(condition.second); - __ mov(out, ShifterOperand(0), condition.second); + if (Primitive::IsFloatingPointType(type)) { + GenerateConditionGeneric(cond, codegen_); return; } - // Convert the jumps into the result. - Label done_label; - Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { - Label true_label, false_label; + const IfCondition condition = cond->GetCondition(); - GenerateLongComparesAndJumps(cond, &true_label, &false_label); + // A condition with only one boolean input, or two boolean inputs without being equality or + // inequality results from transformations done by the instruction simplifier, and is handled + // as a regular condition with integral inputs. 
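+  // Only boolean equality and inequality reach the special case below; it
+  // computes a != b as a ^ b, and a == b as (a ^ b) ^ 1.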
+ if (type == Primitive::kPrimBoolean && + cond->GetRight()->GetType() == Primitive::kPrimBoolean && + (condition == kCondEQ || condition == kCondNE)) { + const LocationSummary* const locations = cond->GetLocations(); + Register left = locations->InAt(0).AsRegister<Register>(); + const Register out = locations->Out().AsRegister<Register>(); + const Location right_loc = locations->InAt(1); - // False case: result = 0. - __ Bind(&false_label); - __ LoadImmediate(out, 0); - __ b(final_label); + // The constant case is handled by the instruction simplifier. + DCHECK(!right_loc.IsConstant()); - // True case: result = 1. - __ Bind(&true_label); - __ LoadImmediate(out, 1); - } else { - DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); + Register right = right_loc.AsRegister<Register>(); - const auto condition = GenerateTest(cond, false, codegen_); + // Avoid 32-bit instructions if possible. + if (out == right) { + std::swap(left, right); + } - __ mov(out, ShifterOperand(0), AL, kCcKeep); - __ b(final_label, condition.second); - __ LoadImmediate(out, 1); - } + __ eor(out, left, ShifterOperand(right)); - if (done_label.IsLinked()) { - __ Bind(&done_label); + if (condition == kCondEQ) { + __ eor(out, out, ShifterOperand(1)); + } + + return; } + + GenerateConditionIntegralOrNonPrimitive(cond, codegen_); } void LocationsBuilderARM::VisitEqual(HEqual* comp) { @@ -8588,6 +8968,18 @@ Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + Register temp_reg = temp.AsRegister<Register>(); + PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + __ BindTrackedLabel(&labels->movw_label); + __ movw(temp_reg, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(temp_reg, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(temp_reg, temp_reg, ShifterOperand(PC)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; @@ -8680,9 +9072,11 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp __ blx(LR); } -CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch( @@ -8695,6 +9089,11 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewTypeBssEntryPatch( return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +} + CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return 
NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -8759,22 +9158,26 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche DCHECK(linker_patches->empty()); size_t size = /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 47e6be59bd..fa1c14dcda 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -299,7 +299,6 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, Label* false_target); - void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -482,10 +481,11 @@ class CodeGeneratorARM : public CodeGenerator { Label add_pc_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); @@ -623,6 +623,14 @@ class CodeGeneratorARM : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + // `temp` is an extra temporary register that is used for some conditions; + // callers may not specify it, in which case the method will use a scratch + // register instead. 
+ void GenerateConditionWithZero(IfCondition condition, + Register out, + Register in, + Register temp = kNoRegister); + private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); @@ -662,12 +670,14 @@ class CodeGeneratorARM : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // PC-relative patch info for each HArmDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7ff100d870..096eb07074 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1449,9 +1449,10 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -4510,6 +4511,17 @@ Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStati case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + // Add ADRP with its PC-relative method patch. + vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); + // Add ADD with its PC-relative method patch. + vixl::aarch64::Label* add_label = + NewPcRelativeMethodPatch(invoke->GetTargetMethod(), adrp_label); + EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: // Load method address from literal pool. 
__ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); @@ -4633,12 +4645,13 @@ void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* i codegen_->GenerateInvokePolymorphicCall(invoke); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch( - const DexFile& dex_file, - dex::StringIndex string_index, +vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch( + MethodReference target_method, vixl::aarch64::Label* adrp_label) { - return - NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_); + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + adrp_label, + &pc_relative_method_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch( @@ -4655,6 +4668,14 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch( return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); } +vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch( + const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label) { + return + NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_); +} + vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset, @@ -4747,9 +4768,10 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - pc_relative_string_patches_.size() + + pc_relative_method_patches_.size() + pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + + pc_relative_string_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { @@ -4758,15 +4780,18 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc info.pc_insn_label->GetLocation(), info.offset_or_index)); } - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 56444dc0dc..71e221da22 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -549,12 +549,11 @@ class CodeGeneratorARM64 : public CodeGenerator { UNIMPLEMENTED(FATAL); } - // Add a new PC-relative string patch for an instruction and return the label + // Add a new PC-relative method patch for an instruction and return the label // to be bound before the instruction. 
The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). - vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, + vixl::aarch64::Label* NewPcRelativeMethodPatch(MethodReference target_method, vixl::aarch64::Label* adrp_label = nullptr); // Add a new PC-relative type patch for an instruction and return the label @@ -573,6 +572,14 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label = nullptr); + // Add a new PC-relative string patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label = nullptr); + // Add a new PC-relative dex cache array patch for an instruction and return // the label to be bound before the instruction. The instruction will be // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` @@ -787,12 +794,14 @@ class CodeGeneratorARM64 : public CodeGenerator { Uint64ToLiteralMap uint64_literals_; // PC-relative DexCache access info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Baker read barrier patch info. 
ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 015e6ddea3..34821f83cd 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -1775,6 +1775,34 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codege } } +static int64_t AdjustConstantForCondition(int64_t value, + IfCondition* condition, + IfCondition* opposite) { + if (value == 1) { + if (*condition == kCondB) { + value = 0; + *condition = kCondEQ; + *opposite = kCondNE; + } else if (*condition == kCondAE) { + value = 0; + *condition = kCondNE; + *opposite = kCondEQ; + } + } else if (value == -1) { + if (*condition == kCondGT) { + value = 0; + *condition = kCondGE; + *opposite = kCondLT; + } else if (*condition == kCondLE) { + value = 0; + *condition = kCondLT; + *opposite = kCondGE; + } + } + + return value; +} + static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( HCondition* condition, bool invert, @@ -1797,7 +1825,37 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( const vixl32::Register left_high = HighRegisterFrom(left); const vixl32::Register left_low = LowRegisterFrom(left); - int64_t value = Int64ConstantFrom(right); + int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite); + UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (cond) { + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: + ret = std::make_pair(ne, eq); + FALLTHROUGH_INTENDED; + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. + case kCondBE: + __ Orrs(temps.Acquire(), left_low, left_high); + return ret; + case kCondLT: + case kCondGE: + __ Cmp(left_high, 0); + return std::make_pair(ARMCondition(cond), ARMCondition(opposite)); + // Trivially true or false. 
+ case kCondB: + ret = std::make_pair(ne, eq); + FALLTHROUGH_INTENDED; + case kCondAE: + __ Cmp(left_low, left_low); + return ret; + default: + break; + } + } switch (cond) { case kCondEQ: @@ -1842,8 +1900,6 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( FALLTHROUGH_INTENDED; case kCondGE: case kCondLT: { - UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); - __ Cmp(left_low, Low32Bits(value)); __ Sbcs(temps.Acquire(), left_high, High32Bits(value)); ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); @@ -1961,18 +2017,22 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) { if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { const LocationSummary* const locations = condition->GetLocations(); - const IfCondition c = condition->GetCondition(); if (locations->InAt(1).IsConstant()) { - const int64_t value = Int64ConstantFrom(locations->InAt(1)); + IfCondition c = condition->GetCondition(); + IfCondition opposite = condition->GetOppositeCondition(); + const int64_t value = + AdjustConstantForCondition(Int64ConstantFrom(locations->InAt(1)), &c, &opposite); if (c < kCondLT || c > kCondGE) { // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, // we check that the least significant half of the first input to be compared // is in a low register (the other half is read outside an IT block), and // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used. - if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) { + // encoding can be used; 0 is always handled, no matter what registers are + // used by the first input. + if (value != 0 && + (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value)))) { return false; } // TODO(VIXL): The rest of the checks are there to keep the backend in sync with @@ -1991,6 +2051,353 @@ static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) return true; } +static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + DCHECK(CanGenerateTest(cond, codegen->GetAssembler())); + + const vixl32::Register out = OutputRegister(cond); + const auto condition = GenerateTest(cond, false, codegen); + + __ Mov(LeaveFlags, out, 0); + + if (out.IsLow()) { + // We use the scope because of the IT block that follows. 
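+    // (The scope reserves exactly two 16-bit instructions and keeps VIXL
+    // from emitting anything else, such as a literal pool, between the IT
+    // and the MOV it predicates.)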
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(condition.first); + __ mov(condition.first, out, 1); + } else { + vixl32::Label done_label; + vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + + __ B(condition.second, final_label, /* far_target */ false); + __ Mov(out, 1); + + if (done_label.IsReferenced()) { + __ Bind(&done_label); + } + } +} + +static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const vixl32::Register out = OutputRegister(cond); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + vixl32::Register left_high = HighRegisterFrom(left); + vixl32::Register left_low = LowRegisterFrom(left); + vixl32::Register temp; + UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), + &condition, + &opposite); + Operand right_high = High32Bits(value); + Operand right_low = Low32Bits(value); + + // The output uses Location::kNoOutputOverlap. + if (out.Is(left_high)) { + std::swap(left_low, left_high); + std::swap(right_low, right_high); + } + + __ Sub(out, left_low, right_low); + temp = temps.Acquire(); + __ Sub(temp, left_high, right_high); + } else { + DCHECK(right.IsRegisterPair()); + temp = temps.Acquire(); + __ Sub(temp, left_high, HighRegisterFrom(right)); + __ Sub(out, left_low, LowRegisterFrom(right)); + } + + // Need to check after calling AdjustConstantForCondition(). + DCHECK(condition == kCondEQ || condition == kCondNE) << condition; + + if (condition == kCondNE && out.IsLow()) { + __ Orrs(out, out, temp); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + } else { + __ Orr(out, out, temp); + codegen->GenerateConditionWithZero(condition, out, out, temp); + } +} + +static void GenerateLongComparesAndJumps(HCondition* cond, + vixl32::Label* true_label, + vixl32::Label* false_label, + CodeGeneratorARMVIXL* codegen) { + LocationSummary* locations = cond->GetLocations(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + IfCondition if_cond = cond->GetCondition(); + + vixl32::Register left_high = HighRegisterFrom(left); + vixl32::Register left_low = LowRegisterFrom(left); + IfCondition true_high_cond = if_cond; + IfCondition false_high_cond = cond->GetOppositeCondition(); + vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part + + // Set the conditions for the test, remembering that == needs to be + // decided using the low words. + switch (if_cond) { + case kCondEQ: + case kCondNE: + // Nothing to do. 
+ break; + case kCondLT: + false_high_cond = kCondGT; + break; + case kCondLE: + true_high_cond = kCondLT; + break; + case kCondGT: + false_high_cond = kCondLT; + break; + case kCondGE: + true_high_cond = kCondGT; + break; + case kCondB: + false_high_cond = kCondA; + break; + case kCondBE: + true_high_cond = kCondB; + break; + case kCondA: + false_high_cond = kCondB; + break; + case kCondAE: + true_high_cond = kCondA; + break; + } + if (right.IsConstant()) { + int64_t value = Int64ConstantFrom(right); + int32_t val_low = Low32Bits(value); + int32_t val_high = High32Bits(value); + + __ Cmp(left_high, val_high); + if (if_cond == kCondNE) { + __ B(ARMCondition(true_high_cond), true_label); + } else if (if_cond == kCondEQ) { + __ B(ARMCondition(false_high_cond), false_label); + } else { + __ B(ARMCondition(true_high_cond), true_label); + __ B(ARMCondition(false_high_cond), false_label); + } + // Must be equal high, so compare the lows. + __ Cmp(left_low, val_low); + } else { + vixl32::Register right_high = HighRegisterFrom(right); + vixl32::Register right_low = LowRegisterFrom(right); + + __ Cmp(left_high, right_high); + if (if_cond == kCondNE) { + __ B(ARMCondition(true_high_cond), true_label); + } else if (if_cond == kCondEQ) { + __ B(ARMCondition(false_high_cond), false_label); + } else { + __ B(ARMCondition(true_high_cond), true_label); + __ B(ARMCondition(false_high_cond), false_label); + } + // Must be equal high, so compare the lows. + __ Cmp(left_low, right_low); + } + // The last comparison might be unsigned. + // TODO: optimize cases where this is always true/false + __ B(final_condition, true_label); +} + +static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + + const LocationSummary* const locations = cond->GetLocations(); + IfCondition condition = cond->GetCondition(); + const vixl32::Register out = OutputRegister(cond); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + // Comparisons against 0 are common enough to deserve special attention. + if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (out.IsLow()) { + // We only care if both input registers are 0 or not. + __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left)); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + // We only care if both input registers are 0 or not. + __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left)); + codegen->GenerateConditionWithZero(condition, out, out); + return; + case kCondLT: + case kCondGE: + // We only care about the sign bit. + FALLTHROUGH_INTENDED; + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left)); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if ((condition == kCondEQ || condition == kCondNE) && + // If `out` is a low register, then the GenerateConditionGeneric() + // function generates a shorter code sequence that is still branchless. 
+ (!out.IsLow() || !CanGenerateTest(cond, codegen->GetAssembler()))) { + GenerateEqualLong(cond, codegen); + return; + } + + if (CanGenerateTest(cond, codegen->GetAssembler())) { + GenerateConditionGeneric(cond, codegen); + return; + } + + // Convert the jumps into the result. + vixl32::Label done_label; + vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); + vixl32::Label true_label, false_label; + + GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen); + + // False case: result = 0. + __ Bind(&false_label); + __ Mov(out, 0); + __ B(final_label); + + // True case: result = 1. + __ Bind(&true_label); + __ Mov(out, 1); + + if (done_label.IsReferenced()) { + __ Bind(&done_label); + } +} + +static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARMVIXL* codegen) { + const Primitive::Type type = cond->GetLeft()->GetType(); + + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + + if (type == Primitive::kPrimLong) { + GenerateConditionLong(cond, codegen); + return; + } + + IfCondition condition = cond->GetCondition(); + vixl32::Register in = InputRegisterAt(cond, 0); + const vixl32::Register out = OutputRegister(cond); + const Location right = cond->GetLocations()->InAt(1); + int64_t value; + + if (right.IsConstant()) { + IfCondition opposite = cond->GetOppositeCondition(); + + value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite); + + // Comparisons against 0 are common enough to deserve special attention. + if (value == 0) { + switch (condition) { + case kCondNE: + case kCondA: + if (out.IsLow() && out.Is(in)) { + __ Cmp(out, 0); + + // We use the scope because of the IT block that follows. + ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + return; + } + + FALLTHROUGH_INTENDED; + case kCondEQ: + case kCondBE: + case kCondLT: + case kCondGE: + case kCondAE: + case kCondB: + codegen->GenerateConditionWithZero(condition, out, in); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if (condition == kCondEQ || condition == kCondNE) { + Operand operand(0); + + if (right.IsConstant()) { + operand = Operand::From(value); + } else if (out.Is(RegisterFrom(right))) { + // Avoid 32-bit instructions if possible. + operand = InputOperandAt(cond, 0); + in = RegisterFrom(right); + } else { + operand = InputOperandAt(cond, 1); + } + + if (condition == kCondNE && out.IsLow()) { + __ Subs(out, in, operand); + + // We use the scope because of the IT block that follows. 
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + + __ it(ne); + __ mov(ne, out, 1); + } else { + __ Sub(out, in, operand); + codegen->GenerateConditionWithZero(condition, out, out); + } + + return; + } + + GenerateConditionGeneric(cond, codegen); +} + static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) { const Primitive::Type type = constant->GetType(); bool ret = false; @@ -2093,9 +2500,10 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -2547,89 +2955,6 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond, - vixl32::Label* true_label, - vixl32::Label* false_label) { - LocationSummary* locations = cond->GetLocations(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - IfCondition if_cond = cond->GetCondition(); - - vixl32::Register left_high = HighRegisterFrom(left); - vixl32::Register left_low = LowRegisterFrom(left); - IfCondition true_high_cond = if_cond; - IfCondition false_high_cond = cond->GetOppositeCondition(); - vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part - - // Set the conditions for the test, remembering that == needs to be - // decided using the low words. - switch (if_cond) { - case kCondEQ: - case kCondNE: - // Nothing to do. - break; - case kCondLT: - false_high_cond = kCondGT; - break; - case kCondLE: - true_high_cond = kCondLT; - break; - case kCondGT: - false_high_cond = kCondLT; - break; - case kCondGE: - true_high_cond = kCondGT; - break; - case kCondB: - false_high_cond = kCondA; - break; - case kCondBE: - true_high_cond = kCondB; - break; - case kCondA: - false_high_cond = kCondB; - break; - case kCondAE: - true_high_cond = kCondA; - break; - } - if (right.IsConstant()) { - int64_t value = Int64ConstantFrom(right); - int32_t val_low = Low32Bits(value); - int32_t val_high = High32Bits(value); - - __ Cmp(left_high, val_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label); - } else { - __ B(ARMCondition(true_high_cond), true_label); - __ B(ARMCondition(false_high_cond), false_label); - } - // Must be equal high, so compare the lows. 
- __ Cmp(left_low, val_low); - } else { - vixl32::Register right_high = HighRegisterFrom(right); - vixl32::Register right_low = LowRegisterFrom(right); - - __ Cmp(left_high, right_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label); - } else { - __ B(ARMCondition(true_high_cond), true_label); - __ B(ARMCondition(false_high_cond), false_label); - } - // Must be equal high, so compare the lows. - __ Cmp(left_low, right_low); - } - // The last comparison might be unsigned. - // TODO: optimize cases where this is always true/false - __ B(final_condition, true_label); -} - void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, vixl32::Label* true_target_in, vixl32::Label* false_target_in) { @@ -2664,7 +2989,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* c vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in; DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); - GenerateLongComparesAndJumps(condition, true_target, false_target); + GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_); if (false_target != &fallthrough) { __ B(false_target); @@ -2975,6 +3300,83 @@ void CodeGeneratorARMVIXL::GenerateNop() { __ Nop(); } +// `temp` is an extra temporary register that is used for some conditions; +// callers may not specify it, in which case the method will use a scratch +// register instead. +void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition, + vixl32::Register out, + vixl32::Register in, + vixl32::Register temp) { + switch (condition) { + case kCondEQ: + // x <= 0 iff x == 0 when the comparison is unsigned. + case kCondBE: + if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) { + temp = out; + } + + // Avoid 32-bit instructions if possible; note that `in` and `temp` must be + // different as well. + if (in.IsLow() && temp.IsLow() && !in.Is(temp)) { + // temp = - in; only 0 sets the carry flag. + __ Rsbs(temp, in, 0); + + if (out.Is(in)) { + std::swap(in, temp); + } + + // out = - in + in + carry = carry + __ Adc(out, temp, in); + } else { + // If `in` is 0, then it has 32 leading zeros, and less than that otherwise. + __ Clz(out, in); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); + } + + break; + case kCondNE: + // x > 0 iff x != 0 when the comparison is unsigned. + case kCondA: { + UseScratchRegisterScope temps(GetVIXLAssembler()); + + if (out.Is(in)) { + if (!temp.IsValid() || in.Is(temp)) { + temp = temps.Acquire(); + } + } else if (!temp.IsValid() || !temp.IsLow()) { + temp = out; + } + + // temp = in - 1; only 0 does not set the carry flag. + __ Subs(temp, in, 1); + // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry + __ Sbc(out, in, temp); + break; + } + case kCondGE: + __ Mvn(out, in); + in = out; + FALLTHROUGH_INTENDED; + case kCondLT: + // We only care about the sign bit. + __ Lsr(out, in, 31); + break; + case kCondAE: + // Trivially true. + __ Mov(out, 1); + break; + case kCondB: + // Trivially false. 
+ __ Mov(out, 0); + break; + default: + LOG(FATAL) << "Unexpected condition " << condition; + UNREACHABLE(); + } +} + void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); @@ -3011,52 +3413,47 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { return; } - const vixl32::Register out = OutputRegister(cond); - - if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) { - const auto condition = GenerateTest(cond, false, codegen_); - // We use the scope because of the IT block that follows. ExactAssemblyScope guard(GetVIXLAssembler(), - 4 * vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); + const Primitive::Type type = cond->GetLeft()->GetType(); - __ it(condition.first); - __ mov(condition.first, out, 1); - __ it(condition.second); - __ mov(condition.second, out, 0); + if (Primitive::IsFloatingPointType(type)) { + GenerateConditionGeneric(cond, codegen_); return; } - // Convert the jumps into the result. vixl32::Label done_label; vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); + DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { - vixl32::Label true_label, false_label; + const IfCondition condition = cond->GetCondition(); - GenerateLongComparesAndJumps(cond, &true_label, &false_label); + // A condition with a single boolean input, or with two boolean inputs compared by anything + // other than equality or inequality, results from transformations done by the instruction + // simplifier and is handled as a regular condition with integral inputs. + if (type == Primitive::kPrimBoolean && + cond->GetRight()->GetType() == Primitive::kPrimBoolean && + (condition == kCondEQ || condition == kCondNE)) { + vixl32::Register left = InputRegisterAt(cond, 0); + const vixl32::Register out = OutputRegister(cond); + const Location right_loc = cond->GetLocations()->InAt(1); - // False case: result = 0. - __ Bind(&false_label); - __ Mov(out, 0); - __ B(final_label); + // The constant case is handled by the instruction simplifier. + DCHECK(!right_loc.IsConstant()); - // True case: result = 1. - __ Bind(&true_label); - __ Mov(out, 1); - } else { - DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); + vixl32::Register right = RegisterFrom(right_loc); - const auto condition = GenerateTest(cond, false, codegen_); + // Avoid 32-bit instructions if possible. 
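GenerateConditionWithZero above relies on two branchless identities: CLZ yields 32 only for zero, so a logical shift right by 5 maps exactly zero to 1; and the ARM carry flag out of a subtraction means "no borrow", so RSBS/ADC and SUBS/SBC can materialize (x == 0) and (x != 0) without a branch. A C++ sketch of the three tricks, modelling the carry flag explicitly (helper names are illustrative; __builtin_clz is the GCC/Clang intrinsic):

#include <cassert>
#include <cstdint>

// (x == 0) via CLZ: only zero has 32 leading zeros, and 32 >> 5 == 1.
uint32_t IsZeroClz(uint32_t x) {
  const uint32_t lz = (x == 0) ? 32u : static_cast<uint32_t>(__builtin_clz(x));
  return lz >> 5;
}

// (x == 0) via RSBS/ADC: the carry-out of 0 - x is set iff x == 0,
// and temp + x + carry == carry because temp == -x.
uint32_t IsZeroCarry(uint32_t x) {
  const uint32_t carry = (0u >= x) ? 1u : 0u;  // ARM carry == "no borrow".
  const uint32_t temp = 0u - x;
  return temp + x + carry;
}

// (x != 0) via SUBS/SBC: the carry-out of x - 1 is set iff x >= 1,
// and x - temp - (1 - carry) == carry because temp == x - 1.
uint32_t IsNonZeroCarry(uint32_t x) {
  const uint32_t carry = (x >= 1u) ? 1u : 0u;
  const uint32_t temp = x - 1u;
  return x - temp - (1u - carry);
}

int main() {
  for (uint32_t x : {0u, 1u, 2u, 0x80000000u, 0xFFFFFFFFu}) {
    assert(IsZeroClz(x) == static_cast<uint32_t>(x == 0));
    assert(IsZeroCarry(x) == static_cast<uint32_t>(x == 0));
    assert(IsNonZeroCarry(x) == static_cast<uint32_t>(x != 0));
  }
  return 0;
}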
+ if (out.Is(right)) { + std::swap(left, right); + } - __ Mov(LeaveFlags, out, 0); - __ B(condition.second, final_label, /* far_target */ false); - __ Mov(out, 1); - } + __ Eor(out, left, right); - if (done_label.IsReferenced()) { - __ Bind(&done_label); + if (condition == kCondEQ) { + __ Eor(out, out, 1); + } + + return; } + + GenerateConditionIntegralOrNonPrimitive(cond, codegen_); } void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) { @@ -8734,6 +9131,13 @@ Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + vixl32::Register temp_reg = RegisterFrom(temp); + EmitMovwMovtPlaceholder(labels, temp_reg); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); break; @@ -8850,9 +9254,11 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location __ blx(lr); } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch( @@ -8865,6 +9271,11 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntry return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +} + CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -8934,22 +9345,26 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa DCHECK(linker_patches->empty()); size_t size = /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + 
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index daba9bf060..91f7524c8e 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -401,9 +401,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateCompareTestAndBranch(HCondition* condition, vixl::aarch32::Label* true_target, vixl::aarch32::Label* false_target); - void GenerateLongComparesAndJumps(HCondition* cond, - vixl::aarch32::Label* true_label, - vixl::aarch32::Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -566,10 +563,11 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Label add_pc_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); @@ -716,6 +714,14 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo* labels, vixl::aarch32::Register out); + // `temp` is an extra temporary register that is used for some conditions; + // callers may not specify it, in which case the method will use a scratch + // register instead. + void GenerateConditionWithZero(IfCondition condition, + vixl::aarch32::Register out, + vixl::aarch32::Register in, + vixl::aarch32::Register temp = vixl32::Register()); + private: vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, vixl::aarch32::Register temp); @@ -760,12 +766,14 @@ class CodeGeneratorARMVIXL : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // PC-relative patch info for each HArmDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. 
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 95be3d7fd2..d8ac99a9a6 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1061,9 +1061,10 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), clobbered_ra_(false) { @@ -1602,30 +1603,36 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - pc_relative_string_patches_.size() + + pc_relative_method_patches_.size() + pc_relative_type_patches_.size() + - type_bss_entry_patches_.size(); + type_bss_entry_patches_.size() + + pc_relative_string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch( @@ -1638,6 +1645,11 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewTypeBssEntryPatch( return 
NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +} + CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -6947,7 +6959,7 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); - bool has_extra_input = invoke->HasPcRelativeDexCache() && !is_r6; + bool has_extra_input = invoke->HasPcRelativeMethodLoadKind() && !is_r6; IntrinsicLocationsBuilderMIPS intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -7084,6 +7096,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticO bool is_r6 = GetInstructionSetFeatures().IsR6(); bool fallback_load = has_irreducible_loops && !is_r6; switch (dispatch_info.method_load_kind) { + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: break; default: @@ -7103,7 +7116,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); bool is_r6 = GetInstructionSetFeatures().IsR6(); - Register base_reg = (invoke->HasPcRelativeDexCache() && !is_r6) + Register base_reg = (invoke->HasPcRelativeMethodLoadKind() && !is_r6) ? 
GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()) : ZERO; @@ -7121,6 +7134,16 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + PcRelativePatchInfo* info = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + bool reordering = __ SetReorder(false); + Register temp_reg = temp.AsRegister<Register>(); + EmitPcRelativeAddressPlaceholderHigh(info, temp_reg, base_reg); + __ Addiu(temp_reg, temp_reg, /* placeholder */ 0x5678); + __ SetReorder(reordering); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 449cb4c62b..ff1fde6489 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -582,10 +582,11 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsLabel pc_rel_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); Literal* DeduplicateBootImageAddressLiteral(uint32_t address); @@ -644,12 +645,15 @@ class CodeGeneratorMIPS : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // PC-relative patch info for each HMipsDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Patches for string root accesses in JIT compiled code. ArenaDeque<JitPatchInfo> jit_string_patches_; // Patches for class root accesses in JIT compiled code. 
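The high/ADDIU placeholder pair above (and the MOVW/MOVT pair on ARM) is patched later with the two halves of a PC-relative offset. Because ADDIU sign-extends its 16-bit immediate, the high half must be rounded up whenever bit 15 of the offset is set. A self-checking sketch of that split; names are illustrative, and offsets are assumed to stay far enough below INT32_MAX that the +0x8000 adjustment cannot overflow:

#include <cassert>
#include <cstdint>

// Split a 32-bit PC-relative offset into a high half for AUIPC/LUI and a
// sign-extended 16-bit low half for ADDIU, such that the pair recombines
// exactly: (high << 16) + low == offset.
void SplitPcRelativeOffset(int32_t offset, int32_t* high, int16_t* low) {
  *high = (offset + 0x8000) >> 16;  // Round up when bit 15 is set.
  *low = static_cast<int16_t>(offset & 0xFFFF);
}

int main() {
  const int32_t samples[] = {0, 0x7FFF, 0x8000, 0x12345678, -4, -0x8000};
  for (int32_t offset : samples) {
    int32_t high;
    int16_t low;
    SplitPcRelativeOffset(offset, &high, &low);
    assert((high << 16) + low == offset);
  }
  return 0;
}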
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 5cdff5a7bc..096139191e 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -958,9 +958,10 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -1440,30 +1441,36 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - pc_relative_string_patches_.size() + + pc_relative_method_patches_.size() + pc_relative_type_patches_.size() + - type_bss_entry_patches_.size(); + type_bss_entry_patches_.size() + + pc_relative_string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); - if (!GetCompilerOptions().IsBootImage()) { - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); - } else { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); + } else { + DCHECK(pc_relative_method_patches_.empty()); + DCHECK(pc_relative_type_patches_.empty()); + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); } EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch( - const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeMethodPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &pc_relative_method_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch( @@ -1476,6 +1483,11 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewTypeBssEntryPa return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); } +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); 
+} + CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); @@ -4923,6 +4935,14 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + EmitPcRelativeAddressPlaceholderHigh(info, AT); + __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadLiteral(temp.AsRegister<GpuRegister>(), kLoadDoubleword, diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 1f34ced687..f49ad49fce 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -545,10 +545,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Mips64Label pc_rel_label; }; - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file, @@ -605,12 +606,15 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Uint64ToLiteralMap uint64_literals_; // PC-relative patch info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Patches for string root accesses in JIT compiled code. StringToLiteralMap jit_string_patches_; // Patches for class root accesses in JIT compiled code. 
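All four back-ends touched by this change reshape EmitLinkerPatches the same way: the exact patch total is reserved up front; method, type, and string patches are emitted as boot-image-relative kinds when compiling the boot image; otherwise strings fall back to .bss entries and the boot-image-only lists must be empty. A toy model of that bookkeeping, with illustrative names rather than the real LinkerPatch API:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

enum class PatchKind { kRelativeMethod, kRelativeType, kRelativeString, kStringBssEntry };
struct ToyPatch { PatchKind kind; uint32_t literal_offset; };

void EmitToyPatches(bool boot_image,
                    const std::vector<uint32_t>& methods,
                    const std::vector<uint32_t>& types,
                    const std::vector<uint32_t>& strings,
                    std::vector<ToyPatch>* out) {
  const std::size_t size = methods.size() + types.size() + strings.size();
  out->reserve(size);  // Mirrors linker_patches->reserve(size).
  if (boot_image) {
    for (uint32_t o : methods) out->push_back({PatchKind::kRelativeMethod, o});
    for (uint32_t o : types) out->push_back({PatchKind::kRelativeType, o});
    for (uint32_t o : strings) out->push_back({PatchKind::kRelativeString, o});
  } else {
    // Boot-image-only patch kinds must be empty outside of boot image compiles.
    assert(methods.empty() && types.empty());
    for (uint32_t o : strings) out->push_back({PatchKind::kStringBssEntry, o});
  }
  assert(out->size() == size);  // Mirrors DCHECK_EQ(size, linker_patches->size()).
}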
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 4a279d8de1..f3ec112548 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1032,9 +1032,10 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, assembler_(graph->GetArena()), isa_features_(isa_features), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), constant_area_start_(-1), @@ -2167,7 +2168,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { - if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { + if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) { invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); } return; @@ -2176,7 +2177,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok HandleInvoke(invoke); // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. - if (invoke->HasPcRelativeDexCache()) { + if (invoke->HasPcRelativeMethodLoadKind()) { invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); } } @@ -4543,6 +4544,14 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage()); + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset)); + RecordBootMethodPatch(invoke); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); break; @@ -4631,13 +4640,14 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value())); } -void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); - HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); - string_patches_.emplace_back(address, - load_string->GetDexFile(), - load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); + boot_image_method_patches_.emplace_back(address, + *invoke->GetTargetMethod().dex_file, + 
invoke->GetTargetMethod().dex_method_index); + __ Bind(&boot_image_method_patches_.back().label); } void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) { @@ -4656,6 +4666,15 @@ Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) { return &type_bss_entry_patches_.back().label; } +void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { + DCHECK(GetCompilerOptions().IsBootImage()); + HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); + string_patches_.emplace_back(address, + load_string->GetDexFile(), + load_string->GetStringIndex().index_); + __ Bind(&string_patches_.back().label); +} + Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); HX86ComputeBaseMethodAddress* address = @@ -4693,17 +4712,21 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - string_patches_.size() + + boot_image_method_patches_.size() + boot_image_type_patches_.size() + - type_bss_entry_patches_.size(); + type_bss_entry_patches_.size() + + string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } else { + DCHECK(boot_image_method_patches_.empty()); DCHECK(boot_image_type_patches_.empty()); EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f08d642f5e..21c527e8b0 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -413,9 +413,10 @@ class CodeGeneratorX86 : public CodeGenerator { // Generate a call to a virtual method. void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); void RecordBootTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); + void RecordBootStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(HX86ComputeBaseMethodAddress* method_address, const DexFile& dex_file, @@ -633,16 +634,17 @@ class CodeGeneratorX86 : public CodeGenerator { // PC-relative DexCache access info. ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_; - // String patch locations; type depends on configuration (app .bss or boot image). - ArenaDeque<X86PcRelativePatchInfo> string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; + // String patch locations; type depends on configuration (app .bss or boot image). 
+ ArenaDeque<X86PcRelativePatchInfo> string_patches_; // Patches for string root accesses in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; - // Patches for class root accesses in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_class_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ac0f37b717..bf1c42ae8e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -991,6 +991,12 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().IsBootImage()); + __ leal(temp.AsRegister<CpuRegister>(), + Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); + RecordBootMethodPatch(invoke); + break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); break; @@ -1079,10 +1085,10 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t kX86_64PointerSize).SizeValue())); } -void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); - string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { + boot_image_method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); + __ Bind(&boot_image_method_patches_.back().label); } void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) { @@ -1096,6 +1102,12 @@ Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) { return &type_bss_entry_patches_.back().label; } +void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { + DCHECK(GetCompilerOptions().IsBootImage()); + string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); + __ Bind(&string_patches_.back().label); +} + Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); @@ -1128,17 +1140,21 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat DCHECK(linker_patches->empty()); size_t size = pc_relative_dex_cache_patches_.size() + - string_patches_.size() + + boot_image_method_patches_.size() + boot_image_type_patches_.size() + - type_bss_entry_patches_.size(); + type_bss_entry_patches_.size() + + string_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } else { + DCHECK(boot_image_method_patches_.empty()); DCHECK(boot_image_type_patches_.empty()); 
EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); } @@ -1231,12 +1247,13 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, isa_features_(isa_features), constant_area_start_(0), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index d8005cc410..3039e0519c 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -408,9 +408,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); void RecordBootTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); + void RecordBootStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); Label* NewJitRootStringPatch(const DexFile& dex_file, @@ -603,22 +604,23 @@ class CodeGeneratorX86_64 : public CodeGenerator { // PC-relative DexCache access info. ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; - // String patch locations; type depends on configuration (app .bss or boot image). - ArenaDeque<PatchInfo<Label>> string_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; - - // Fixups for jump tables need to be handled specially. - ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; + // String patch locations; type depends on configuration (app .bss or boot image). + ArenaDeque<PatchInfo<Label>> string_patches_; // Patches for string literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; - // Patches for class literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_class_patches_; + // Fixups for jump tables need to be handled specially. 
+ ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 8674e727bb..0ec6ee2fe2 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -1856,7 +1856,7 @@ void HInliner::RunOptimizations(HGraph* callee_graph, HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); HConstantFolding fold(callee_graph, "constant_folding$inliner"); HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_); - InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_); + InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_); IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_); HOptimization* optimizations[] = { diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2cedde900e..d14716601c 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -30,9 +30,11 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { public: InstructionSimplifierVisitor(HGraph* graph, CodeGenerator* codegen, + CompilerDriver* compiler_driver, OptimizingCompilerStats* stats) : HGraphDelegateVisitor(graph), codegen_(codegen), + compiler_driver_(compiler_driver), stats_(stats) {} void Run(); @@ -119,6 +121,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind); CodeGenerator* codegen_; + CompilerDriver* compiler_driver_; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; int simplifications_at_current_position_ = 0; @@ -130,7 +133,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { }; void InstructionSimplifier::Run() { - InstructionSimplifierVisitor visitor(graph_, codegen_, stats_); + InstructionSimplifierVisitor visitor(graph_, codegen_, compiler_driver_, stats_); visitor.Run(); } @@ -1896,7 +1899,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) // the invoke, as we would need to look it up in the current dex file, and it // is unlikely that it exists. The most usual situation for such typed // arraycopy methods is a direct pointer to the boot image. - HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_); + HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_, compiler_driver_); } } } diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index f7329a4a1f..5e2045580b 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -24,6 +24,7 @@ namespace art { class CodeGenerator; +class CompilerDriver; /** * Implements optimizations specific to each instruction. 
@@ -37,12 +38,14 @@ class CodeGenerator; */ class InstructionSimplifier : public HOptimization { public: - explicit InstructionSimplifier(HGraph* graph, - CodeGenerator* codegen, - OptimizingCompilerStats* stats = nullptr, - const char* name = kInstructionSimplifierPassName) + InstructionSimplifier(HGraph* graph, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + OptimizingCompilerStats* stats = nullptr, + const char* name = kInstructionSimplifierPassName) : HOptimization(graph, name, stats), - codegen_(codegen) {} + codegen_(codegen), + compiler_driver_(compiler_driver) {} static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; @@ -50,6 +53,7 @@ class InstructionSimplifier : public HOptimization { private: CodeGenerator* codegen_; + CompilerDriver* compiler_driver_; DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 1df884e551..e8a62aafae 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -2598,11 +2598,7 @@ void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) { // We don't care about the sign bit, so shift left. __ Lsl(out, out, 1); __ eor(out, out, ShifterOperand(infinity)); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. - __ clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -2625,11 +2621,7 @@ void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) { __ eor(out, out, ShifterOperand(infinity_high2)); // We don't care about the sign bit, so shift left. __ orr(out, IP, ShifterOperand(out, LSL, 1)); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. - __ clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 2d9781ade8..ce3ba52b34 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -2971,11 +2971,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { // We don't care about the sign bit, so shift left. __ Lsl(out, out, 1); __ Eor(out, out, infinity); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. - __ Clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -3001,11 +2997,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { __ Eor(out, out, infinity_high2); // We don't care about the sign bit, so shift left. __ Orr(out, temp, Operand(out, vixl32::LSL, 1)); - // If the result is 0, then it has 32 leading zeros, and less than that otherwise. 
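For context on the intrinsics change here: the open-coded sequence being removed computes Float.isInfinite entirely with integer operations, shifting the sign bit out, XOR-ing against the shifted bit pattern of +infinity, then testing the result for zero (previously via CLZ/LSR, now via GenerateConditionWithZero). A hedged C++ equivalent of the float case:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Float.isInfinite via bit twiddling: LSL #1 discards the sign bit, and
// the shifted pattern of +infinity (0x7F800000 << 1) is 0xFF000000, so
// the XOR result is zero exactly for +/-infinity.
bool IsInfiniteFloat(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return ((bits << 1) ^ 0xFF000000u) == 0u;
}

int main() {
  assert(IsInfiniteFloat(HUGE_VALF));
  assert(IsInfiniteFloat(-HUGE_VALF));
  assert(!IsInfiniteFloat(0.0f));
  assert(!IsInfiniteFloat(1.0f));
  assert(!IsInfiniteFloat(std::nanf("")));  // NaN is not infinite.
  return 0;
}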
- __ Clz(out, out); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); + codegen_->GenerateConditionWithZero(kCondEQ, out, out); } void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc new file mode 100644 index 0000000000..f2ee345c8c --- /dev/null +++ b/compiler/optimizing/load_store_analysis.cc @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "load_store_analysis.h" + +namespace art { + +// A cap for the number of heap locations to prevent pathological time/space consumption. +// The number of heap locations for most of the methods stays below this threshold. +constexpr size_t kMaxNumberOfHeapLocations = 32; + +void LoadStoreAnalysis::Run() { + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + heap_location_collector_.VisitBasicBlock(block); + } + + if (heap_location_collector_.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) { + // Bail out if there are too many heap locations to deal with. + heap_location_collector_.CleanUp(); + return; + } + if (!heap_location_collector_.HasHeapStores()) { + // Without heap stores, this pass would act mostly as GVN on heap accesses. + heap_location_collector_.CleanUp(); + return; + } + if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) { + // Don't do load/store elimination if the method has volatile field accesses or + // monitor operations, for now. + // TODO: do it right. + heap_location_collector_.CleanUp(); + return; + } + + heap_location_collector_.BuildAliasingMatrix(); +} + +} // namespace art diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h new file mode 100644 index 0000000000..4e940f30bf --- /dev/null +++ b/compiler/optimizing/load_store_analysis.h @@ -0,0 +1,518 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_ +#define ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_ + +#include "escape.h" +#include "nodes.h" +#include "optimization.h" + +namespace art { + +// A ReferenceInfo contains additional info about a reference such as +// whether it's a singleton, returned, etc. 
+class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { + public: + ReferenceInfo(HInstruction* reference, size_t pos) + : reference_(reference), + position_(pos), + is_singleton_(true), + is_singleton_and_not_returned_(true), + is_singleton_and_not_deopt_visible_(true), + has_index_aliasing_(false) { + CalculateEscape(reference_, + nullptr, + &is_singleton_, + &is_singleton_and_not_returned_, + &is_singleton_and_not_deopt_visible_); + } + + HInstruction* GetReference() const { + return reference_; + } + + size_t GetPosition() const { + return position_; + } + + // Returns true if reference_ is the only name that can refer to its value during + // the lifetime of the method. So it's guaranteed to not have any alias in + // the method (including its callees). + bool IsSingleton() const { + return is_singleton_; + } + + // Returns true if reference_ is a singleton and not returned to the caller or + // used as an environment local of an HDeoptimize instruction. + // The allocation and stores into reference_ may be eliminated for such cases. + bool IsSingletonAndRemovable() const { + return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_; + } + + // Returns true if reference_ is a singleton and returned to the caller or + // used as an environment local of an HDeoptimize instruction. + bool IsSingletonAndNonRemovable() const { + return is_singleton_ && + (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_); + } + + bool HasIndexAliasing() { + return has_index_aliasing_; + } + + void SetHasIndexAliasing(bool has_index_aliasing) { + // Only allow setting to true. + DCHECK(has_index_aliasing); + has_index_aliasing_ = has_index_aliasing; + } + + private: + HInstruction* const reference_; + const size_t position_; // position in HeapLocationCollector's ref_info_array_. + + // Can only be referred to by a single name in the method. + bool is_singleton_; + // Is singleton and not returned to caller. + bool is_singleton_and_not_returned_; + // Is singleton and not used as an environment local of HDeoptimize. + bool is_singleton_and_not_deopt_visible_; + // Some heap locations with reference_ have array index aliasing, + // e.g. arr[i] and arr[j] may be the same location. + bool has_index_aliasing_; + + DISALLOW_COPY_AND_ASSIGN(ReferenceInfo); +}; + +// A heap location is a reference-offset/index pair that a value can be loaded from +// or stored to. +class HeapLocation : public ArenaObject<kArenaAllocMisc> { + public: + static constexpr size_t kInvalidFieldOffset = -1; + + // TODO: more fine-grained array types. + static constexpr int16_t kDeclaringClassDefIndexForArrays = -1; + + HeapLocation(ReferenceInfo* ref_info, + size_t offset, + HInstruction* index, + int16_t declaring_class_def_index) + : ref_info_(ref_info), + offset_(offset), + index_(index), + declaring_class_def_index_(declaring_class_def_index), + value_killed_by_loop_side_effects_(true) { + DCHECK(ref_info != nullptr); + DCHECK((offset == kInvalidFieldOffset && index != nullptr) || + (offset != kInvalidFieldOffset && index == nullptr)); + if (ref_info->IsSingleton() && !IsArrayElement()) { + // Assume this location's value cannot be killed by loop side effects + // until proven otherwise. + value_killed_by_loop_side_effects_ = false; + } + } + + ReferenceInfo* GetReferenceInfo() const { return ref_info_; } + size_t GetOffset() const { return offset_; } + HInstruction* GetIndex() const { return index_; } + + // Returns the definition of declaring class' dex index. 
+ // It's kDeclaringClassDefIndexForArrays for an array element. + int16_t GetDeclaringClassDefIndex() const { + return declaring_class_def_index_; + } + + bool IsArrayElement() const { + return index_ != nullptr; + } + + bool IsValueKilledByLoopSideEffects() const { + return value_killed_by_loop_side_effects_; + } + + void SetValueKilledByLoopSideEffects(bool val) { + value_killed_by_loop_side_effects_ = val; + } + + private: + ReferenceInfo* const ref_info_; // reference for instance/static field or array access. + const size_t offset_; // offset of static/instance field. + HInstruction* const index_; // index of an array element. + const int16_t declaring_class_def_index_; // declaring class's def's dex index. + bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop + // side effects because this location is stored + // into inside a loop. This gives + // better info on whether a singleton's location + // value may be killed by loop side effects. + + DISALLOW_COPY_AND_ASSIGN(HeapLocation); +}; + +// A HeapLocationCollector collects all relevant heap locations and keeps +// an aliasing matrix for all locations. +class HeapLocationCollector : public HGraphVisitor { + public: + static constexpr size_t kHeapLocationNotFound = -1; + // Start with a single uint32_t word. That's enough bits for pair-wise + // aliasing matrix of 8 heap locations. + static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32; + + explicit HeapLocationCollector(HGraph* graph) + : HGraphVisitor(graph), + ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)), + heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)), + aliasing_matrix_(graph->GetArena(), + kInitialAliasingMatrixBitVectorSize, + true, + kArenaAllocLSE), + has_heap_stores_(false), + has_volatile_(false), + has_monitor_operations_(false) {} + + void CleanUp() { + heap_locations_.clear(); + ref_info_array_.clear(); + } + + size_t GetNumberOfHeapLocations() const { + return heap_locations_.size(); + } + + HeapLocation* GetHeapLocation(size_t index) const { + return heap_locations_[index]; + } + + HInstruction* HuntForOriginalReference(HInstruction* ref) const { + DCHECK(ref != nullptr); + while (ref->IsNullCheck() || ref->IsBoundType()) { + ref = ref->InputAt(0); + } + return ref; + } + + ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const { + for (size_t i = 0; i < ref_info_array_.size(); i++) { + ReferenceInfo* ref_info = ref_info_array_[i]; + if (ref_info->GetReference() == ref) { + DCHECK_EQ(i, ref_info->GetPosition()); + return ref_info; + } + } + return nullptr; + } + + bool HasHeapStores() const { + return has_heap_stores_; + } + + bool HasVolatile() const { + return has_volatile_; + } + + bool HasMonitorOps() const { + return has_monitor_operations_; + } + + // Find and return the heap location index in heap_locations_. + size_t FindHeapLocationIndex(ReferenceInfo* ref_info, + size_t offset, + HInstruction* index, + int16_t declaring_class_def_index) const { + for (size_t i = 0; i < heap_locations_.size(); i++) { + HeapLocation* loc = heap_locations_[i]; + if (loc->GetReferenceInfo() == ref_info && + loc->GetOffset() == offset && + loc->GetIndex() == index && + loc->GetDeclaringClassDefIndex() == declaring_class_def_index) { + return i; + } + } + return kHeapLocationNotFound; + } + + // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias. 
+ bool MayAlias(size_t index1, size_t index2) const { + if (index1 < index2) { + return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2)); + } else if (index1 > index2) { + return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1)); + } else { + DCHECK(false) << "index1 and index2 are expected to be different"; + return true; + } + } + + void BuildAliasingMatrix() { + const size_t number_of_locations = heap_locations_.size(); + if (number_of_locations == 0) { + return; + } + size_t pos = 0; + // Compute aliasing info between every pair of different heap locations. + // Save the result in a matrix represented as a BitVector. + for (size_t i = 0; i < number_of_locations - 1; i++) { + for (size_t j = i + 1; j < number_of_locations; j++) { + if (ComputeMayAlias(i, j)) { + aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos)); + } + pos++; + } + } + } + + private: + // An allocation cannot alias with a name which already exists at the point + // of the allocation, such as a parameter or a load happening before the allocation. + bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { + if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) { + // Any reference that can alias with the allocation must appear after it in the block/in + // the block's successors. In reverse post order, those instructions will be visited after + // the allocation. + return ref_info2->GetPosition() >= ref_info1->GetPosition(); + } + return true; + } + + bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { + if (ref_info1 == ref_info2) { + return true; + } else if (ref_info1->IsSingleton()) { + return false; + } else if (ref_info2->IsSingleton()) { + return false; + } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) || + !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) { + return false; + } + return true; + } + + // `index1` and `index2` are indices in the array of collected heap locations. + // Returns the position in the bit vector that tracks whether the two heap + // locations may alias. + size_t AliasingMatrixPosition(size_t index1, size_t index2) const { + DCHECK(index2 > index1); + const size_t number_of_locations = heap_locations_.size(); + // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1). + return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1)); + } + + // An additional position is passed in to make sure the calculated position is correct. + size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) { + size_t calculated_position = AliasingMatrixPosition(index1, index2); + DCHECK_EQ(calculated_position, position); + return calculated_position; + } + + // Compute if two locations may alias to each other. + bool ComputeMayAlias(size_t index1, size_t index2) const { + HeapLocation* loc1 = heap_locations_[index1]; + HeapLocation* loc2 = heap_locations_[index2]; + if (loc1->GetOffset() != loc2->GetOffset()) { + // Either two different instance fields, or one is an instance + // field and the other is an array element. + return false; + } + if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) { + // Different types. 
+ return false; + } + if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) { + return false; + } + if (loc1->IsArrayElement() && loc2->IsArrayElement()) { + HInstruction* array_index1 = loc1->GetIndex(); + HInstruction* array_index2 = loc2->GetIndex(); + DCHECK(array_index1 != nullptr); + DCHECK(array_index2 != nullptr); + if (array_index1->IsIntConstant() && + array_index2->IsIntConstant() && + array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) { + // Different constant indices do not alias. + return false; + } + ReferenceInfo* ref_info = loc1->GetReferenceInfo(); + ref_info->SetHasIndexAliasing(true); + } + return true; + } + + ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) { + ReferenceInfo* ref_info = FindReferenceInfoOf(instruction); + if (ref_info == nullptr) { + size_t pos = ref_info_array_.size(); + ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos); + ref_info_array_.push_back(ref_info); + } + return ref_info; + } + + void CreateReferenceInfoForReferenceType(HInstruction* instruction) { + if (instruction->GetType() != Primitive::kPrimNot) { + return; + } + DCHECK(FindReferenceInfoOf(instruction) == nullptr); + GetOrCreateReferenceInfo(instruction); + } + + HeapLocation* GetOrCreateHeapLocation(HInstruction* ref, + size_t offset, + HInstruction* index, + int16_t declaring_class_def_index) { + HInstruction* original_ref = HuntForOriginalReference(ref); + ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref); + size_t heap_location_idx = FindHeapLocationIndex( + ref_info, offset, index, declaring_class_def_index); + if (heap_location_idx == kHeapLocationNotFound) { + HeapLocation* heap_loc = new (GetGraph()->GetArena()) + HeapLocation(ref_info, offset, index, declaring_class_def_index); + heap_locations_.push_back(heap_loc); + return heap_loc; + } + return heap_locations_[heap_location_idx]; + } + + HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { + if (field_info.IsVolatile()) { + has_volatile_ = true; + } + const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); + const size_t offset = field_info.GetFieldOffset().SizeValue(); + return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); + } + + void VisitArrayAccess(HInstruction* array, HInstruction* index) { + GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset, + index, HeapLocation::kDeclaringClassDefIndexForArrays); + } + + void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { + HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + has_heap_stores_ = true; + if (location->GetReferenceInfo()->IsSingleton()) { + // A singleton's location value may be killed by loop side effects if it's + // defined before that loop, and it's stored into inside that loop. + HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); + if (loop_info != nullptr) { + HInstruction* ref = location->GetReferenceInfo()->GetReference(); + DCHECK(ref->IsNewInstance()); + if (loop_info->IsDefinedOutOfTheLoop(ref)) { + // ref's location value may be killed by this loop's side effects. 
+ location->SetValueKilledByLoopSideEffects(true);
+ } else {
+ // ref is defined inside this loop so this loop's side effects cannot
+ // kill its location value at the loop header since ref/its location doesn't
+ // exist yet at the loop header.
+ }
+ }
+ } else {
+ // For non-singletons, value_killed_by_loop_side_effects_ is initialized to
+ // true.
+ DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true);
+ }
+ }
+
+ void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
+ VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
+ VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+ has_heap_stores_ = true;
+ }
+
+ // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses
+ // since we cannot accurately track the fields.
+
+ void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
+ VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitArraySet(HArraySet* instruction) OVERRIDE {
+ VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+ has_heap_stores_ = true;
+ }
+
+ void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
+ // Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
+ CreateReferenceInfoForReferenceType(new_instance);
+ }
+
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitSelect(HSelect* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
+ has_monitor_operations_ = true;
+ }
+
+ ArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses.
+ ArenaVector<HeapLocation*> heap_locations_; // All heap locations.
+ ArenaBitVector aliasing_matrix_; // Aliasing info between each pair of locations.
+ bool has_heap_stores_; // If there are no heap stores, LSE acts as GVN with better
+ // alias analysis and won't be as effective.
+ bool has_volatile_; // If there are volatile field accesses.
+ bool has_monitor_operations_; // If there are monitor operations.
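As a cross-check on the flattened upper-triangle encoding used by AliasingMatrixPosition() and MayAlias() above, here is a minimal self-contained sketch of the same scheme; the TriangularAliasMatrix class is hypothetical and std::vector<bool> stands in for ART's ArenaBitVector:

#include <cassert>
#include <cstddef>
#include <vector>

class TriangularAliasMatrix {
 public:
  // n locations need n*(n-1)/2 bits for all unordered pairs.
  explicit TriangularAliasMatrix(size_t n) : n_(n), bits_(n * (n - 1) / 2, false) {}

  // Position of the pair (i, j), i < j, in the flattened upper triangle:
  // rows 0..i-1 contribute (n-1) + (n-2) + ... + (n-i) bits, then
  // (j - i - 1) bits precede the pair inside row i.
  size_t Position(size_t i, size_t j) const {
    assert(i < j && j < n_);
    return n_ * i - (i + 1) * i / 2 + (j - i - 1);
  }

  void SetMayAlias(size_t i, size_t j) {
    bits_[i < j ? Position(i, j) : Position(j, i)] = true;
  }

  // Queries are symmetric, mirroring HeapLocationCollector::MayAlias.
  bool MayAlias(size_t i, size_t j) const {
    assert(i != j);  // Same expectation as the DCHECK in MayAlias above.
    return i < j ? bits_[Position(i, j)] : bits_[Position(j, i)];
  }

 private:
  size_t n_;
  std::vector<bool> bits_;
};

int main() {
  TriangularAliasMatrix m(3);
  // With n = 3 the pairs map to consecutive bits, in the same pair order
  // that BuildAliasingMatrix() fills: (0,1) -> 0, (0,2) -> 1, (1,2) -> 2.
  assert(m.Position(0, 1) == 0);
  assert(m.Position(0, 2) == 1);
  assert(m.Position(1, 2) == 2);
  m.SetMayAlias(0, 2);
  assert(m.MayAlias(2, 0));   // Lookup is symmetric.
  assert(!m.MayAlias(0, 1));
  return 0;
}

Storing only the upper triangle keeps the bit vector at n*(n-1)/2 bits while the ordered lookup hides the asymmetry from callers.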
+ + DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector); +}; + +class LoadStoreAnalysis : public HOptimization { + public: + explicit LoadStoreAnalysis(HGraph* graph) + : HOptimization(graph, kLoadStoreAnalysisPassName), + heap_location_collector_(graph) {} + + const HeapLocationCollector& GetHeapLocationCollector() const { + return heap_location_collector_; + } + + void Run() OVERRIDE; + + static constexpr const char* kLoadStoreAnalysisPassName = "load_store_analysis"; + + private: + HeapLocationCollector heap_location_collector_; + + DISALLOW_COPY_AND_ASSIGN(LoadStoreAnalysis); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_ diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc new file mode 100644 index 0000000000..24187777f6 --- /dev/null +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "load_store_analysis.h" +#include "nodes.h" +#include "optimizing_unit_test.h" + +#include "gtest/gtest.h" + +namespace art { + +class LoadStoreAnalysisTest : public CommonCompilerTest { + public: + LoadStoreAnalysisTest() : pool_(), allocator_(&pool_) { + graph_ = CreateGraph(&allocator_); + } + + ArenaPool pool_; + ArenaAllocator allocator_; + HGraph* graph_; +}; + +TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { + HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->SetEntryBlock(entry); + + // entry: + // array ParameterValue + // index ParameterValue + // c1 IntConstant + // c2 IntConstant + // c3 IntConstant + // array_get1 ArrayGet [array, c1] + // array_get2 ArrayGet [array, c2] + // array_set1 ArraySet [array, c1, c3] + // array_set2 ArraySet [array, index, c3] + HInstruction* array = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* index = new (&allocator_) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* c3 = graph_->GetIntConstant(3); + HInstruction* array_get1 = new (&allocator_) HArrayGet(array, c1, Primitive::kPrimInt, 0); + HInstruction* array_get2 = new (&allocator_) HArrayGet(array, c2, Primitive::kPrimInt, 0); + HInstruction* array_set1 = new (&allocator_) HArraySet(array, c1, c3, Primitive::kPrimInt, 0); + HInstruction* array_set2 = new (&allocator_) HArraySet(array, index, c3, Primitive::kPrimInt, 0); + entry->AddInstruction(array); + entry->AddInstruction(index); + entry->AddInstruction(array_get1); + entry->AddInstruction(array_get2); + entry->AddInstruction(array_set1); + entry->AddInstruction(array_set2); + + // Test HeapLocationCollector initialization. + // Should be no heap locations, no operations on the heap. 
+ HeapLocationCollector heap_location_collector(graph_);
+ ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U);
+ ASSERT_FALSE(heap_location_collector.HasHeapStores());
+
+ // Test that after visiting the graph_, it must see the following heap locations:
+ // array[c1], array[c2] and array[index]; and it should see heap stores.
+ heap_location_collector.VisitBasicBlock(entry);
+ ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 3U);
+ ASSERT_TRUE(heap_location_collector.HasHeapStores());
+
+ // Test queries on HeapLocationCollector's ref info and index records.
+ ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(array);
+ size_t field_off = HeapLocation::kInvalidFieldOffset;
+ size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays;
+ size_t loc1 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c1, class_def);
+ size_t loc2 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c2, class_def);
+ size_t loc3 = heap_location_collector.FindHeapLocationIndex(ref, field_off, index, class_def);
+ // Must find the reference info for array in the HeapLocationCollector.
+ ASSERT_TRUE(ref != nullptr);
+ // Must find these heap locations;
+ // array[c1], array[c2] and array[index] should be different heap locations.
+ ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound);
+ ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound);
+ ASSERT_TRUE(loc3 != HeapLocationCollector::kHeapLocationNotFound);
+ ASSERT_TRUE(loc1 != loc2);
+ ASSERT_TRUE(loc2 != loc3);
+ ASSERT_TRUE(loc1 != loc3);
+
+ // Test alias relationships after building the aliasing matrix.
+ // array[c1] and array[c2] clearly should not alias;
+ // array[index] should alias with both of them, because index is an unknown value.
+ heap_location_collector.BuildAliasingMatrix();
+ ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+ ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3));
+ ASSERT_TRUE(heap_location_collector.MayAlias(loc2, loc3));
+}
+
+TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) {
+ HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(entry);
+ graph_->SetEntryBlock(entry);
+
+ // entry:
+ // object ParameterValue
+ // c1 IntConstant
+ // set_field10 InstanceFieldSet [object, c1, 10]
+ // get_field10 InstanceFieldGet [object, 10]
+ // get_field20 InstanceFieldGet [object, 20]
+
+ HInstruction* c1 = graph_->GetIntConstant(1);
+ HInstruction* object = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
+ 0,
+ Primitive::kPrimNot);
+ HInstanceFieldSet* set_field10 = new (&allocator_) HInstanceFieldSet(object,
+ c1,
+ nullptr,
+ Primitive::kPrimInt,
+ MemberOffset(10),
+ false,
+ kUnknownFieldIndex,
+ kUnknownClassDefIndex,
+ graph_->GetDexFile(),
+ 0);
+ HInstanceFieldGet* get_field10 = new (&allocator_) HInstanceFieldGet(object,
+ nullptr,
+ Primitive::kPrimInt,
+ MemberOffset(10),
+ false,
+ kUnknownFieldIndex,
+ kUnknownClassDefIndex,
+ graph_->GetDexFile(),
+ 0);
+ HInstanceFieldGet* get_field20 = new (&allocator_) HInstanceFieldGet(object,
+ nullptr,
+ Primitive::kPrimInt,
+ MemberOffset(20),
+ false,
+ kUnknownFieldIndex,
+ kUnknownClassDefIndex,
+ graph_->GetDexFile(),
+ 0);
+ entry->AddInstruction(object);
+ entry->AddInstruction(set_field10);
+ entry->AddInstruction(get_field10);
+ entry->AddInstruction(get_field20);
+
+ // Test HeapLocationCollector initialization.
+ // Should be no heap locations, no operations on the heap.
+ HeapLocationCollector heap_location_collector(graph_);
+ ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U);
+ ASSERT_FALSE(heap_location_collector.HasHeapStores());
+
+ // Test that after visiting the graph, it must see the following heap locations:
+ // object.field10 and object.field20; and it should see heap stores.
+ heap_location_collector.VisitBasicBlock(entry);
+ ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 2U);
+ ASSERT_TRUE(heap_location_collector.HasHeapStores());
+
+ // Test queries on HeapLocationCollector's ref info and index records.
+ ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(object);
+ size_t loc1 = heap_location_collector.FindHeapLocationIndex(
+ ref, 10, nullptr, kUnknownClassDefIndex);
+ size_t loc2 = heap_location_collector.FindHeapLocationIndex(
+ ref, 20, nullptr, kUnknownClassDefIndex);
+ // Must find the reference info for object in the HeapLocationCollector.
+ ASSERT_TRUE(ref != nullptr);
+ // Must find these heap locations.
+ ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound);
+ ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound);
+ // Different fields of the same object.
+ ASSERT_TRUE(loc1 != loc2);
+ // Accesses to different fields of the same object should not alias.
+ ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+}
+
+} // namespace art
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 76c9d2324b..211528b4bd 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -14,6 +14,7 @@
 * limitations under the License.
 */
+#include "load_store_analysis.h"
 #include "load_store_elimination.h"
 #include "escape.h"
@@ -23,477 +24,6 @@
 namespace art {
-class ReferenceInfo;
-
-// A cap for the number of heap locations to prevent pathological time/space consumption.
-// The number of heap locations for most of the methods stays below this threshold.
-constexpr size_t kMaxNumberOfHeapLocations = 32;
-
-// A ReferenceInfo contains additional info about a reference such as
-// whether it's a singleton, returned, etc.
-class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
- public:
- ReferenceInfo(HInstruction* reference, size_t pos)
- : reference_(reference),
- position_(pos),
- is_singleton_(true),
- is_singleton_and_not_returned_(true),
- is_singleton_and_not_deopt_visible_(true),
- has_index_aliasing_(false) {
- CalculateEscape(reference_,
- nullptr,
- &is_singleton_,
- &is_singleton_and_not_returned_,
- &is_singleton_and_not_deopt_visible_);
- }
-
- HInstruction* GetReference() const {
- return reference_;
- }
-
- size_t GetPosition() const {
- return position_;
- }
-
- // Returns true if reference_ is the only name that can refer to its value during
- // the lifetime of the method. So it's guaranteed to not have any alias in
- // the method (including its callees).
- bool IsSingleton() const {
- return is_singleton_;
- }
-
- // Returns true if reference_ is a singleton and not returned to the caller or
- // used as an environment local of an HDeoptimize instruction.
- // The allocation and stores into reference_ may be eliminated for such cases.
- bool IsSingletonAndRemovable() const {
- return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
- }
-
- // Returns true if reference_ is a singleton and returned to the caller or
- // used as an environment local of an HDeoptimize instruction.
- bool IsSingletonAndNonRemovable() const { - return is_singleton_ && - (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_); - } - - bool HasIndexAliasing() { - return has_index_aliasing_; - } - - void SetHasIndexAliasing(bool has_index_aliasing) { - // Only allow setting to true. - DCHECK(has_index_aliasing); - has_index_aliasing_ = has_index_aliasing; - } - - private: - HInstruction* const reference_; - const size_t position_; // position in HeapLocationCollector's ref_info_array_. - - // Can only be referred to by a single name in the method. - bool is_singleton_; - // Is singleton and not returned to caller. - bool is_singleton_and_not_returned_; - // Is singleton and not used as an environment local of HDeoptimize. - bool is_singleton_and_not_deopt_visible_; - // Some heap locations with reference_ have array index aliasing, - // e.g. arr[i] and arr[j] may be the same location. - bool has_index_aliasing_; - - DISALLOW_COPY_AND_ASSIGN(ReferenceInfo); -}; - -// A heap location is a reference-offset/index pair that a value can be loaded from -// or stored to. -class HeapLocation : public ArenaObject<kArenaAllocMisc> { - public: - static constexpr size_t kInvalidFieldOffset = -1; - - // TODO: more fine-grained array types. - static constexpr int16_t kDeclaringClassDefIndexForArrays = -1; - - HeapLocation(ReferenceInfo* ref_info, - size_t offset, - HInstruction* index, - int16_t declaring_class_def_index) - : ref_info_(ref_info), - offset_(offset), - index_(index), - declaring_class_def_index_(declaring_class_def_index), - value_killed_by_loop_side_effects_(true) { - DCHECK(ref_info != nullptr); - DCHECK((offset == kInvalidFieldOffset && index != nullptr) || - (offset != kInvalidFieldOffset && index == nullptr)); - if (ref_info->IsSingleton() && !IsArrayElement()) { - // Assume this location's value cannot be killed by loop side effects - // until proven otherwise. - value_killed_by_loop_side_effects_ = false; - } - } - - ReferenceInfo* GetReferenceInfo() const { return ref_info_; } - size_t GetOffset() const { return offset_; } - HInstruction* GetIndex() const { return index_; } - - // Returns the definition of declaring class' dex index. - // It's kDeclaringClassDefIndexForArrays for an array element. - int16_t GetDeclaringClassDefIndex() const { - return declaring_class_def_index_; - } - - bool IsArrayElement() const { - return index_ != nullptr; - } - - bool IsValueKilledByLoopSideEffects() const { - return value_killed_by_loop_side_effects_; - } - - void SetValueKilledByLoopSideEffects(bool val) { - value_killed_by_loop_side_effects_ = val; - } - - private: - ReferenceInfo* const ref_info_; // reference for instance/static field or array access. - const size_t offset_; // offset of static/instance field. - HInstruction* const index_; // index of an array element. - const int16_t declaring_class_def_index_; // declaring class's def's dex index. - bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop - // side effects because this location is stored - // into inside a loop. This gives - // better info on whether a singleton's location - // value may be killed by loop side effects. 
- - DISALLOW_COPY_AND_ASSIGN(HeapLocation); -}; - -static HInstruction* HuntForOriginalReference(HInstruction* ref) { - DCHECK(ref != nullptr); - while (ref->IsNullCheck() || ref->IsBoundType()) { - ref = ref->InputAt(0); - } - return ref; -} - -// A HeapLocationCollector collects all relevant heap locations and keeps -// an aliasing matrix for all locations. -class HeapLocationCollector : public HGraphVisitor { - public: - static constexpr size_t kHeapLocationNotFound = -1; - // Start with a single uint32_t word. That's enough bits for pair-wise - // aliasing matrix of 8 heap locations. - static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32; - - explicit HeapLocationCollector(HGraph* graph) - : HGraphVisitor(graph), - ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)), - heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)), - aliasing_matrix_(graph->GetArena(), - kInitialAliasingMatrixBitVectorSize, - true, - kArenaAllocLSE), - has_heap_stores_(false), - has_volatile_(false), - has_monitor_operations_(false) {} - - size_t GetNumberOfHeapLocations() const { - return heap_locations_.size(); - } - - HeapLocation* GetHeapLocation(size_t index) const { - return heap_locations_[index]; - } - - ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const { - for (size_t i = 0; i < ref_info_array_.size(); i++) { - ReferenceInfo* ref_info = ref_info_array_[i]; - if (ref_info->GetReference() == ref) { - DCHECK_EQ(i, ref_info->GetPosition()); - return ref_info; - } - } - return nullptr; - } - - bool HasHeapStores() const { - return has_heap_stores_; - } - - bool HasVolatile() const { - return has_volatile_; - } - - bool HasMonitorOps() const { - return has_monitor_operations_; - } - - // Find and return the heap location index in heap_locations_. - size_t FindHeapLocationIndex(ReferenceInfo* ref_info, - size_t offset, - HInstruction* index, - int16_t declaring_class_def_index) const { - for (size_t i = 0; i < heap_locations_.size(); i++) { - HeapLocation* loc = heap_locations_[i]; - if (loc->GetReferenceInfo() == ref_info && - loc->GetOffset() == offset && - loc->GetIndex() == index && - loc->GetDeclaringClassDefIndex() == declaring_class_def_index) { - return i; - } - } - return kHeapLocationNotFound; - } - - // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias. - bool MayAlias(size_t index1, size_t index2) const { - if (index1 < index2) { - return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2)); - } else if (index1 > index2) { - return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1)); - } else { - DCHECK(false) << "index1 and index2 are expected to be different"; - return true; - } - } - - void BuildAliasingMatrix() { - const size_t number_of_locations = heap_locations_.size(); - if (number_of_locations == 0) { - return; - } - size_t pos = 0; - // Compute aliasing info between every pair of different heap locations. - // Save the result in a matrix represented as a BitVector. - for (size_t i = 0; i < number_of_locations - 1; i++) { - for (size_t j = i + 1; j < number_of_locations; j++) { - if (ComputeMayAlias(i, j)) { - aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos)); - } - pos++; - } - } - } - - private: - // An allocation cannot alias with a name which already exists at the point - // of the allocation, such as a parameter or a load happening before the allocation. 
- bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { - if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) { - // Any reference that can alias with the allocation must appear after it in the block/in - // the block's successors. In reverse post order, those instructions will be visited after - // the allocation. - return ref_info2->GetPosition() >= ref_info1->GetPosition(); - } - return true; - } - - bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const { - if (ref_info1 == ref_info2) { - return true; - } else if (ref_info1->IsSingleton()) { - return false; - } else if (ref_info2->IsSingleton()) { - return false; - } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) || - !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) { - return false; - } - return true; - } - - // `index1` and `index2` are indices in the array of collected heap locations. - // Returns the position in the bit vector that tracks whether the two heap - // locations may alias. - size_t AliasingMatrixPosition(size_t index1, size_t index2) const { - DCHECK(index2 > index1); - const size_t number_of_locations = heap_locations_.size(); - // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1). - return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1)); - } - - // An additional position is passed in to make sure the calculated position is correct. - size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) { - size_t calculated_position = AliasingMatrixPosition(index1, index2); - DCHECK_EQ(calculated_position, position); - return calculated_position; - } - - // Compute if two locations may alias to each other. - bool ComputeMayAlias(size_t index1, size_t index2) const { - HeapLocation* loc1 = heap_locations_[index1]; - HeapLocation* loc2 = heap_locations_[index2]; - if (loc1->GetOffset() != loc2->GetOffset()) { - // Either two different instance fields, or one is an instance - // field and the other is an array element. - return false; - } - if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) { - // Different types. - return false; - } - if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) { - return false; - } - if (loc1->IsArrayElement() && loc2->IsArrayElement()) { - HInstruction* array_index1 = loc1->GetIndex(); - HInstruction* array_index2 = loc2->GetIndex(); - DCHECK(array_index1 != nullptr); - DCHECK(array_index2 != nullptr); - if (array_index1->IsIntConstant() && - array_index2->IsIntConstant() && - array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) { - // Different constant indices do not alias. 
- return false; - } - ReferenceInfo* ref_info = loc1->GetReferenceInfo(); - ref_info->SetHasIndexAliasing(true); - } - return true; - } - - ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) { - ReferenceInfo* ref_info = FindReferenceInfoOf(instruction); - if (ref_info == nullptr) { - size_t pos = ref_info_array_.size(); - ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos); - ref_info_array_.push_back(ref_info); - } - return ref_info; - } - - void CreateReferenceInfoForReferenceType(HInstruction* instruction) { - if (instruction->GetType() != Primitive::kPrimNot) { - return; - } - DCHECK(FindReferenceInfoOf(instruction) == nullptr); - GetOrCreateReferenceInfo(instruction); - } - - HeapLocation* GetOrCreateHeapLocation(HInstruction* ref, - size_t offset, - HInstruction* index, - int16_t declaring_class_def_index) { - HInstruction* original_ref = HuntForOriginalReference(ref); - ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref); - size_t heap_location_idx = FindHeapLocationIndex( - ref_info, offset, index, declaring_class_def_index); - if (heap_location_idx == kHeapLocationNotFound) { - HeapLocation* heap_loc = new (GetGraph()->GetArena()) - HeapLocation(ref_info, offset, index, declaring_class_def_index); - heap_locations_.push_back(heap_loc); - return heap_loc; - } - return heap_locations_[heap_location_idx]; - } - - HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { - if (field_info.IsVolatile()) { - has_volatile_ = true; - } - const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); - const size_t offset = field_info.GetFieldOffset().SizeValue(); - return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); - } - - void VisitArrayAccess(HInstruction* array, HInstruction* index) { - GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset, - index, HeapLocation::kDeclaringClassDefIndexForArrays); - } - - void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { - HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - has_heap_stores_ = true; - if (location->GetReferenceInfo()->IsSingleton()) { - // A singleton's location value may be killed by loop side effects if it's - // defined before that loop, and it's stored into inside that loop. - HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); - if (loop_info != nullptr) { - HInstruction* ref = location->GetReferenceInfo()->GetReference(); - DCHECK(ref->IsNewInstance()); - if (loop_info->IsDefinedOutOfTheLoop(ref)) { - // ref's location value may be killed by this loop's side effects. - location->SetValueKilledByLoopSideEffects(true); - } else { - // ref is defined inside this loop so this loop's side effects cannot - // kill its location value at the loop header since ref/its location doesn't - // exist yet at the loop header. - } - } - } else { - // For non-singletons, value_killed_by_loop_side_effects_ is inited to - // true. 
- DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true); - } - } - - void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); - has_heap_stores_ = true; - } - - // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses - // since we cannot accurately track the fields. - - void VisitArrayGet(HArrayGet* instruction) OVERRIDE { - VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitArraySet(HArraySet* instruction) OVERRIDE { - VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); - has_heap_stores_ = true; - } - - void VisitNewInstance(HNewInstance* new_instance) OVERRIDE { - // Any references appearing in the ref_info_array_ so far cannot alias with new_instance. - CreateReferenceInfoForReferenceType(new_instance); - } - - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitParameterValue(HParameterValue* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitSelect(HSelect* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE { - has_monitor_operations_ = true; - } - - ArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses. - ArenaVector<HeapLocation*> heap_locations_; // All heap locations. - ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations. - bool has_heap_stores_; // If there is no heap stores, LSE acts as GVN with better - // alias analysis and won't be as effective. - bool has_volatile_; // If there are volatile field accesses. - bool has_monitor_operations_; // If there are monitor operations. - - DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector); -}; - // An unknown heap value. Loads with such a value in the heap location cannot be eliminated. // A heap location can be set to kUnknownHeapValue when: // - initially set a value. @@ -516,7 +46,7 @@ class LSEVisitor : public HGraphVisitor { side_effects_(side_effects), heap_values_for_(graph->GetBlocks().size(), ArenaVector<HInstruction*>(heap_locations_collector. 
- GetNumberOfHeapLocations(), + GetNumberOfHeapLocations(), kUnknownHeapValue, graph->GetArena()->Adapter(kArenaAllocLSE)), graph->GetArena()->Adapter(kArenaAllocLSE)), @@ -760,7 +290,7 @@ class LSEVisitor : public HGraphVisitor { size_t offset, HInstruction* index, int16_t declaring_class_def_index) { - HInstruction* original_ref = HuntForOriginalReference(ref); + HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); size_t idx = heap_location_collector_.FindHeapLocationIndex( ref_info, offset, index, declaring_class_def_index); @@ -827,7 +357,7 @@ class LSEVisitor : public HGraphVisitor { HInstruction* index, int16_t declaring_class_def_index, HInstruction* value) { - HInstruction* original_ref = HuntForOriginalReference(ref); + HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); size_t idx = heap_location_collector_.FindHeapLocationIndex( ref_info, offset, index, declaring_class_def_index); @@ -1127,25 +657,12 @@ void LoadStoreElimination::Run() { // Skip this optimization. return; } - HeapLocationCollector heap_location_collector(graph_); - for (HBasicBlock* block : graph_->GetReversePostOrder()) { - heap_location_collector.VisitBasicBlock(block); - } - if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) { - // Bail out if there are too many heap locations to deal with. - return; - } - if (!heap_location_collector.HasHeapStores()) { - // Without heap stores, this pass would act mostly as GVN on heap accesses. + const HeapLocationCollector& heap_location_collector = lsa_.GetHeapLocationCollector(); + if (heap_location_collector.GetNumberOfHeapLocations() == 0) { + // No HeapLocation information from LSA, skip this optimization. return; } - if (heap_location_collector.HasVolatile() || heap_location_collector.HasMonitorOps()) { - // Don't do load/store elimination if the method has volatile field accesses or - // monitor operations, for now. - // TODO: do it right. 
- return; - } - heap_location_collector.BuildAliasingMatrix(); + LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_); for (HBasicBlock* block : graph_->GetReversePostOrder()) { lse_visitor.VisitBasicBlock(block); diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h index 1d9e5c8da6..efe71c733a 100644 --- a/compiler/optimizing/load_store_elimination.h +++ b/compiler/optimizing/load_store_elimination.h @@ -22,12 +22,16 @@ namespace art { class SideEffectsAnalysis; +class LoadStoreAnalysis; class LoadStoreElimination : public HOptimization { public: - LoadStoreElimination(HGraph* graph, const SideEffectsAnalysis& side_effects) + LoadStoreElimination(HGraph* graph, + const SideEffectsAnalysis& side_effects, + const LoadStoreAnalysis& lsa) : HOptimization(graph, kLoadStoreEliminationPassName), - side_effects_(side_effects) {} + side_effects_(side_effects), + lsa_(lsa) {} void Run() OVERRIDE; @@ -35,6 +39,7 @@ class LoadStoreElimination : public HOptimization { private: const SideEffectsAnalysis& side_effects_; + const LoadStoreAnalysis& lsa_; DISALLOW_COPY_AND_ASSIGN(LoadStoreElimination); }; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 833f32b282..bde7f2c1e0 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2636,15 +2636,17 @@ bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const { std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) { switch (rhs) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: - return os << "string_init"; + return os << "StringInit"; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - return os << "recursive"; + return os << "Recursive"; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: + return os << "BootImageLinkTimePcRelative"; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - return os << "direct"; + return os << "Direct"; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - return os << "dex_cache_pc_relative"; + return os << "DexCachePcRelative"; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: - return os << "dex_cache_via_method"; + return os << "DexCacheViaMethod"; default: LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs); UNREACHABLE(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 72774da1d1..4d96fbe24c 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -4153,6 +4153,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Use the method's own ArtMethod* loaded by the register allocator. kRecursive, + // Use PC-relative boot image ArtMethod* address that will be known at link time. + // Used for boot image methods referenced by boot image code. + kBootImageLinkTimePcRelative, + // Use ArtMethod* at a known address, embed the direct address in the code. // Used for app->boot calls with non-relocatable image and for JIT-compiled calls. 
kDirectAddress, @@ -4292,6 +4296,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { bool HasPcRelativeDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; } + bool HasPcRelativeMethodLoadKind() const { + return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative || + GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; + } bool HasCurrentMethodInput() const { // This function can be called only after the invoke has been fully initialized by the builder. if (NeedsCurrentMethodInput(GetMethodLoadKind())) { diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index f928f71209..e5ab00bce3 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -83,6 +83,7 @@ #include "jit/jit_code_cache.h" #include "jni/quick/jni_compiler.h" #include "licm.h" +#include "load_store_analysis.h" #include "load_store_elimination.h" #include "loop_optimization.h" #include "nodes.h" @@ -465,7 +466,8 @@ static HOptimization* BuildOptimization( const DexCompilationUnit& dex_compilation_unit, VariableSizedHandleScope* handles, SideEffectsAnalysis* most_recent_side_effects, - HInductionVarAnalysis* most_recent_induction) { + HInductionVarAnalysis* most_recent_induction, + LoadStoreAnalysis* most_recent_lsa) { std::string opt_name = ConvertPassNameToOptimizationName(pass_name); if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) { CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); @@ -499,15 +501,18 @@ static HOptimization* BuildOptimization( } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { return new (arena) HInductionVarAnalysis(graph); } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) { - return new (arena) InstructionSimplifier(graph, codegen, stats, pass_name.c_str()); + return new (arena) InstructionSimplifier(graph, codegen, driver, stats, pass_name.c_str()); } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) { return new (arena) IntrinsicsRecognizer(graph, stats); } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) { CHECK(most_recent_side_effects != nullptr); return new (arena) LICM(graph, *most_recent_side_effects, stats); + } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) { + return new (arena) LoadStoreAnalysis(graph); } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) { CHECK(most_recent_side_effects != nullptr); - return new (arena) LoadStoreElimination(graph, *most_recent_side_effects); + CHECK(most_recent_lsa != nullptr); + return new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa); } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) { return new (arena) SideEffectsAnalysis(graph); } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) { @@ -556,6 +561,7 @@ static ArenaVector<HOptimization*> BuildOptimizations( // in the pass name list. 
SideEffectsAnalysis* most_recent_side_effects = nullptr; HInductionVarAnalysis* most_recent_induction = nullptr; + LoadStoreAnalysis* most_recent_lsa = nullptr; ArenaVector<HOptimization*> ret(arena->Adapter()); for (const std::string& pass_name : pass_names) { HOptimization* opt = BuildOptimization( @@ -568,7 +574,8 @@ static ArenaVector<HOptimization*> BuildOptimizations( dex_compilation_unit, handles, most_recent_side_effects, - most_recent_induction); + most_recent_induction, + most_recent_lsa); CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\""; ret.push_back(opt); @@ -577,6 +584,8 @@ static ArenaVector<HOptimization*> BuildOptimizations( most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt); } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { most_recent_induction = down_cast<HInductionVarAnalysis*>(opt); + } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) { + most_recent_lsa = down_cast<LoadStoreAnalysis*>(opt); } } return ret; @@ -763,7 +772,8 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination( graph, stats, "dead_code_elimination$final"); HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding"); - InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, codegen, stats); + InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier( + graph, codegen, driver, stats); HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats); HConstantFolding* fold2 = new (arena) HConstantFolding( graph, "constant_folding$after_inlining"); @@ -777,15 +787,16 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction); HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction); - LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2); + LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph); + LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa); HSharpening* sharpening = new (arena) HSharpening( graph, codegen, dex_compilation_unit, driver, handles); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( - graph, codegen, stats, "instruction_simplifier$after_inlining"); + graph, codegen, driver, stats, "instruction_simplifier$after_inlining"); InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( - graph, codegen, stats, "instruction_simplifier$after_bce"); + graph, codegen, driver, stats, "instruction_simplifier$after_bce"); InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( - graph, codegen, stats, "instruction_simplifier$before_codegen"); + graph, codegen, driver, stats, "instruction_simplifier$before_codegen"); IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats); CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph); CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats); @@ -817,6 +828,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, fold3, // evaluates code generated by dynamic bce simplify3, side_effects2, + lsa, lse, cha_guard, dce3, diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index ef2c432086..bce54bf49a 100644 --- 
a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -58,6 +58,19 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { DCHECK(base_ != nullptr); } + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + // If this is an invoke with PC-relative pointer to a method, + // we need to add the base as the special input. + if (invoke->GetMethodLoadKind() == + HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative && + !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) { + InitializePCRelativeBasePointer(); + // Add the special argument base to the method. + DCHECK(!invoke->HasCurrentMethodInput()); + invoke->AddSpecialInput(base_); + } + } + void VisitLoadClass(HLoadClass* load_class) OVERRIDE { HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); switch (load_kind) { diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index a1c916f43a..2743df9dcf 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -205,13 +205,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // method pointer from the invoke. if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasCurrentMethodInput()) { - DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache()); + DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind()); return; } bool base_added = false; if (invoke_static_or_direct != nullptr && - invoke_static_or_direct->HasPcRelativeDexCache() && + invoke_static_or_direct->HasPcRelativeMethodLoadKind() && !IsCallFreeIntrinsic<IntrinsicLocationsBuilderX86>(invoke, codegen_)) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke); // Add the extra parameter. 
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 9a0316330d..7b8104b8ca 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -16,6 +16,7 @@ #include "sharpening.h" +#include "art_method-inl.h" #include "base/casts.h" #include "base/enums.h" #include "class_linker.h" @@ -41,7 +42,9 @@ void HSharpening::Run() { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); if (instruction->IsInvokeStaticOrDirect()) { - SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_); + SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), + codegen_, + compiler_driver_); } else if (instruction->IsLoadString()) { ProcessLoadString(instruction->AsLoadString()); } @@ -68,9 +71,21 @@ static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options) return IsInBootImage(method) && !options.GetCompilePic(); } +static bool BootImageAOTCanEmbedMethod(ArtMethod* method, CompilerDriver* compiler_driver) { + DCHECK(compiler_driver->GetCompilerOptions().IsBootImage()); + if (!compiler_driver->GetSupportBootImageFixup()) { + return false; + } + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> klass = method->GetDeclaringClass(); + DCHECK(klass != nullptr); + const DexFile& dex_file = klass->GetDexFile(); + return compiler_driver->IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex())); +} void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen) { + CodeGenerator* codegen, + CompilerDriver* compiler_driver) { if (invoke->IsStringInit()) { // Not using the dex cache arrays. But we could still try to use a better dispatch... // TODO: Use direct_method and direct_code for the appropriate StringFactory method. @@ -108,6 +123,10 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress; method_load_data = reinterpret_cast<uintptr_t>(callee); code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + } else if (codegen->GetCompilerOptions().IsBootImage() && + BootImageAOTCanEmbedMethod(callee, compiler_driver)) { + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else { // Use PC-relative access to the dex cache arrays. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative; @@ -167,8 +186,8 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, if (!compiler_driver->GetSupportBootImageFixup()) { // compiler_driver_test. Do not sharpen. 
desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
- } else if ((klass != nullptr) && compiler_driver->IsImageClass(
- dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
+ } else if ((klass != nullptr) &&
+ compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) {
 is_in_boot_image = true;
 desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
 } else {
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 10707c796f..f74b0afdbf 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -55,7 +55,9 @@ class HSharpening : public HOptimization {
 REQUIRES_SHARED(Locks::mutator_lock_);
 // Used by Sharpening and InstructionSimplifier.
- static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, CodeGenerator* codegen);
+ static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver);
 private:
 void ProcessLoadString(HLoadString* load_string);
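Putting the sharpening and fixup changes together: the following condensed standalone sketch shows where the new kBootImageLinkTimePcRelative kind slots into the selection order, and the predicate that the MIPS/x86 fixup passes now use to decide when to add the PC-relative base input. The SharpeningInputs struct and Choose() helper are hypothetical simplifications; the real decision in HSharpening::SharpenInvokeStaticOrDirect consults the codegen, the compiler options and the resolved callee.

#include <cassert>

// Mirrors HInvokeStaticOrDirect::MethodLoadKind, including the new kind.
enum class MethodLoadKind {
  kStringInit,
  kRecursive,
  kBootImageLinkTimePcRelative,  // New: PC-relative boot image ArtMethod*.
  kDirectAddress,
  kDexCachePcRelative,
  kDexCacheViaMethod,
};

// Hypothetical condensed inputs for the decision chain.
struct SharpeningInputs {
  bool is_string_init;        // String.<init> has its own dispatch.
  bool callee_is_caller;      // Recursive call into the same method.
  bool can_embed_address;     // JIT/non-PIC AOT with a usable ArtMethod* address.
  bool boot_image_can_embed;  // Boot image compile and callee is an image class.
};

MethodLoadKind Choose(const SharpeningInputs& in) {
  if (in.is_string_init) return MethodLoadKind::kStringInit;
  if (in.callee_is_caller) return MethodLoadKind::kRecursive;
  if (in.can_embed_address) return MethodLoadKind::kDirectAddress;
  if (in.boot_image_can_embed) return MethodLoadKind::kBootImageLinkTimePcRelative;
  return MethodLoadKind::kDexCachePcRelative;  // PC-relative dex cache fallback.
}

// Same shape as the new HasPcRelativeMethodLoadKind(): both PC-relative kinds
// need the extra base input added by the MIPS/x86 fixup passes.
bool HasPcRelativeMethodLoadKind(MethodLoadKind kind) {
  return kind == MethodLoadKind::kBootImageLinkTimePcRelative ||
         kind == MethodLoadKind::kDexCachePcRelative;
}

int main() {
  SharpeningInputs boot_image{false, false, false, true};
  assert(Choose(boot_image) == MethodLoadKind::kBootImageLinkTimePcRelative);
  assert(HasPcRelativeMethodLoadKind(Choose(boot_image)));
  return 0;
}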