Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator.h           |   2
-rw-r--r--  compiler/optimizing/code_generator_arm.h       |   2
-rw-r--r--  compiler/optimizing/code_generator_arm64.h     |   2
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc |  38
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h  |   5
-rw-r--r--  compiler/optimizing/code_generator_mips.h      |   2
-rw-r--r--  compiler/optimizing/code_generator_mips64.h    |   2
-rw-r--r--  compiler/optimizing/inliner.cc                 |  55
-rw-r--r--  compiler/optimizing/inliner.h                  |   3
-rw-r--r--  compiler/optimizing/instruction_builder.cc     |   4
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc          |   7
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc     |   9
-rw-r--r--  compiler/optimizing/loop_optimization.cc       | 146
-rw-r--r--  compiler/optimizing/loop_optimization.h        |   6
-rw-r--r--  compiler/optimizing/nodes.cc                   |   2
15 files changed, 190 insertions, 95 deletions
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 9ef692aaf0..c2b2ebfade 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -33,8 +33,8 @@
#include "read_barrier_option.h"
#include "stack_map_stream.h"
#include "string_reference.h"
+#include "type_reference.h"
#include "utils/label.h"
-#include "utils/type_reference.h"
namespace art {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index fa1c14dcda..2409a4d38d 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -24,8 +24,8 @@
#include "nodes.h"
#include "string_reference.h"
#include "parallel_move_resolver.h"
+#include "type_reference.h"
#include "utils/arm/assembler_thumb2.h"
-#include "utils/type_reference.h"
namespace art {
namespace arm {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 71e221da22..7a4b3d4805 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -25,8 +25,8 @@
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "string_reference.h"
+#include "type_reference.h"
#include "utils/arm64/assembler_arm64.h"
-#include "utils/type_reference.h"
// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 34821f83cd..1f8e1efd5e 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2139,7 +2139,8 @@ static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
static void GenerateLongComparesAndJumps(HCondition* cond,
vixl32::Label* true_label,
vixl32::Label* false_label,
- CodeGeneratorARMVIXL* codegen) {
+ CodeGeneratorARMVIXL* codegen,
+ bool is_far_target = true) {
LocationSummary* locations = cond->GetLocations();
Location left = locations->InAt(0);
Location right = locations->InAt(1);
@@ -2190,12 +2191,12 @@ static void GenerateLongComparesAndJumps(HCondition* cond,
__ Cmp(left_high, val_high);
if (if_cond == kCondNE) {
- __ B(ARMCondition(true_high_cond), true_label);
+ __ B(ARMCondition(true_high_cond), true_label, is_far_target);
} else if (if_cond == kCondEQ) {
- __ B(ARMCondition(false_high_cond), false_label);
+ __ B(ARMCondition(false_high_cond), false_label, is_far_target);
} else {
- __ B(ARMCondition(true_high_cond), true_label);
- __ B(ARMCondition(false_high_cond), false_label);
+ __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+ __ B(ARMCondition(false_high_cond), false_label, is_far_target);
}
// Must be equal high, so compare the lows.
__ Cmp(left_low, val_low);
@@ -2205,19 +2206,19 @@ static void GenerateLongComparesAndJumps(HCondition* cond,
__ Cmp(left_high, right_high);
if (if_cond == kCondNE) {
- __ B(ARMCondition(true_high_cond), true_label);
+ __ B(ARMCondition(true_high_cond), true_label, is_far_target);
} else if (if_cond == kCondEQ) {
- __ B(ARMCondition(false_high_cond), false_label);
+ __ B(ARMCondition(false_high_cond), false_label, is_far_target);
} else {
- __ B(ARMCondition(true_high_cond), true_label);
- __ B(ARMCondition(false_high_cond), false_label);
+ __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+ __ B(ARMCondition(false_high_cond), false_label, is_far_target);
}
// Must be equal high, so compare the lows.
__ Cmp(left_low, right_low);
}
// The last comparison might be unsigned.
// TODO: optimize cases where this is always true/false
- __ B(final_condition, true_label);
+ __ B(final_condition, true_label, is_far_target);
}
static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
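GenerateLongComparesAndJumps above synthesizes a 64-bit comparison from 32-bit halves: first a conditional check on the high words, then, when the high words are equal, a check on the low words (the "last comparison might be unsigned" note). A minimal sketch of that scheme in plain C++ (illustrative, not ART code):

    #include <cstdint>

    // 64-bit signed less-than built from 32-bit halves, following the same
    // high-word/low-word split as the generated code.
    bool LessThan64(int32_t lhs_hi, uint32_t lhs_lo,
                    int32_t rhs_hi, uint32_t rhs_lo) {
      if (lhs_hi != rhs_hi) {
        return lhs_hi < rhs_hi;   // signed comparison decides on the highs
      }
      return lhs_lo < rhs_lo;     // equal highs: unsigned comparison on lows
    }
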
@@ -2292,7 +2293,7 @@ static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codege
vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
vixl32::Label true_label, false_label;
- GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen);
+ GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen, /* is_far_target */ false);
// False case: result = 0.
__ Bind(&false_label);
@@ -2957,7 +2958,8 @@ void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
vixl32::Label* true_target_in,
- vixl32::Label* false_target_in) {
+ vixl32::Label* false_target_in,
+ bool is_far_target) {
if (CanGenerateTest(condition, codegen_->GetAssembler())) {
vixl32::Label* non_fallthrough_target;
bool invert;
@@ -2973,7 +2975,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* c
const auto cond = GenerateTest(condition, invert, codegen_);
- __ B(cond.first, non_fallthrough_target);
+ __ B(cond.first, non_fallthrough_target, is_far_target);
if (false_target_in != nullptr && false_target_in != non_fallthrough_target) {
__ B(false_target_in);
@@ -2989,7 +2991,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* c
vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in;
DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
- GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_);
+ GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_, is_far_target);
if (false_target != &fallthrough) {
__ B(false_target);
@@ -3057,7 +3059,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instru
// the HCondition, generate the comparison directly.
Primitive::Type type = condition->InputAt(0)->GetType();
if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
- GenerateCompareTestAndBranch(condition, true_target, false_target);
+ GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
return;
}
@@ -3076,14 +3078,14 @@ void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instru
if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
if (arm_cond.Is(eq)) {
- __ CompareAndBranchIfZero(left, non_fallthrough_target);
+ __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
} else {
DCHECK(arm_cond.Is(ne));
- __ CompareAndBranchIfNonZero(left, non_fallthrough_target);
+ __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
}
} else {
__ Cmp(left, right);
- __ B(arm_cond, non_fallthrough_target);
+ __ B(arm_cond, non_fallthrough_target, far_target);
}
}
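A note on the is_far_target threading above: Thumb2 conditional branches have a 16-bit encoding with a very short reach and a 32-bit encoding with a much larger one, so telling the assembler that a label binds nearby lets it emit the narrow form. A standalone sketch of the size tradeoff (the helper is ours, not VIXL API):

    #include <cstdint>

    // Approximate encoding size of a Thumb2 conditional branch to a target
    // `offset` bytes away: the 16-bit T1 encoding reaches roughly
    // [-256, +254] bytes, beyond which the 32-bit T3 encoding (~ +/-1 MiB)
    // is required. is_far_target == false asserts the near case.
    int ConditionalBranchSizeBytes(int32_t offset) {
      return (offset >= -256 && offset <= 254) ? 2 : 4;
    }
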
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 91f7524c8e..ef809510ad 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -24,8 +24,8 @@
#include "nodes.h"
#include "string_reference.h"
#include "parallel_move_resolver.h"
+#include "type_reference.h"
#include "utils/arm/assembler_arm_vixl.h"
-#include "utils/type_reference.h"
// TODO(VIXL): make vixl clean wrt -Wshadow.
#pragma GCC diagnostic push
@@ -400,7 +400,8 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
bool far_target = true);
void GenerateCompareTestAndBranch(HCondition* condition,
vixl::aarch32::Label* true_target,
- vixl::aarch32::Label* false_target);
+ vixl::aarch32::Label* false_target,
+ bool is_far_target = true);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivRemByPowerOfTwo(HBinaryOperation* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index ff1fde6489..736b5070d9 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -23,8 +23,8 @@
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "string_reference.h"
+#include "type_reference.h"
#include "utils/mips/assembler_mips.h"
-#include "utils/type_reference.h"
namespace art {
namespace mips {
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index f49ad49fce..8405040386 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -21,8 +21,8 @@
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
+#include "type_reference.h"
#include "utils/mips64/assembler_mips64.h"
-#include "utils/type_reference.h"
namespace art {
namespace mips64 {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0ec6ee2fe2..f203d7f47e 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -470,6 +470,33 @@ static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder(
return inline_cache;
}
+bool HInliner::UseOnlyPolymorphicInliningWithNoDeopt() {
+ // If we are compiling AOT or OSR, pretend the call using inline caches is polymorphic and
+ // do not generate a deopt.
+ //
+ // For AOT:
+ // Generating a deopt does not ensure that we will actually capture the new types;
+ // and the danger is that we could be stuck in a loop with "forever" deoptimizations.
+ // Take for example the following scenario:
+ // - we capture the inline cache in one run
+ // - the next run, we deoptimize because we miss a type check, but the method
+ // never becomes hot again
+ // In this case, the inline cache will not be updated in the profile and the AOT code
+ // will keep deoptimizing.
+ // Another scenario is if we use profile compilation for a process which is not allowed
+ // to JIT (e.g. system server). If we deoptimize we will run interpreted code for the
+ // rest of the process's lifetime.
+ // TODO(calin):
+ // This is a compromise because we will most likely never update the inline cache
+ // in the profile (unless there's another reason to deopt). So we might be stuck with
+ // a sub-optimal inline cache.
+ // We could be smarter when capturing inline caches to mitigate this.
+ // (e.g. by having different thresholds for new and old methods).
+ //
+ // For OSR:
+ // We may come from the interpreter and it may have seen different receiver types.
+ return Runtime::Current()->IsAotCompiler() || outermost_graph_->IsCompilingOsr();
+}
bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
HInvoke* invoke_instruction,
ArtMethod* resolved_method)
@@ -503,9 +530,7 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
case kInlineCacheMonomorphic: {
MaybeRecordStat(kMonomorphicCall);
- if (outermost_graph_->IsCompilingOsr()) {
- // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
- // interpreter and it may have seen different receiver types.
+ if (UseOnlyPolymorphicInliningWithNoDeopt()) {
return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
} else {
return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
@@ -578,12 +603,11 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
return kInlineCacheNoData;
}
- ProfileCompilationInfo::OfflineProfileMethodInfo offline_profile;
- bool found = pci->GetMethod(caller_dex_file.GetLocation(),
- caller_dex_file.GetLocationChecksum(),
- caller_compilation_unit_.GetDexMethodIndex(),
- &offline_profile);
- if (!found) {
+ std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> offline_profile =
+ pci->GetMethod(caller_dex_file.GetLocation(),
+ caller_dex_file.GetLocationChecksum(),
+ caller_compilation_unit_.GetDexMethodIndex());
+ if (offline_profile == nullptr) {
return kInlineCacheNoData; // no profile information for this invocation.
}
@@ -593,7 +617,7 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
return kInlineCacheNoData;
} else {
return ExtractClassesFromOfflineProfile(invoke_instruction,
- offline_profile,
+ *(offline_profile.get()),
*inline_cache);
}
}
@@ -603,8 +627,8 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile(
const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
/*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
REQUIRES_SHARED(Locks::mutator_lock_) {
- const auto it = offline_profile.inline_caches.find(invoke_instruction->GetDexPc());
- if (it == offline_profile.inline_caches.end()) {
+ const auto it = offline_profile.inline_caches->find(invoke_instruction->GetDexPc());
+ if (it == offline_profile.inline_caches->end()) {
return kInlineCacheUninitialized;
}
@@ -926,14 +950,11 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
// If we have inlined all targets before, and this receiver is the last seen,
// we deoptimize instead of keeping the original invoke instruction.
- bool deoptimize = all_targets_inlined &&
+ bool deoptimize = !UseOnlyPolymorphicInliningWithNoDeopt() &&
+ all_targets_inlined &&
(i != InlineCache::kIndividualCacheSize - 1) &&
(classes->Get(i + 1) == nullptr);
- if (outermost_graph_->IsCompilingOsr()) {
- // We do not support HDeoptimize in OSR methods.
- deoptimize = false;
- }
HInstruction* compare = AddTypeGuard(receiver,
cursor,
bb_cursor,
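Reduced to a plain C++ sketch (our own stand-ins, not ART's HIR), the polymorphic dispatch this inliner change emits is a chain of class guards whose final fallback is either a deoptimization (JIT, non-OSR) or the original virtual call (AOT/OSR) — exactly the choice UseOnlyPolymorphicInliningWithNoDeopt() encodes:

    #include <cstdlib>

    struct Object { const void* klass; };

    // Dispatch for an inline cache that captured two receiver classes.
    int Dispatch(Object* receiver,
                 const void* klass_a, int (*inlined_a)(Object*),
                 const void* klass_b, int (*inlined_b)(Object*),
                 int (*virtual_call)(Object*), bool allow_deopt) {
      if (receiver->klass == klass_a) return inlined_a(receiver);  // type guard
      if (receiver->klass == klass_b) return inlined_b(receiver);  // type guard
      if (allow_deopt) {
        // Stand-in for HDeoptimize: return to the interpreter, which keeps
        // profiling and can capture the unseen receiver type.
        std::abort();
      }
      return virtual_call(receiver);  // conservative fallback kept for AOT/OSR
    }
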
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 9e4685cbf4..67476b6956 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -180,6 +180,9 @@ class HInliner : public HOptimization {
Handle<mirror::ObjectArray<mirror::Class>> classes)
REQUIRES_SHARED(Locks::mutator_lock_);
+ // Returns whether we should use only polymorphic inlining with no deoptimizations.
+ bool UseOnlyPolymorphicInliningWithNoDeopt();
+
// Try CHA-based devirtualization to change virtual method calls into
// direct calls.
// Returns the actual method that resolved_method can be devirtualized to.
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 40fafb0ae5..df9e7164ed 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1000,8 +1000,8 @@ HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, u
void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* allocation) {
DCHECK(allocation != nullptr &&
- allocation->IsNewInstance() ||
- allocation->IsNewArray()); // corresponding to "new" keyword in JLS.
+ (allocation->IsNewInstance() ||
+ allocation->IsNewArray())); // corresponding to "new" keyword in JLS.
if (allocation->IsNewInstance()) {
// STRING SPECIAL HANDLING:
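The instruction_builder.cc fix is pure operator precedence: && binds tighter than ||, so the old DCHECK parsed as (allocation != nullptr && allocation->IsNewInstance()) || allocation->IsNewArray() and could call IsNewArray() on a null pointer. A self-contained illustration with hypothetical types:

    #include <cassert>

    struct Alloc {
      bool IsNewInstance() const { return false; }
      bool IsNewArray() const { return true; }
    };

    void CheckAllocation(const Alloc* allocation) {
      // Fixed grouping: the null check now guards both predicates.
      assert(allocation != nullptr &&
             (allocation->IsNewInstance() || allocation->IsNewArray()));
    }
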
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index e8a62aafae..9803c9a0e9 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -2758,12 +2758,15 @@ void IntrinsicCodeGeneratorARM::VisitThreadInterrupted(HInvoke* invoke) {
int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
__ LoadFromOffset(kLoadWord, out, TR, offset);
Label done;
- __ CompareAndBranchIfZero(out, &done);
+ Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
+ __ CompareAndBranchIfZero(out, final_label);
__ dmb(ISH);
__ LoadImmediate(IP, 0);
__ StoreToOffset(kStoreWord, IP, TR, offset);
__ dmb(ISH);
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
}
UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index ce3ba52b34..1a33b0ee01 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -3127,7 +3127,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
__ Add(out, in, -info.low);
__ Cmp(out, info.high - info.low + 1);
vixl32::Label allocate, done;
- __ B(hs, &allocate);
+ __ B(hs, &allocate, /* is_far_target */ false);
// If the value is within the bounds, load the j.l.Integer directly from the array.
uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
@@ -3164,12 +3164,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
vixl32::Label done;
- __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+ vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
+ __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
__ Dmb(vixl32::ISH);
__ Mov(temp, 0);
assembler->StoreToOffset(kStoreWord, temp, tr, offset);
__ Dmb(vixl32::ISH);
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
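Both Thread.interrupted() intrinsics above apply the same final-label pattern: when the intrinsic's boolean result feeds a branch that immediately follows, GetFinalLabel() hands back that branch's target so the fast path can jump straight there, and the local done label is bound only if something actually referenced it. The shape of the idea, with hypothetical stand-ins rather than ART's implementation:

    struct Label { bool referenced = false; };

    // If the value's only consumer is an immediately following branch,
    // reuse its target; otherwise fall back to the local `done` label.
    Label* GetFinalLabel(Label* following_branch_target, Label* local_done) {
      return (following_branch_target != nullptr) ? following_branch_target
                                                  : local_done;
    }

Guarding the Bind() with IsLinked()/IsReferenced() then avoids emitting a label that no branch uses.
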
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 963df5a938..94787c99b2 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -173,6 +173,39 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction,
return false;
}
+// Detect situations with same-extension narrower operands.
+// Returns true on success and sets is_unsigned accordingly.
+static bool IsNarrowerOperands(HInstruction* a,
+ HInstruction* b,
+ Primitive::Type type,
+ /*out*/ HInstruction** r,
+ /*out*/ HInstruction** s,
+ /*out*/ bool* is_unsigned) {
+ if (IsSignExtensionAndGet(a, type, r) && IsSignExtensionAndGet(b, type, s)) {
+ *is_unsigned = false;
+ return true;
+ } else if (IsZeroExtensionAndGet(a, type, r) && IsZeroExtensionAndGet(b, type, s)) {
+ *is_unsigned = true;
+ return true;
+ }
+ return false;
+}
+
+// As above, single operand.
+static bool IsNarrowerOperand(HInstruction* a,
+ Primitive::Type type,
+ /*out*/ HInstruction** r,
+ /*out*/ bool* is_unsigned) {
+ if (IsSignExtensionAndGet(a, type, r)) {
+ *is_unsigned = false;
+ return true;
+ } else if (IsZeroExtensionAndGet(a, type, r)) {
+ *is_unsigned = true;
+ return true;
+ }
+ return false;
+}
+
// Detect up to two instructions a and b, and an accumulated constant c.
static bool IsAddConstHelper(HInstruction* instruction,
/*out*/ HInstruction** a,
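In source form, the pattern IsNarrowerOperands() detects looks like the following (illustrative C++, not ART code): both inputs of the wider operation are the same kind of extension of a narrower type, and the extension kind determines is_unsigned:

    #include <cstdint>

    int32_t MinOfSignExtendedBytes(int8_t x, int8_t y) {
      int32_t a = x;          // sign-extension of an 8-bit operand
      int32_t b = y;          // sign-extension of an 8-bit operand
      return a < b ? a : b;   // matched with *is_unsigned == false
    }

    int32_t MinOfZeroExtendedBytes(uint8_t x, uint8_t y) {
      int32_t a = x;          // zero-extension of an 8-bit operand
      int32_t b = y;          // zero-extension of an 8-bit operand
      return a < b ? a : b;   // matched with *is_unsigned == true
    }
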
@@ -756,7 +789,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
}
-// TODO: more operations and intrinsics, detect saturation arithmetic, etc.
+// TODO: saturation arithmetic.
bool HLoopOptimization::VectorizeUse(LoopNode* node,
HInstruction* instruction,
bool generate_code,
@@ -867,25 +900,38 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
return true;
}
// Deal with vector restrictions.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ HInstruction* r = opa;
+ bool is_unsigned = false;
if ((HasVectorRestrictions(restrictions, kNoShift)) ||
(instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
return false; // unsupported instruction
- } else if ((instruction->IsShr() || instruction->IsUShr()) &&
- HasVectorRestrictions(restrictions, kNoHiBits)) {
- return false; // hibits may impact lobits; TODO: we can do better!
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits)) {
+ // Shifts right need extra care to account for higher order bits.
+ // TODO: the less likely shr/unsigned and ushr/signed cases can be handled by flipping signedness.
+ if (instruction->IsShr() &&
+ (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+ return false; // reject, unless all operands are sign-extension narrower
+ } else if (instruction->IsUShr() &&
+ (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || !is_unsigned)) {
+ return false; // reject, unless all operands are zero-extension narrower
+ }
}
// Accept shift operator for vectorizable/invariant operands.
// TODO: accept symbolic, albeit loop invariant shift factors.
- HInstruction* opa = instruction->InputAt(0);
- HInstruction* opb = instruction->InputAt(1);
+ DCHECK(r != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ }
int64_t distance = 0;
- if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
IsInt64AndGet(opb, /*out*/ &distance)) {
// Restrict shift distance to packed data type width.
int64_t max_distance = Primitive::ComponentSize(type) * 8;
if (0 <= distance && distance < max_distance) {
if (generate_code) {
- GenerateVecOp(instruction, vector_map_->Get(opa), opb, type);
+ GenerateVecOp(instruction, vector_map_->Get(r), opb, type);
}
return true;
}
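Why shifts right need this extra care under kNoHiBits: in 32-bit scalar code an arithmetic shift pulls in bit 31, while an 8-bit vector lane pulls in its own bit 7, and the two only agree when the lane value is a sign-extension of the byte. Illustrative C++ (not ART code):

    #include <cstdint>

    int8_t ScalarShrThenNarrow(uint8_t x) {
      int32_t wide = x;                       // zero-extended: bit 31 is 0
      return static_cast<int8_t>(wide >> 1);  // x = 0x80 yields 0x40
    }

    int8_t LaneShr(uint8_t x) {
      int8_t lane = static_cast<int8_t>(x);   // a lane keeps only 8 bits
      return lane >> 1;                       // x = 0x80 yields 0xC0
    }
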
@@ -899,16 +945,23 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
case Intrinsics::kMathAbsFloat:
case Intrinsics::kMathAbsDouble: {
// Deal with vector restrictions.
- if (HasVectorRestrictions(restrictions, kNoAbs) ||
- HasVectorRestrictions(restrictions, kNoHiBits)) {
- // TODO: we can do better for some hibits cases.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* r = opa;
+ bool is_unsigned = false;
+ if (HasVectorRestrictions(restrictions, kNoAbs)) {
return false;
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+ (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+ return false; // reject, unless operand is sign-extension narrower
}
// Accept ABS(x) for vectorizable operand.
- HInstruction* opa = instruction->InputAt(0);
- if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
+ DCHECK(r != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions)) {
if (generate_code) {
- GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+ GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type);
}
return true;
}
@@ -923,18 +976,28 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
case Intrinsics::kMathMaxFloatFloat:
case Intrinsics::kMathMaxDoubleDouble: {
// Deal with vector restrictions.
- if (HasVectorRestrictions(restrictions, kNoMinMax) ||
- HasVectorRestrictions(restrictions, kNoHiBits)) {
- // TODO: we can do better for some hibits cases.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ HInstruction* r = opa;
+ HInstruction* s = opb;
+ bool is_unsigned = false;
+ if (HasVectorRestrictions(restrictions, kNoMinMax)) {
return false;
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+ !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
+ return false; // reject, unless all operands are same-extension narrower
}
// Accept MIN/MAX(x, y) for vectorizable operands.
- HInstruction* opa = instruction->InputAt(0);
- HInstruction* opb = instruction->InputAt(1);
- if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
- VectorizeUse(node, opb, generate_code, type, restrictions)) {
+ DCHECK(r != nullptr && s != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ s = opb;
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ VectorizeUse(node, s, generate_code, type, restrictions)) {
if (generate_code) {
- GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
+ GenerateVecOp(
+ instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
}
return true;
}
@@ -959,11 +1022,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoAbs;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1098,13 +1161,14 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org,
void HLoopOptimization::GenerateVecOp(HInstruction* org,
HInstruction* opa,
HInstruction* opb,
- Primitive::Type type) {
+ Primitive::Type type,
+ bool is_unsigned) {
if (vector_mode_ == kSequential) {
- // Scalar code follows implicit integral promotion.
- if (type == Primitive::kPrimBoolean ||
- type == Primitive::kPrimByte ||
- type == Primitive::kPrimChar ||
- type == Primitive::kPrimShort) {
+ // Non-converting scalar code follows implicit integral promotion.
+ if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean ||
+ type == Primitive::kPrimByte ||
+ type == Primitive::kPrimChar ||
+ type == Primitive::kPrimShort)) {
type = Primitive::kPrimInt;
}
}
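The promotion rule this hunk refines, stated in C++ terms (Java behaves the same): operands narrower than int are implicitly promoted to int before arithmetic, so sequential scalar replacement must compute in kPrimInt; but when org is itself a type conversion, the conversion performs the narrowing and must not be widened away. Sketch:

    #include <cstdint>

    int8_t AddThenNarrow(int8_t x, int8_t y) {
      // x + y is evaluated as int (implicit integral promotion); only the
      // explicit conversion narrows the result back to 8 bits.
      return static_cast<int8_t>(x + y);
    }
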
@@ -1185,7 +1249,6 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
case Intrinsics::kMathMinLongLong:
case Intrinsics::kMathMinFloatFloat:
case Intrinsics::kMathMinDoubleDouble: {
- bool is_unsigned = false; // TODO: detect unsigned versions
vector = new (global_allocator_)
HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
break;
@@ -1194,7 +1257,6 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
case Intrinsics::kMathMaxLongLong:
case Intrinsics::kMathMaxFloatFloat:
case Intrinsics::kMathMaxDoubleDouble: {
- bool is_unsigned = false; // TODO: detect unsigned versions
vector = new (global_allocator_)
HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
break;
@@ -1258,7 +1320,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
Primitive::Type type,
uint64_t restrictions) {
// Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
- // (note whether the sign bit in higher precision is shifted in has no effect
+ // (note whether the sign bit in wider precision is shifted in has no effect
// on the narrow precision computed by the idiom).
int64_t distance = 0;
if ((instruction->IsShr() ||
@@ -1269,6 +1331,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
HInstruction* b = nullptr;
int64_t c = 0;
if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
+ DCHECK(a != nullptr && b != nullptr);
// Accept c == 1 (rounded) or c == 0 (not rounded).
bool is_rounded = false;
if (c == 1) {
@@ -1280,11 +1343,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
HInstruction* r = nullptr;
HInstruction* s = nullptr;
bool is_unsigned = false;
- if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) {
- is_unsigned = true;
- } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) {
- is_unsigned = false;
- } else {
+ if (!IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned)) {
return false;
}
// Deal with vector restrictions.
@@ -1295,6 +1354,10 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
// Accept recognized halving add for vectorizable operands. Vectorized code uses the
// shorthand idiomatic operation. Sequential code uses the original scalar expressions.
DCHECK(r != nullptr && s != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = instruction->InputAt(0);
+ s = instruction->InputAt(1);
+ }
if (VectorizeUse(node, r, generate_code, type, restrictions) &&
VectorizeUse(node, s, generate_code, type, restrictions)) {
if (generate_code) {
@@ -1308,12 +1371,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
is_unsigned,
is_rounded));
} else {
- VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions);
- VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions);
- GenerateVecOp(instruction,
- vector_map_->Get(instruction->InputAt(0)),
- vector_map_->Get(instruction->InputAt(1)),
- type);
+ GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type);
}
}
return true;
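For reference, the rounded halving-add idiom that VectorizeHalvingAddIdiom matches, written out over zero-extended bytes (a sketch of the source shape, not ART code):

    #include <cstddef>
    #include <cstdint>

    void RoundedHalvingAdd(const uint8_t* a, const uint8_t* b,
                           uint8_t* out, size_t n) {
      for (size_t i = 0; i < n; ++i) {
        // (a + b + 1) >> 1 in int precision; only the low 8 bits survive the
        // narrowing store, so an arithmetic >> and Java's logical >>> agree,
        // which is the "sign bit in wider precision" remark above.
        out[i] = static_cast<uint8_t>((a[i] + b[i] + 1) >> 1);
      }
    }
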
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 6d5978d337..35298d4076 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -137,7 +137,11 @@ class HLoopOptimization : public HOptimization {
HInstruction* opa,
HInstruction* opb,
Primitive::Type type);
- void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
+ void GenerateVecOp(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ Primitive::Type type,
+ bool is_unsigned = false);
// Vectorization idioms.
bool VectorizeHalvingAddIdiom(LoopNode* node,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index bde7f2c1e0..689991010e 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2642,7 +2642,7 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
return os << "BootImageLinkTimePcRelative";
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
- return os << "Direct";
+ return os << "DirectAddress";
case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
return os << "DexCachePcRelative";
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: