Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator.h | 2 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 8 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 57 |
| -rw-r--r-- | compiler/optimizing/data_type.h | 6 |
| -rw-r--r-- | compiler/optimizing/instruction_simplifier.cc | 76 |
| -rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 8 |
| -rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 8 |
| -rw-r--r-- | compiler/optimizing/intrinsics_mips.cc | 8 |
| -rw-r--r-- | compiler/optimizing/intrinsics_mips64.cc | 8 |
| -rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 7 |
| -rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 8 |
| -rw-r--r-- | compiler/optimizing/loop_optimization.cc | 186 |
| -rw-r--r-- | compiler/optimizing/optimizing_compiler_stats.h | 5 |
| -rw-r--r-- | compiler/optimizing/optimizing_unit_test.h | 3 |
| -rw-r--r-- | compiler/optimizing/parallel_move_test.cc | 15 |
| -rw-r--r-- | compiler/optimizing/select_generator.cc | 17 |
| -rw-r--r-- | compiler/optimizing/stack_map_stream.h | 2 |
| -rw-r--r-- | compiler/optimizing/stack_map_test.cc | 21 |
18 files changed, 279 insertions, 166 deletions
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 3bd5e14539..e1f680fb99 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -24,12 +24,12 @@ #include "base/bit_field.h" #include "base/bit_utils.h" #include "base/enums.h" +#include "base/memory_region.h" #include "dex/string_reference.h" #include "dex/type_reference.h" #include "globals.h" #include "graph_visualizer.h" #include "locations.h" -#include "memory_region.h" #include "nodes.h" #include "optimizing_compiler_stats.h" #include "read_barrier_option.h" diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 4053f557d9..82d1fda878 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -6755,8 +6755,8 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { return 0; } -// Interface case has 3 temps, one for holding the number of interfaces, one for the current -// interface pointer, one for loading the current interface. +// Interface case has 2 temps, one for holding the number of interfaces, one for the current +// interface pointer, the current interface is compared in memory. // The other checks have one temp for loading the object's class. static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { if (type_check_kind == TypeCheckKind::kInterfaceCheck) { @@ -7069,9 +7069,7 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, Location::Any()); } - // Note that TypeCheckSlowPathX86 uses this "temp" register too. - locations->AddTemp(Location::RequiresRegister()); - // When read barriers are enabled, we need an additional temporary register for some cases. + // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 496d79d6c8..322b0cfc4c 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -6063,24 +6063,26 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } -static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { - if (type_check_kind == TypeCheckKind::kInterfaceCheck) { - // We need a temporary for holding the iftable length. - return true; - } - return kEmitCompilerReadBarrier && +// Temp is used for read barrier. +static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck); + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + return 1; + } + return 0; } -static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { - return kEmitCompilerReadBarrier && - !kUseBakerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck); +// Interface case has 2 temps, one for holding the number of interfaces, one for the current +// interface pointer, the current interface is compared in memory. 
+// The other checks have one temp for loading the object's class. +static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + return 2; + } + return 1 + NumberOfInstanceOfTemps(type_check_kind); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -6121,11 +6123,7 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { } // Note that TypeCheckSlowPathX86_64 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); - // When read barriers are enabled, we need a temporary register for - // some cases. - if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) { - locations->AddTemp(Location::RequiresRegister()); - } + locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -6136,9 +6134,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { Location cls = locations->InAt(1); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ? - locations->GetTemp(0) : - Location::NoLocation(); + const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + DCHECK_LE(num_temps, 1u); + Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -6401,14 +6399,8 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, Location::Any()); } - - // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. - locations->AddTemp(Location::RequiresRegister()); - // When read barriers are enabled, we need an additional temporary - // register for some cases. - if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) { - locations->AddTemp(Location::RequiresRegister()); - } + // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. + locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { @@ -6419,9 +6411,10 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ? - locations->GetTemp(1) : - Location::NoLocation(); + const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + DCHECK_GE(num_temps, 1u); + DCHECK_LE(num_temps, 2u); + Location maybe_temp2_loc = (num_temps >= 2u) ? 
locations->GetTemp(1) : Location::NoLocation(); const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index 4a6c91459f..be26e67af3 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -210,6 +210,12 @@ class DataType { static bool IsTypeConversionImplicit(Type input_type, Type result_type); static bool IsTypeConversionImplicit(int64_t value, Type result_type); + static bool IsZeroExtension(Type input_type, Type result_type) { + return IsIntOrLongType(result_type) && + IsUnsignedType(input_type) && + Size(result_type) > Size(input_type); + } + static const char* PrettyDescriptor(Type type); private: diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 676fe6bcb7..d3cf9568c2 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -67,7 +67,6 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool TryCombineVecMultiplyAccumulate(HVecMul* mul); void VisitShift(HBinaryOperation* shift); - void VisitEqual(HEqual* equal) OVERRIDE; void VisitNotEqual(HNotEqual* equal) OVERRIDE; void VisitBooleanNot(HBooleanNot* bool_not) OVERRIDE; @@ -78,6 +77,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void VisitNullCheck(HNullCheck* instruction) OVERRIDE; void VisitArrayLength(HArrayLength* instruction) OVERRIDE; void VisitCheckCast(HCheckCast* instruction) OVERRIDE; + void VisitAbs(HAbs* instruction) OVERRIDE; void VisitAdd(HAdd* instruction) OVERRIDE; void VisitAnd(HAnd* instruction) OVERRIDE; void VisitCondition(HCondition* instruction) OVERRIDE; @@ -903,6 +903,30 @@ static bool AreLowerPrecisionArgs(DataType::Type to_type, HInstruction* a, HInst to_type == DataType::Type::kInt64); } +// Returns an acceptable substitution for "a" on the select +// construct "a <cmp> b ? c : .." during MIN/MAX recognition. +static HInstruction* AllowInMinMax(IfCondition cmp, + HInstruction* a, + HInstruction* b, + HInstruction* c) { + int64_t value = 0; + if (IsInt64AndGet(b, /*out*/ &value) && + (((cmp == kCondLT || cmp == kCondLE) && c->IsMax()) || + ((cmp == kCondGT || cmp == kCondGE) && c->IsMin()))) { + HConstant* other = c->AsBinaryOperation()->GetConstantRight(); + if (other != nullptr && a == c->AsBinaryOperation()->GetLeastConstantLeft()) { + int64_t other_value = Int64FromConstant(other); + bool is_max = (cmp == kCondLT || cmp == kCondLE); + // Allow the max for a < 100 ? max(a, -100) : .. + // or the min for a > -100 ? min(a, 100) : .. + if (is_max ? (value >= other_value) : (value <= other_value)) { + return c; + } + } + } + return nullptr; +} + void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { HInstruction* replace_with = nullptr; HInstruction* condition = select->GetCondition(); @@ -946,9 +970,17 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { DataType::Type t_type = true_value->GetType(); DataType::Type f_type = false_value->GetType(); // Here we have a <cmp> b ? true_value : false_value. - // Test if both values are compatible integral types (resulting - // MIN/MAX/ABS type will be int or long, like the condition). 
+ // Test if both values are compatible integral types (resulting MIN/MAX/ABS + // type will be int or long, like the condition). Replacements are general, + // but assume conditions prefer constants on the right. if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) { + // Allow a < 100 ? max(a, -100) : .. + // or a > -100 ? min(a, 100) : .. + // to use min/max instead of a to detect nested min/max expressions. + HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value); + if (new_a != nullptr) { + a = new_a; + } // Try to replace typical integral MIN/MAX/ABS constructs. if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) && ((a == true_value && b == false_value) || @@ -957,19 +989,16 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { // or a > b ? a : b (MAX) or a > b ? b : a (MIN). bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value); replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min); - } else if (true_value->IsNeg()) { - HInstruction* negated = true_value->InputAt(0); - if ((cmp == kCondLT || cmp == kCondLE) && - (a == negated && a == false_value && IsInt64Value(b, 0))) { - // Found a < 0 ? -a : a which can be replaced by ABS(a). - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), false_value, select); - } - } else if (false_value->IsNeg()) { - HInstruction* negated = false_value->InputAt(0); - if ((cmp == kCondGT || cmp == kCondGE) && - (a == true_value && a == negated && IsInt64Value(b, 0))) { - // Found a > 0 ? a : -a which can be replaced by ABS(a). - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) || + ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) { + bool negLeft = (cmp == kCondLT || cmp == kCondLE); + HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0); + HInstruction* not_negated = negLeft ? false_value : true_value; + if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) { + // Found a < 0 ? -a : a + // or a > 0 ? a : -a + // which can be replaced by ABS(a). + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select); } } else if (true_value->IsSub() && false_value->IsSub()) { HInstruction* true_sub1 = true_value->InputAt(0); @@ -981,8 +1010,8 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { ((cmp == kCondLT || cmp == kCondLE) && (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && AreLowerPrecisionArgs(t_type, a, b)) { - // Found a > b ? a - b : b - a or - // a < b ? b - a : a - b + // Found a > b ? a - b : b - a + // or a < b ? b - a : a - b // which can be replaced by ABS(a - b) for lower precision operands a, b. replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); } @@ -1241,6 +1270,17 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct } } +void InstructionSimplifierVisitor::VisitAbs(HAbs* instruction) { + HInstruction* input = instruction->GetInput(); + if (DataType::IsZeroExtension(input->GetType(), instruction->GetResultType())) { + // Zero extension from narrow to wide can never set sign bit in the wider + // operand, making the subsequent Abs redundant (e.g., abs(b & 0xff) for byte b). 
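The VisitAbs rule added above relies on a simple scalar fact: a value obtained by zero-extending a narrower unsigned operand can never have its sign bit set, so a following Abs is redundant; likewise, the select rewrite recognizes a < 0 ? -a : a as ABS(a). A minimal standalone C++ sketch of these identities (illustrative only, not code from this patch):

  #include <cassert>
  #include <cstdint>
  #include <cstdlib>

  int main() {
    for (int i = -128; i <= 127; ++i) {
      int8_t b = static_cast<int8_t>(i);
      // Zero extension: (b & 0xff) is always in [0, 255], so abs() is a no-op.
      int32_t widened = static_cast<uint8_t>(b);
      assert(std::abs(widened) == widened);
      // Sign-based select: b < 0 ? -b : b is exactly abs(b).
      int32_t v = b;
      assert((v < 0 ? -v : v) == std::abs(v));
    }
    return 0;
  }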
+ instruction->ReplaceWith(input); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + } +} + void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 81c0b50932..c3d643a7d1 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2875,6 +2875,14 @@ void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } + UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index e61a0b0809..29aecbc097 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -3028,6 +3028,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) { } } +void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } + UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index bc1292b2b7..ae248a3e5c 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2693,6 +2693,14 @@ void IntrinsicCodeGeneratorMIPS::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +void IntrinsicLocationsBuilderMIPS::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorMIPS::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } + // Unimplemented intrinsics. 
UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index f429afde2c..9a9ae714bc 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -2354,6 +2354,14 @@ void IntrinsicCodeGeneratorMIPS64::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +void IntrinsicLocationsBuilderMIPS64::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorMIPS64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } + UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index c4f322bf0c..f84a33bb8e 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2928,6 +2928,13 @@ void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 437bc3dd3c..7627dc9490 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2737,6 +2737,14 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } + UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite) diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 1d83815e1f..b41b659083 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -153,6 +153,18 @@ static bool IsSignExtensionAndGet(HInstruction* instruction, return false; } } + // A MIN-MAX on narrower operands qualifies as well + // (returning the operator itself). + if (instruction->IsMin() || instruction->IsMax()) { + HBinaryOperation* min_max = instruction->AsBinaryOperation(); + DCHECK(min_max->GetType() == DataType::Type::kInt32 || + min_max->GetType() == DataType::Type::kInt64); + if (IsSignExtensionAndGet(min_max->InputAt(0), type, operand) && + IsSignExtensionAndGet(min_max->InputAt(1), type, operand)) { + *operand = min_max; + return true; + } + } return false; } @@ -216,6 +228,18 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, return false; } } + // A MIN-MAX on narrower operands qualifies as well + // (returning the operator itself). 
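The reason a MIN or MAX may stand in for an extended operand here: if both of its inputs are sign (or zero) extensions of the same narrow type, the min/max result is still representable in that narrow type, so the vectorizer can treat the min/max node itself as the narrower operand. A small C++ check of that property (a sketch under that assumption, not compiler code):

  #include <algorithm>
  #include <cassert>
  #include <cstdint>

  int main() {
    for (int x = -128; x <= 127; ++x) {
      for (int y = -128; y <= 127; ++y) {
        // min/max of two sign-extended bytes still fits in a byte.
        int m = std::min(x, y);
        assert(m == static_cast<int8_t>(m));
        int M = std::max(x, y);
        assert(M == static_cast<int8_t>(M));
      }
    }
    return 0;
  }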
+ if (instruction->IsMin() || instruction->IsMax()) { + HBinaryOperation* min_max = instruction->AsBinaryOperation(); + DCHECK(min_max->GetType() == DataType::Type::kInt32 || + min_max->GetType() == DataType::Type::kInt64); + if (IsZeroExtensionAndGet(min_max->InputAt(0), type, operand) && + IsZeroExtensionAndGet(min_max->InputAt(1), type, operand)) { + *operand = min_max; + return true; + } + } return false; } @@ -227,6 +251,7 @@ static bool IsNarrowerOperands(HInstruction* a, /*out*/ HInstruction** r, /*out*/ HInstruction** s, /*out*/ bool* is_unsigned) { + DCHECK(a != nullptr && b != nullptr); // Look for a matching sign extension. DataType::Type stype = HVecOperation::ToSignedType(type); if (IsSignExtensionAndGet(a, stype, r) && IsSignExtensionAndGet(b, stype, s)) { @@ -247,6 +272,7 @@ static bool IsNarrowerOperand(HInstruction* a, DataType::Type type, /*out*/ HInstruction** r, /*out*/ bool* is_unsigned) { + DCHECK(a != nullptr); // Look for a matching sign extension. DataType::Type stype = HVecOperation::ToSignedType(type); if (IsSignExtensionAndGet(a, stype, r)) { @@ -270,20 +296,28 @@ static uint32_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type)); } -// Detect up to two instructions a and b, and an acccumulated constant c. -static bool IsAddConstHelper(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ HInstruction** b, - /*out*/ int64_t* c, - int32_t depth) { - static constexpr int32_t kMaxDepth = 8; // don't search too deep +// Detect up to two added operands a and b and an acccumulated constant c. +static bool IsAddConst(HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c, + int32_t depth = 8) { // don't search too deep int64_t value = 0; + // Enter add/sub while still within reasonable depth. + if (depth > 0) { + if (instruction->IsAdd()) { + return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1) && + IsAddConst(instruction->InputAt(1), a, b, c, depth - 1); + } else if (instruction->IsSub() && + IsInt64AndGet(instruction->InputAt(1), &value)) { + *c -= value; + return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1); + } + } + // Otherwise, deal with leaf nodes. if (IsInt64AndGet(instruction, &value)) { *c += value; return true; - } else if (instruction->IsAdd() && depth <= kMaxDepth) { - return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) && - IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1); } else if (*a == nullptr) { *a = instruction; return true; @@ -291,42 +325,40 @@ static bool IsAddConstHelper(HInstruction* instruction, *b = instruction; return true; } - return false; // too many non-const operands + return false; // too many operands } -// Detect a + b + c for an optional constant c. -static bool IsAddConst(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ HInstruction** b, - /*out*/ int64_t* c) { - if (instruction->IsAdd()) { - // Try to find a + b and accumulated c. - if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) && - IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) && - *b != nullptr) { - return true; +// Detect a + b + c with optional constant c. 
+static bool IsAddConst2(HGraph* graph, + HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c) { + if (IsAddConst(instruction, a, b, c) && *a != nullptr) { + if (*b == nullptr) { + // Constant is usually already present, unless accumulated. + *b = graph->GetConstant(instruction->GetType(), (*c)); + *c = 0; } - // Found a + b. - *a = instruction->InputAt(0); - *b = instruction->InputAt(1); - *c = 0; return true; } return false; } -// Detect a + c for constant c. -static bool IsAddConst(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ int64_t* c) { - if (instruction->IsAdd()) { - if (IsInt64AndGet(instruction->InputAt(0), c)) { - *a = instruction->InputAt(1); - return true; - } else if (IsInt64AndGet(instruction->InputAt(1), c)) { - *a = instruction->InputAt(0); - return true; - } +// Detect a direct a - b or a hidden a - (-c). +static bool IsSubConst2(HGraph* graph, + HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b) { + int64_t c = 0; + if (instruction->IsSub()) { + *a = instruction->InputAt(0); + *b = instruction->InputAt(1); + return true; + } else if (IsAddConst(instruction, a, b, &c) && *a != nullptr && *b == nullptr) { + // Constant for the hidden subtraction. + *b = graph->GetConstant(instruction->GetType(), -c); + return true; } return false; } @@ -378,7 +410,8 @@ static bool CanSetRange(DataType::Type type, } // Accept various saturated addition forms. -static bool IsSaturatedAdd(HInstruction* clippee, +static bool IsSaturatedAdd(HInstruction* a, + HInstruction* b, DataType::Type type, int64_t lo, int64_t hi, @@ -390,8 +423,7 @@ static bool IsSaturatedAdd(HInstruction* clippee, // Tighten the range for signed single clipping on constant. if (!is_unsigned) { int64_t c = 0; - HInstruction* notused = nullptr; - if (IsAddConst(clippee, ¬used, &c)) { + if (IsInt64AndGet(a, &c) || IsInt64AndGet(b, &c)) { // For c in proper range and narrower operand r: // MIN(r + c, 127) c > 0 // or MAX(r + c, -128) c < 0 (and possibly redundant bound). @@ -413,7 +445,7 @@ static bool IsSaturatedAdd(HInstruction* clippee, } // Accept various saturated subtraction forms. -static bool IsSaturatedSub(HInstruction* clippee, +static bool IsSaturatedSub(HInstruction* a, DataType::Type type, int64_t lo, int64_t hi, @@ -425,7 +457,7 @@ static bool IsSaturatedSub(HInstruction* clippee, // Tighten the range for signed single clipping on constant. if (!is_unsigned) { int64_t c = 0; - if (IsInt64AndGet(clippee->InputAt(0), /*out*/ &c)) { + if (IsInt64AndGet(a, /*out*/ &c)) { // For c in proper range and narrower operand r: // MIN(c - r, 127) c > 0 // or MAX(c - r, -128) c < 0 (and possibly redundant bound). @@ -1521,8 +1553,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return false; // reject, unless all operands are same-extension narrower } // Accept MIN/MAX(x, y) for vectorizable operands. - DCHECK(r != nullptr); - DCHECK(s != nullptr); + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = opa; s = opb; @@ -2026,31 +2057,37 @@ bool HLoopOptimization::VectorizeSaturationIdiom(LoopNode* node, instruction->GetType() != DataType::Type::kInt64) { return false; } - // Clipped addition or subtraction? + // Clipped addition or subtraction on narrower operands? We will try both + // formats since, e.g., x+c can be interpreted as x+c and x-(-c), depending + // on what clipping values are used, to get most benefits. 
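At the scalar level, the clipped additions and subtractions matched above amount to clamping a widened sum or difference back into the narrow type's range, and the x + c versus x - (-c) ambiguity is why both interpretations are tried. A self-contained C++ sketch of the saturating byte operations the idiom ultimately stands for (illustrative, not code generated by the compiler):

  #include <algorithm>
  #include <cassert>
  #include <cstdint>

  // Saturating signed byte addition: MIN(MAX(a + b, -128), 127).
  int8_t sat_add_s8(int8_t a, int8_t b) {
    int32_t sum = int32_t{a} + int32_t{b};  // widen, as the loop body does
    return static_cast<int8_t>(std::clamp(sum, -128, 127));
  }

  // Saturating signed byte subtraction, same clipping window.
  int8_t sat_sub_s8(int8_t a, int8_t b) {
    int32_t diff = int32_t{a} - int32_t{b};
    return static_cast<int8_t>(std::clamp(diff, -128, 127));
  }

  int main() {
    assert(sat_add_s8(100, 100) == 127);    // clipped high
    assert(sat_sub_s8(-100, 100) == -128);  // clipped low
    // The hidden subtraction: x + (-5) and x - 5 are the same expression,
    // which is why the matcher considers both the ADD and the SUB form.
    int x = 42;
    assert(x + (-5) == x - 5);
    return 0;
  }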
int64_t lo = std::numeric_limits<int64_t>::min(); int64_t hi = std::numeric_limits<int64_t>::max(); HInstruction* clippee = FindClippee(instruction, &lo, &hi); - bool is_add = true; - if (clippee->IsAdd()) { - is_add = true; - } else if (clippee->IsSub()) { - is_add = false; - } else { - return false; // clippee is not add/sub - } - // Addition or subtraction on narrower operands? + HInstruction* a = nullptr; + HInstruction* b = nullptr; HInstruction* r = nullptr; HInstruction* s = nullptr; bool is_unsigned = false; - if (IsNarrowerOperands(clippee->InputAt(0), clippee->InputAt(1), type, &r, &s, &is_unsigned) && - (is_add ? IsSaturatedAdd(clippee, type, lo, hi, is_unsigned) - : IsSaturatedSub(clippee, type, lo, hi, is_unsigned))) { - DCHECK(r != nullptr); - DCHECK(s != nullptr); + bool is_add = true; + int64_t c = 0; + // First try for saturated addition. + if (IsAddConst2(graph_, clippee, /*out*/ &a, /*out*/ &b, /*out*/ &c) && c == 0 && + IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) && + IsSaturatedAdd(r, s, type, lo, hi, is_unsigned)) { + is_add = true; } else { - return false; + // Then try again for saturated subtraction. + a = b = r = s = nullptr; + if (IsSubConst2(graph_, clippee, /*out*/ &a, /*out*/ &b) && + IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) && + IsSaturatedSub(r, type, lo, hi, is_unsigned)) { + is_add = false; + } else { + return false; + } } // Accept saturation idiom for vectorizable operands. + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = instruction->InputAt(0); s = instruction->InputAt(1); @@ -2101,8 +2138,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* a = nullptr; HInstruction* b = nullptr; int64_t c = 0; - if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { - DCHECK(a != nullptr && b != nullptr); + if (IsAddConst2(graph_, instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { // Accept c == 1 (rounded) or c == 0 (not rounded). bool is_rounded = false; if (c == 1) { @@ -2124,8 +2160,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, } // Accept recognized halving add for vectorizable operands. Vectorized code uses the // shorthand idiomatic operation. Sequential code uses the original scalar expressions. - DCHECK(r != nullptr); - DCHECK(s != nullptr); + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = instruction->InputAt(0); s = instruction->InputAt(1); @@ -2175,19 +2210,11 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* v = instruction->InputAt(1); HInstruction* a = nullptr; HInstruction* b = nullptr; - if (v->GetType() == reduction_type && v->IsAbs()) { - HInstruction* x = v->InputAt(0); - if (x->GetType() == reduction_type) { - int64_t c = 0; - if (x->IsSub()) { - a = x->InputAt(0); - b = x->InputAt(1); - } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) { - b = graph_->GetConstant(reduction_type, -c); // hidden SUB! - } - } - } - if (a == nullptr || b == nullptr) { + if (v->IsAbs() && + v->GetType() == reduction_type && + IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) { + DCHECK(a != nullptr && b != nullptr); + } else { return false; } // Accept same-type or consistent sign extension for narrower-type on operands a and b. @@ -2220,8 +2247,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, } // Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand // idiomatic operation. 
Sequential code uses the original scalar expressions. - DCHECK(r != nullptr); - DCHECK(s != nullptr); + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = s = v->InputAt(0); } diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index e0a9cfb934..9a26f2f6c4 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -125,11 +125,6 @@ class OptimizingCompilerStats { } void Log() const { - if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) { - // Log only in debug builds or if the compiler is verbose. - return; - } - uint32_t compiled_intrinsics = GetStat(MethodCompilationStat::kCompiledIntrinsic); uint32_t compiled_native_stubs = GetStat(MethodCompilationStat::kCompiledNativeStub); uint32_t bytecode_attempts = diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 6dcbadba6e..a9bc5664c0 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -20,6 +20,7 @@ #include <memory> #include <vector> +#include "base/malloc_arena_pool.h" #include "base/scoped_arena_allocator.h" #include "builder.h" #include "common_compiler_test.h" @@ -97,7 +98,7 @@ class ArenaPoolAndAllocator { ScopedArenaAllocator* GetScopedAllocator() { return &scoped_allocator_; } private: - ArenaPool pool_; + MallocArenaPool pool_; ArenaAllocator allocator_; ArenaStack arena_stack_; ScopedArenaAllocator scoped_allocator_; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index cb87cabe1c..be35201166 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/malloc_arena_pool.h" #include "nodes.h" #include "parallel_move_resolver.h" @@ -180,7 +181,7 @@ TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes); TYPED_TEST(ParallelMoveTest, Dependency) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -207,7 +208,7 @@ TYPED_TEST(ParallelMoveTest, Dependency) { } TYPED_TEST(ParallelMoveTest, Cycle) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -257,7 +258,7 @@ TYPED_TEST(ParallelMoveTest, Cycle) { } TYPED_TEST(ParallelMoveTest, ConstantLast) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); @@ -276,7 +277,7 @@ TYPED_TEST(ParallelMoveTest, ConstantLast) { } TYPED_TEST(ParallelMoveTest, Pairs) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -453,7 +454,7 @@ TYPED_TEST(ParallelMoveTest, Pairs) { } TYPED_TEST(ParallelMoveTest, MultiCycles) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -551,7 +552,7 @@ TYPED_TEST(ParallelMoveTest, MultiCycles) { // Test that we do 64bits moves before 32bits moves. 
TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -610,7 +611,7 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { } TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 66e51421ca..3f52bdd13c 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -43,12 +43,16 @@ static bool IsSimpleBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); if (instruction->IsControlFlow()) { - if (num_instructions > kMaxInstructionsInBranch) { - return false; - } return instruction->IsGoto() || instruction->IsReturn(); } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) { - num_instructions++; + if (instruction->IsSelect() && + instruction->AsSelect()->GetCondition()->GetBlock() == block) { + // Count one HCondition and HSelect in the same block as a single instruction. + // This enables finding nested selects. + continue; + } else if (++num_instructions > kMaxInstructionsInBranch) { + return false; // bail as soon as we exceed number of allowed instructions + } } else { return false; } @@ -97,6 +101,7 @@ void HSelectGenerator::Run() { HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); DCHECK_NE(true_block, false_block); + if (!IsSimpleBlock(true_block) || !IsSimpleBlock(false_block) || !BlocksMergeTogether(true_block, false_block)) { @@ -107,10 +112,10 @@ void HSelectGenerator::Run() { // If the branches are not empty, move instructions in front of the If. // TODO(dbrazdil): This puts an instruction between If and its condition. // Implement moving of conditions to first users if possible. 
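At the source level, the nested selects that IsSimpleBlock now tolerates typically come from chained conditional expressions, a clamp being the common case. A hypothetical example of that shape (written in C++ purely for illustration; the compiler sees the equivalent dex code):

  // After select generation this becomes an HSelect whose input is another
  // HSelect, which the simplifier's MIN/MAX rules can then fold further.
  int clamp255(int x) {
    return x > 255 ? 255 : (x < 0 ? 0 : x);
  }

With both the inner and the outer branch turned into selects, the nested min/max recognition added to the instruction simplifier can rewrite the whole expression into a MIN/MAX chain.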
- if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { + while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { true_block->GetFirstInstruction()->MoveBefore(if_instruction); } - if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { + while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { false_block->GetFirstInstruction()->MoveBefore(if_instruction); } DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 579aabdb5f..268e9bd6e0 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -19,9 +19,9 @@ #include "base/bit_vector-inl.h" #include "base/hash_map.h" +#include "base/memory_region.h" #include "base/scoped_arena_containers.h" #include "base/value_object.h" -#include "memory_region.h" #include "method_info.h" #include "nodes.h" #include "stack_map.h" diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 7e517f3485..e36c592662 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -18,6 +18,7 @@ #include "art_method.h" #include "base/arena_bit_vector.h" +#include "base/malloc_arena_pool.h" #include "stack_map_stream.h" #include "gtest/gtest.h" @@ -46,7 +47,7 @@ static bool CheckStackMask( using Kind = DexRegisterLocation::Kind; TEST(StackMapTest, Test1) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -128,7 +129,7 @@ TEST(StackMapTest, Test1) { } TEST(StackMapTest, Test2) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -413,7 +414,7 @@ TEST(StackMapTest, Test2) { } TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -508,7 +509,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { } TEST(StackMapTest, TestNonLiveDexRegisters) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -588,7 +589,7 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { // StackMap::kNoDexRegisterMapSmallEncoding, and ensure we do // not treat it as kNoDexRegisterMap. 
TEST(StackMapTest, DexRegisterMapOffsetOverflow) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -652,7 +653,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { } TEST(StackMapTest, TestShareDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -711,7 +712,7 @@ TEST(StackMapTest, TestShareDexRegisterMap) { } TEST(StackMapTest, TestNoDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -761,7 +762,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { } TEST(StackMapTest, InlineTest) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -949,7 +950,7 @@ TEST(StackMapTest, CodeOffsetTest) { } TEST(StackMapTest, TestDeduplicateStackMask) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); @@ -978,7 +979,7 @@ TEST(StackMapTest, TestDeduplicateStackMask) { } TEST(StackMapTest, TestInvokeInfo) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); |
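For reference, the sum-of-absolute-differences shape that VectorizeSADIdiom matches, including the hidden subtraction handled by IsSubConst2, looks roughly like this in scalar form (a sketch, not code from the patch; the names sad and sad_against_constant are made up for illustration):

  #include <cstddef>
  #include <cstdint>
  #include <cstdlib>

  // Plain SAD reduction over byte arrays; once abs(a - b) is recognized,
  // the vectorizer can emit a dedicated SAD vector operation for the body.
  int32_t sad(const int8_t* a, const int8_t* b, size_t n) {
    int32_t acc = 0;
    for (size_t i = 0; i < n; ++i) {
      acc += std::abs(a[i] - b[i]);   // direct a - b
    }
    return acc;
  }

  // The hidden form: abs(x + (-c)) is matched as abs(x - c).
  int32_t sad_against_constant(const int8_t* a, size_t n, int8_t c) {
    int32_t acc = 0;
    for (size_t i = 0; i < n; ++i) {
      acc += std::abs(a[i] + (-c));
    }
    return acc;
  }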