Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc  65
-rw-r--r--  compiler/optimizing/builder.cc  3
-rw-r--r--  compiler/optimizing/builder.h  5
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  41
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc  66
-rw-r--r--  compiler/optimizing/code_generator_mips.cc  98
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc  96
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc  24
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc  20
-rw-r--r--  compiler/optimizing/code_generator_vector_mips.cc  44
-rw-r--r--  compiler/optimizing/code_generator_vector_mips64.cc  44
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc  34
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc  24
-rw-r--r--  compiler/optimizing/code_generator_x86.cc  37
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  37
-rw-r--r--  compiler/optimizing/code_sinking.cc  4
-rw-r--r--  compiler/optimizing/codegen_test.cc  87
-rw-r--r--  compiler/optimizing/constant_folding_test.cc  20
-rw-r--r--  compiler/optimizing/data_type-inl.h  2
-rw-r--r--  compiler/optimizing/data_type.cc  2
-rw-r--r--  compiler/optimizing/data_type.h  27
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc  136
-rw-r--r--  compiler/optimizing/dead_code_elimination.h  1
-rw-r--r--  compiler/optimizing/dead_code_elimination_test.cc  8
-rw-r--r--  compiler/optimizing/dominator_test.cc  34
-rw-r--r--  compiler/optimizing/find_loops_test.cc  28
-rw-r--r--  compiler/optimizing/graph_checker.cc  10
-rw-r--r--  compiler/optimizing/graph_checker_test.cc  14
-rw-r--r--  compiler/optimizing/graph_visualizer.cc  11
-rw-r--r--  compiler/optimizing/inliner.cc  58
-rw-r--r--  compiler/optimizing/inliner.h  2
-rw-r--r--  compiler/optimizing/instruction_builder.cc  2
-rw-r--r--  compiler/optimizing/instruction_builder.h  3
-rw-r--r--  compiler/optimizing/intrinsics.cc  2
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc  8
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc  8
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc  9
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc  9
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc  8
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc  8
-rw-r--r--  compiler/optimizing/linearize_test.cc  19
-rw-r--r--  compiler/optimizing/live_ranges_test.cc  16
-rw-r--r--  compiler/optimizing/liveness_test.cc  28
-rw-r--r--  compiler/optimizing/load_store_elimination.cc  381
-rw-r--r--  compiler/optimizing/loop_optimization.cc  68
-rw-r--r--  compiler/optimizing/nodes.cc  9
-rw-r--r--  compiler/optimizing/nodes.h  45
-rw-r--r--  compiler/optimizing/nodes_vector.h  72
-rw-r--r--  compiler/optimizing/nodes_vector_test.cc  168
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc  4
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc  54
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h  1
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h  60
-rw-r--r--  compiler/optimizing/pretty_printer_test.cc  28
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc  2
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc  2
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc  2
-rw-r--r--  compiler/optimizing/register_allocator_test.cc  18
-rw-r--r--  compiler/optimizing/scheduler_test.cc  6
-rw-r--r--  compiler/optimizing/ssa_test.cc  30
-rw-r--r--  compiler/optimizing/superblock_cloner.cc  704
-rw-r--r--  compiler/optimizing/superblock_cloner.h  323
-rw-r--r--  compiler/optimizing/superblock_cloner_test.cc  121
-rw-r--r--  compiler/optimizing/suspend_check_test.cc  16
64 files changed, 2468 insertions, 848 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 9c2068ec5e..d893cc88c4 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -302,7 +302,7 @@ class ValueRange : public ArenaObject<kArenaAllocBoundsCheckElimination> {
ValueBound GetLower() const { return lower_; }
ValueBound GetUpper() const { return upper_; }
- bool IsConstantValueRange() { return lower_.IsConstant() && upper_.IsConstant(); }
+ bool IsConstantValueRange() const { return lower_.IsConstant() && upper_.IsConstant(); }
// If it's certain that this value range fits in other_range.
virtual bool FitsIn(ValueRange* other_range) const {
@@ -789,24 +789,33 @@ class BCEVisitor : public HGraphVisitor {
ApplyRangeFromComparison(left, block, false_successor, new_range);
}
} else if (cond == kCondNE || cond == kCondEQ) {
- if (left->IsArrayLength() && lower.IsConstant() && upper.IsConstant()) {
- // Special case:
- // length == [c,d] yields [c, d] along true
- // length != [c,d] yields [c, d] along false
- if (!lower.Equals(ValueBound::Min()) || !upper.Equals(ValueBound::Max())) {
- ValueRange* new_range = new (&allocator_) ValueRange(&allocator_, lower, upper);
- ApplyRangeFromComparison(
- left, block, cond == kCondEQ ? true_successor : false_successor, new_range);
- }
- // In addition:
- // length == 0 yields [1, max] along false
- // length != 0 yields [1, max] along true
- if (lower.GetConstant() == 0 && upper.GetConstant() == 0) {
- ValueRange* new_range = new (&allocator_) ValueRange(
- &allocator_, ValueBound(nullptr, 1), ValueBound::Max());
- ApplyRangeFromComparison(
- left, block, cond == kCondEQ ? false_successor : true_successor, new_range);
+ if (left->IsArrayLength()) {
+ if (lower.IsConstant() && upper.IsConstant()) {
+ // Special case:
+ // length == [c,d] yields [c, d] along true
+ // length != [c,d] yields [c, d] along false
+ if (!lower.Equals(ValueBound::Min()) || !upper.Equals(ValueBound::Max())) {
+ ValueRange* new_range = new (&allocator_) ValueRange(&allocator_, lower, upper);
+ ApplyRangeFromComparison(
+ left, block, cond == kCondEQ ? true_successor : false_successor, new_range);
+ }
+ // In addition:
+ // length == 0 yields [1, max] along false
+ // length != 0 yields [1, max] along true
+ if (lower.GetConstant() == 0 && upper.GetConstant() == 0) {
+ ValueRange* new_range = new (&allocator_) ValueRange(
+ &allocator_, ValueBound(nullptr, 1), ValueBound::Max());
+ ApplyRangeFromComparison(
+ left, block, cond == kCondEQ ? false_successor : true_successor, new_range);
+ }
}
+ } else if (lower.IsRelatedToArrayLength() && lower.Equals(upper)) {
+ // Special aliasing case, with x not array length itself:
+ // x == [length,length] yields x == length along true
+ // x != [length,length] yields x == length along false
+ ValueRange* new_range = new (&allocator_) ValueRange(&allocator_, lower, upper);
+ ApplyRangeFromComparison(
+ left, block, cond == kCondEQ ? true_successor : false_successor, new_range);
}
}
}
@@ -827,9 +836,23 @@ class BCEVisitor : public HGraphVisitor {
ValueRange array_range(&allocator_, lower, upper);
// Try index range obtained by dominator-based analysis.
ValueRange* index_range = LookupValueRange(index, block);
- if (index_range != nullptr && index_range->FitsIn(&array_range)) {
- ReplaceInstruction(bounds_check, index);
- return;
+ if (index_range != nullptr) {
+ if (index_range->FitsIn(&array_range)) {
+ ReplaceInstruction(bounds_check, index);
+ return;
+ } else if (index_range->IsConstantValueRange()) {
+ // If the non-constant index turns out to have a constant range,
+ // make one more attempt to get a constant in the array range.
+ ValueRange* existing_range = LookupValueRange(array_length, block);
+ if (existing_range != nullptr &&
+ existing_range->IsConstantValueRange()) {
+ ValueRange constant_array_range(&allocator_, lower, existing_range->GetLower());
+ if (index_range->FitsIn(&constant_array_range)) {
+ ReplaceInstruction(bounds_check, index);
+ return;
+ }
+ }
+ }
}
// Try index range obtained by induction variable analysis.
// Disables dynamic bce if OOB is certain.
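The interplay of the two hunks above is easier to see on a standalone model. The sketch below uses hypothetical types (not ART's ValueBound/ValueRange API) to show the new fallback in VisitBoundsCheck: a bounds check on a non-constant index becomes removable once dominating comparisons have pinned both the index and the array length to constant ranges.

    // Standalone model with hypothetical types; ART's real FitsIn() works on
    // ValueBound pairs rather than plain ints.
    #include <optional>

    struct ConstRange { int lo, hi; };  // inclusive constant bounds

    bool BoundsCheckRemovable(const std::optional<ConstRange>& index,
                              const std::optional<ConstRange>& length) {
      if (!index.has_value() || !length.has_value()) {
        return false;  // a constant range is needed on both sides
      }
      // Removable iff every possible index is non-negative and below every
      // possible length.
      return index->lo >= 0 && index->hi < length->lo;
    }

The aliasing case added in the first hunk feeds the same machinery at the symbolic level: after `if (x == a.length)`, the true successor knows x == [length, length], so range reasoning phrased in terms of the array length now applies to x as well.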
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index af537dd653..a1a5692ef6 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -43,7 +43,7 @@ HGraphBuilder::HGraphBuilder(HGraph* graph,
CompilerDriver* driver,
CodeGenerator* code_generator,
OptimizingCompilerStats* compiler_stats,
- const uint8_t* interpreter_metadata,
+ ArrayRef<const uint8_t> interpreter_metadata,
VariableSizedHandleScope* handles)
: graph_(graph),
dex_file_(&graph->GetDexFile()),
@@ -70,7 +70,6 @@ HGraphBuilder::HGraphBuilder(HGraph* graph,
compiler_driver_(nullptr),
code_generator_(nullptr),
compilation_stats_(nullptr),
- interpreter_metadata_(nullptr),
handles_(handles),
return_type_(return_type) {}
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index c16a3a928d..5a1914ce08 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_BUILDER_H_
#include "base/arena_object.h"
+#include "base/array_ref.h"
#include "dex/code_item_accessors.h"
#include "dex/dex_file-inl.h"
#include "dex/dex_file.h"
@@ -40,7 +41,7 @@ class HGraphBuilder : public ValueObject {
CompilerDriver* driver,
CodeGenerator* code_generator,
OptimizingCompilerStats* compiler_stats,
- const uint8_t* interpreter_metadata,
+ ArrayRef<const uint8_t> interpreter_metadata,
VariableSizedHandleScope* handles);
// Only for unit testing.
@@ -73,7 +74,7 @@ class HGraphBuilder : public ValueObject {
CodeGenerator* const code_generator_;
OptimizingCompilerStats* const compilation_stats_;
- const uint8_t* const interpreter_metadata_;
+ const ArrayRef<const uint8_t> interpreter_metadata_;
VariableSizedHandleScope* const handles_;
const DataType::Type return_type_;
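The ArrayRef change in the two files above replaces a raw pointer that carried an implicit, unchecked length. A minimal sketch of the assumed shape (the real class lives in base/array_ref.h) shows what callers gain:

    // Assumed shape only; see base/array_ref.h for the real ArrayRef.
    #include <cstddef>
    #include <cstdint>

    template <typename T>
    class ArrayRefSketch {
     public:
      constexpr ArrayRefSketch() : data_(nullptr), size_(0u) {}
      ArrayRefSketch(const T* data, size_t size) : data_(data), size_(size) {}
      bool empty() const { return size_ == 0u; }  // replaces nullptr checks
      const T* data() const { return data_; }
      size_t size() const { return size_; }
     private:
      const T* data_;
      size_t size_;
    };

With this, "no interpreter metadata" is simply a default-constructed, empty view, which is why the explicit `interpreter_metadata_(nullptr)` initializer can be dropped from the test-only constructor.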
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 13886b32b3..3fd88e3e18 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1488,6 +1488,14 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
MacroAssembler* masm = GetVIXLAssembler();
__ Bind(&frame_entry_label_);
+ if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+ UseScratchRegisterScope temps(masm);
+ Register temp = temps.AcquireX();
+ __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+ __ Add(temp, temp, 1);
+ __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+ }
+
bool do_overflow_check =
FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
if (do_overflow_check) {
@@ -1881,6 +1889,8 @@ void CodeGeneratorARM64::Load(DataType::Type type,
DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
__ Ldr(dst, src);
break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
}
@@ -1959,6 +1969,8 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
__ Fmov(FPRegister(dst), temp);
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
}
@@ -1986,6 +1998,8 @@ void CodeGeneratorARM64::Store(DataType::Type type,
DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
__ Str(src, dst);
break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
}
@@ -2063,6 +2077,8 @@ void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
}
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
}
@@ -2097,13 +2113,17 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireW();
- size_t status_offset = mirror::Class::StatusOffset().SizeValue();
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
// Even if the initialized flag is set, we need to ensure consistent memory ordering.
// TODO(vixl): Let the MacroAssembler handle MemOperand.
- __ Add(temp, class_reg, status_offset);
+ __ Add(temp, class_reg, status_byte_offset);
__ Ldarb(temp, HeapOperand(temp));
- __ Cmp(temp, enum_cast<>(ClassStatus::kInitialized));
+ __ Cmp(temp, shifted_initialized_value);
__ B(lo, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
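The same offset arithmetic recurs in every back end below (ARM VIXL, MIPS, MIPS64, x86): the class status bits now sit above a SubtypeCheckBits bitfield, so the check loads the byte that actually contains them and compares against a pre-shifted constant. Worked through with illustrative numbers (the real values come from SubtypeCheckBits::BitStructSizeOf() and ClassStatus, not the constants assumed here):

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kBitsPerByte = 8;
    constexpr size_t status_lsb_position = 27;  // assumed bitfield width
    constexpr uint32_t initialized = 14;        // assumed kInitialized value

    // Whole bytes occupied by the bitfield below the status bits:
    constexpr size_t extra_byte_offset = status_lsb_position / kBitsPerByte;
    // Position of the status bits within the loaded byte:
    constexpr uint32_t shifted =
        initialized << (status_lsb_position % kBitsPerByte);

    static_assert(extra_byte_offset == 3, "27 / 8");
    static_assert(shifted == 112, "14 << 3");

Since the load is a single unsigned byte, the unsigned `lo`/below branch still orders the shifted status values correctly within that byte.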
@@ -3487,12 +3507,25 @@ void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant
}
void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
- DCHECK(!successor->IsExitBlock());
+ if (successor->IsExitBlock()) {
+ DCHECK(got->GetPrevious()->AlwaysThrows());
+ return; // no code needed
+ }
+
HBasicBlock* block = got->GetBlock();
HInstruction* previous = got->GetPrevious();
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+ if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp1 = temps.AcquireX();
+ Register temp2 = temps.AcquireX();
+ __ Ldr(temp1, MemOperand(sp, 0));
+ __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
+ __ Add(temp2, temp2, 1);
+ __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
+ }
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
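The two changes to HandleGoto above also land in the other back ends: a goto into the exit block now emits nothing (valid only when the preceding instruction always throws, hence the DCHECK), and suspend-checked back edges bump the same 16-bit hotness counter as the frame entry. In C++ terms the counter update is just the following (hypothetical stand-in type; the real field is located via ArtMethod::HotnessCountOffset()):

    #include <cstdint>

    struct ArtMethodSketch {  // hypothetical stand-in for ArtMethod
      uint16_t hotness_count;
    };

    // Ldrh / Add / Strh: a plain 16-bit read-modify-write, emitted on method
    // entry and on every suspend-checked loop back edge when
    // CountHotnessInCompiledCode() is set. Wrap-around is simply ignored here.
    inline void BumpHotness(ArtMethodSketch* method) {
      method->hotness_count = static_cast<uint16_t>(method->hotness_count + 1u);
    }

The x86 back ends below fold the same update into a single `addw` on memory, so no scratch register is needed there; the MIPS back ends only log a warning for now.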
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 7c6a5fde40..6d49b32dbc 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2485,13 +2485,36 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
__ Bind(&frame_entry_label_);
+ if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+ __ Add(temp, temp, 1);
+ __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+ }
+
if (HasEmptyFrame()) {
return;
}
if (!skip_overflow_check) {
+ // Using r4 instead of IP saves 2 bytes.
UseScratchRegisterScope temps(GetVIXLAssembler());
- vixl32::Register temp = temps.Acquire();
+ vixl32::Register temp;
+ // TODO: Remove this check when R4 is made a callee-save register
+ // in ART compiled code (b/72801708). Currently we need to make
+ // sure r4 is not blocked, e.g. in special purpose
+ // TestCodeGeneratorARMVIXL; also asserting that r4 is available
+ // here.
+ if (!blocked_core_registers_[R4]) {
+ for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
+ DCHECK(!reg.Is(r4));
+ }
+ DCHECK(!kCoreCalleeSaves.Includes(r4));
+ temp = r4;
+ } else {
+ temp = temps.Acquire();
+ }
__ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
// The load must immediately precede RecordPcInfo.
ExactAssemblyScope aas(GetVIXLAssembler(),
@@ -2642,6 +2665,8 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Typ
}
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unexpected parameter type " << type;
break;
@@ -2657,6 +2682,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::T
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
+ case DataType::Type::kUint32:
case DataType::Type::kInt32: {
return LocationFrom(r0);
}
@@ -2665,6 +2691,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::T
return LocationFrom(s0);
}
+ case DataType::Type::kUint64:
case DataType::Type::kInt64: {
return LocationFrom(r0, r1);
}
@@ -2776,12 +2803,26 @@ void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_poi
}
void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
- DCHECK(!successor->IsExitBlock());
+ if (successor->IsExitBlock()) {
+ DCHECK(got->GetPrevious()->AlwaysThrows());
+ return; // no code needed
+ }
+
HBasicBlock* block = got->GetBlock();
HInstruction* previous = got->GetPrevious();
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+ if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ __ Push(vixl32::Register(kMethodRegister));
+ GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
+ __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+ __ Add(temp, temp, 1);
+ __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+ __ Pop(vixl32::Register(kMethodRegister));
+ }
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
@@ -5490,6 +5531,8 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << field_type;
UNREACHABLE();
@@ -5734,6 +5777,8 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << load_type;
UNREACHABLE();
@@ -6226,6 +6271,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
@@ -6515,6 +6562,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << value_type;
UNREACHABLE();
@@ -7172,11 +7221,14 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
- GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
- temp,
- class_reg,
- mirror::Class::StatusOffset().Int32Value());
- __ Cmp(temp, enum_cast<>(ClassStatus::kInitialized));
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+ GetAssembler()->LoadFromOffset(kLoadUnsignedByte, temp, class_reg, status_byte_offset);
+ __ Cmp(temp, shifted_initialized_value);
__ B(lo, slow_path->GetEntryLabel());
// Even if the initialized flag is set, we may be in a situation where caches are not synced
// properly. Therefore, we do a memory fence.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index ebe252a9c8..855da2b18f 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -58,9 +58,11 @@ Location MipsReturnLocation(DataType::Type return_type) {
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
+ case DataType::Type::kUint32:
case DataType::Type::kInt32:
return Location::RegisterLocation(V0);
+ case DataType::Type::kUint64:
case DataType::Type::kInt64:
return Location::RegisterPairLocation(V0, V1);
@@ -140,6 +142,8 @@ Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(DataType::Type t
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unexpected parameter type " << type;
break;
@@ -391,7 +395,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
- if (!is_fatal_) {
+ if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
SaveLiveRegisters(codegen, locations);
}
@@ -1276,6 +1280,10 @@ static dwarf::Reg DWARFReg(Register reg) {
void CodeGeneratorMIPS::GenerateFrameEntry() {
__ Bind(&frame_entry_label_);
+ if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+ LOG(WARNING) << "Unimplemented hotness update in mips backend";
+ }
+
bool do_overflow_check =
FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips) || !IsLeafMethod();
@@ -1915,8 +1923,14 @@ void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool d
void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path,
Register class_reg) {
- __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
- __ LoadConst32(AT, enum_cast<>(ClassStatus::kInitialized));
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+ __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset);
+ __ LoadConst32(AT, shifted_initialized_value);
__ Bltu(TMP, AT, slow_path->GetEntryLabel());
// Even if the initialized flag is set, we need to ensure consistent memory ordering.
__ Sync(0);
@@ -2811,6 +2825,8 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
@@ -3126,6 +3142,8 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
@@ -3265,26 +3283,8 @@ static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
}
void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
- LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
- bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
-
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
- switch (type_check_kind) {
- case TypeCheckKind::kExactCheck:
- case TypeCheckKind::kAbstractClassCheck:
- case TypeCheckKind::kClassHierarchyCheck:
- case TypeCheckKind::kArrayObjectCheck:
- call_kind = (throws_into_catch || kEmitCompilerReadBarrier)
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
- break;
- case TypeCheckKind::kArrayCheck:
- case TypeCheckKind::kUnresolvedCheck:
- case TypeCheckKind::kInterfaceCheck:
- call_kind = LocationSummary::kCallOnSlowPath;
- break;
- }
-
+ LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
@@ -3313,18 +3313,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
MipsLabel done;
- // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
- // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
- // read barriers is done for performance and code size reasons.
- bool is_type_check_slow_path_fatal = false;
- if (!kEmitCompilerReadBarrier) {
- is_type_check_slow_path_fatal =
- (type_check_kind == TypeCheckKind::kExactCheck ||
- type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
- !instruction->CanThrowIntoCatchBlock();
- }
+ bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
SlowPathCodeMIPS* slow_path =
new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS(
instruction, is_type_check_slow_path_fatal);
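The switch deleted here (and its MIPS64 twin below) moves into shared CodeGenerator helpers. A standalone model of the decision being centralized, with hypothetical names, matching the per-backend logic removed above:

    // Hypothetical names; models CodeGenerator::IsTypeCheckSlowPathFatal().
    enum class CheckKind {
      kExact, kAbstractClass, kClassHierarchy, kArrayObject,
      kArray, kUnresolved, kInterface,
    };

    bool TypeCheckSlowPathFatal(CheckKind kind,
                                bool emit_read_barrier,
                                bool throws_into_catch) {
      if (emit_read_barrier) {
        return false;  // may re-enter non-fatally on read-barrier false negatives
      }
      switch (kind) {
        case CheckKind::kExact:
        case CheckKind::kAbstractClass:
        case CheckKind::kClassHierarchy:
        case CheckKind::kArrayObject:
          return !throws_into_catch;  // fatal unless a local catch can observe it
        default:
          return false;  // array/unresolved/interface checks always call out
      }
    }

GetCheckCastCallKind presumably maps a fatal slow path to kNoCall and everything else to kCallOnSlowPath, as the deleted LocationsBuilder switch did.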
@@ -4028,7 +4017,11 @@ void LocationsBuilderMIPS::VisitGoto(HGoto* got) {
}
void InstructionCodeGeneratorMIPS::HandleGoto(HInstruction* got, HBasicBlock* successor) {
- DCHECK(!successor->IsExitBlock());
+ if (successor->IsExitBlock()) {
+ DCHECK(got->GetPrevious()->AlwaysThrows());
+ return; // no code needed
+ }
+
HBasicBlock* block = got->GetBlock();
HInstruction* previous = got->GetPrevious();
HLoopInformation* info = block->GetLoopInformation();
@@ -6306,6 +6299,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
case DataType::Type::kFloat64:
load_type = kLoadDoubleword;
break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
@@ -6459,6 +6454,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
case DataType::Type::kFloat64:
store_type = kStoreDoubleword;
break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
@@ -7187,11 +7184,12 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
- case TypeCheckKind::kArrayObjectCheck:
- call_kind =
- kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
- baker_read_barrier_slow_path = kUseBakerReadBarrier;
+ case TypeCheckKind::kArrayObjectCheck: {
+ bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
+ call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
break;
+ }
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
@@ -7239,13 +7237,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
obj_loc,
class_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// Classes must be equal for the instanceof to succeed.
__ Xor(out, out, cls);
__ Sltiu(out, out, 1);
@@ -7253,13 +7253,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
}
case TypeCheckKind::kAbstractClassCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
obj_loc,
class_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
MipsLabel loop;
@@ -7269,7 +7271,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
out_loc,
super_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqz(out, &done);
__ Bne(out, cls, &loop);
@@ -7278,13 +7280,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
}
case TypeCheckKind::kClassHierarchyCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
obj_loc,
class_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// Walk over the class hierarchy to find a match.
MipsLabel loop, success;
__ Bind(&loop);
@@ -7294,7 +7298,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
out_loc,
super_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
__ Bnez(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
__ B(&done);
@@ -7304,13 +7308,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
}
case TypeCheckKind::kArrayObjectCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
obj_loc,
class_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// Do an exact check.
MipsLabel success;
__ Beq(out, cls, &success);
@@ -7320,7 +7326,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
out_loc,
component_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqz(out, &done);
__ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3ea7b827bb..8a06061c6a 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -55,8 +55,10 @@ Location Mips64ReturnLocation(DataType::Type return_type) {
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
+ case DataType::Type::kUint32:
case DataType::Type::kInt32:
case DataType::Type::kReference:
+ case DataType::Type::kUint64:
case DataType::Type::kInt64:
return Location::RegisterLocation(V0);
@@ -350,7 +352,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
- if (!is_fatal_) {
+ if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
SaveLiveRegisters(codegen, locations);
}
@@ -1079,6 +1081,10 @@ static dwarf::Reg DWARFReg(FpuRegister reg) {
void CodeGeneratorMIPS64::GenerateFrameEntry() {
__ Bind(&frame_entry_label_);
+ if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+ LOG(WARNING) << "Unimplemented hotness update in mips64 backend";
+ }
+
bool do_overflow_check =
FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips64) || !IsLeafMethod();
@@ -1761,8 +1767,14 @@ void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) {
void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path,
GpuRegister class_reg) {
- __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
- __ LoadConst32(AT, enum_cast<>(ClassStatus::kInitialized));
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+ __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset);
+ __ LoadConst32(AT, shifted_initialized_value);
__ Bltuc(TMP, AT, slow_path->GetEntryLabel());
// Even if the initialized flag is set, we need to ensure consistent memory ordering.
__ Sync(0);
@@ -2398,6 +2410,8 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
@@ -2701,6 +2715,8 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
@@ -2820,26 +2836,8 @@ static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
}
void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
- LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
- bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
-
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
- switch (type_check_kind) {
- case TypeCheckKind::kExactCheck:
- case TypeCheckKind::kAbstractClassCheck:
- case TypeCheckKind::kClassHierarchyCheck:
- case TypeCheckKind::kArrayObjectCheck:
- call_kind = (throws_into_catch || kEmitCompilerReadBarrier)
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
- break;
- case TypeCheckKind::kArrayCheck:
- case TypeCheckKind::kUnresolvedCheck:
- case TypeCheckKind::kInterfaceCheck:
- call_kind = LocationSummary::kCallOnSlowPath;
- break;
- }
-
+ LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
@@ -2868,18 +2866,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
Mips64Label done;
- // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
- // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
- // read barriers is done for performance and code size reasons.
- bool is_type_check_slow_path_fatal = false;
- if (!kEmitCompilerReadBarrier) {
- is_type_check_slow_path_fatal =
- (type_check_kind == TypeCheckKind::kExactCheck ||
- type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
- !instruction->CanThrowIntoCatchBlock();
- }
+ bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
SlowPathCodeMIPS64* slow_path =
new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64(
instruction, is_type_check_slow_path_fatal);
@@ -3556,7 +3543,11 @@ void InstructionCodeGeneratorMIPS64::VisitFloatConstant(HFloatConstant* constant
}
void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
- DCHECK(!successor->IsExitBlock());
+ if (successor->IsExitBlock()) {
+ DCHECK(got->GetPrevious()->AlwaysThrows());
+ return; // no code needed
+ }
+
HBasicBlock* block = got->GetBlock();
HInstruction* previous = got->GetPrevious();
HLoopInformation* info = block->GetLoopInformation();
@@ -4784,6 +4775,8 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
case DataType::Type::kReference:
load_type = kLoadUnsignedWord;
break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
@@ -4877,6 +4870,8 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
case DataType::Type::kFloat64:
store_type = kStoreDoubleword;
break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
@@ -5504,11 +5499,12 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
- case TypeCheckKind::kArrayObjectCheck:
- call_kind =
- kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
- baker_read_barrier_slow_path = kUseBakerReadBarrier;
+ case TypeCheckKind::kArrayObjectCheck: {
+ bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
+ call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
break;
+ }
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
@@ -5556,13 +5552,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
obj_loc,
class_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// Classes must be equal for the instanceof to succeed.
__ Xor(out, out, cls);
__ Sltiu(out, out, 1);
@@ -5570,13 +5568,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
}
case TypeCheckKind::kAbstractClassCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
obj_loc,
class_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
Mips64Label loop;
@@ -5586,7 +5586,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
out_loc,
super_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqzc(out, &done);
__ Bnec(out, cls, &loop);
@@ -5595,13 +5595,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
}
case TypeCheckKind::kClassHierarchyCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
obj_loc,
class_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// Walk over the class hierarchy to find a match.
Mips64Label loop, success;
__ Bind(&loop);
@@ -5611,7 +5613,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
out_loc,
super_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
__ Bnezc(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
__ Bc(&done);
@@ -5621,13 +5623,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
}
case TypeCheckKind::kArrayObjectCheck: {
+ ReadBarrierOption read_barrier_option =
+ CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
obj_loc,
class_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// Do an exact check.
Mips64Label success;
__ Beqc(out, cls, &success);
@@ -5637,7 +5641,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
out_loc,
component_offset,
maybe_temp_loc,
- kCompilerReadBarrierOption);
+ read_barrier_option);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqzc(out, &done);
__ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 152a59c208..174efdf115 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -606,22 +606,20 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
DCHECK_EQ(8u, instruction->GetVectorLength());
__ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
- } else {
- __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
- }
+ __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
break;
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
@@ -656,22 +654,20 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
DCHECK_EQ(8u, instruction->GetVectorLength());
__ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
- } else {
- __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
- }
+ __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
break;
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
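This file and the ARM VIXL, MIPS, MIPS64 and x86 vector files below all make the same refactoring: signedness moves out of the HVecMin/HVecMax IsUnsigned() flag and into the lane type itself (kUint32, and kUint64 on MIPS), so each case selects its instruction directly and the float cases no longer need the !IsUnsigned() assertion. The distinction the lane type now carries is real at the bit level:

    // Why min/max must know the signedness: the same 32-bit pattern orders
    // differently under Umin and Smin.
    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t a = 0xFFFFFFFFu;  // 4294967295 unsigned, -1 as a signed int
      uint32_t b = 1u;
      assert(std::min(a, b) == 1u);  // unsigned view: Umin picks 1
      assert(std::min(static_cast<int32_t>(a),
                      static_cast<int32_t>(b)) == -1);  // signed view: Smin picks -1
      return 0;
    }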
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index cc470ddb2e..7c3155ab73 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -431,13 +431,13 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ Vmin(DataTypeValue::S16, dst, lhs, rhs);
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Vmin(DataTypeValue::U32, dst, lhs, rhs);
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(2u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Vmin(DataTypeValue::U32, dst, lhs, rhs);
- } else {
- __ Vmin(DataTypeValue::S32, dst, lhs, rhs);
- }
+ __ Vmin(DataTypeValue::S32, dst, lhs, rhs);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -471,13 +471,13 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ Vmax(DataTypeValue::S16, dst, lhs, rhs);
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Vmax(DataTypeValue::U32, dst, lhs, rhs);
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(2u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Vmax(DataTypeValue::U32, dst, lhs, rhs);
- } else {
- __ Vmax(DataTypeValue::S32, dst, lhs, rhs);
- }
+ __ Vmax(DataTypeValue::S32, dst, lhs, rhs);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index 3cf150a6b8..ed9de96496 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -613,32 +613,30 @@ void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) {
DCHECK_EQ(8u, instruction->GetVectorLength());
__ Min_sH(dst, lhs, rhs);
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Min_uW(dst, lhs, rhs);
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Min_uW(dst, lhs, rhs);
- } else {
- __ Min_sW(dst, lhs, rhs);
- }
+ __ Min_sW(dst, lhs, rhs);
+ break;
+ case DataType::Type::kUint64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Min_uD(dst, lhs, rhs);
break;
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Min_uD(dst, lhs, rhs);
- } else {
- __ Min_sD(dst, lhs, rhs);
- }
+ __ Min_sD(dst, lhs, rhs);
break;
// When one of arguments is NaN, fmin.df returns other argument, but Java expects a NaN value.
// TODO: Fix min(x, NaN) cases for float and double.
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ FminW(dst, lhs, rhs);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ FminD(dst, lhs, rhs);
break;
default:
@@ -673,32 +671,30 @@ void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) {
DCHECK_EQ(8u, instruction->GetVectorLength());
__ Max_sH(dst, lhs, rhs);
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Max_uW(dst, lhs, rhs);
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Max_uW(dst, lhs, rhs);
- } else {
- __ Max_sW(dst, lhs, rhs);
- }
+ __ Max_sW(dst, lhs, rhs);
+ break;
+ case DataType::Type::kUint64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Max_uD(dst, lhs, rhs);
break;
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Max_uD(dst, lhs, rhs);
- } else {
- __ Max_sD(dst, lhs, rhs);
- }
+ __ Max_sD(dst, lhs, rhs);
break;
// When one of arguments is NaN, fmax.df returns other argument, but Java expects a NaN value.
// TODO: Fix max(x, NaN) cases for float and double.
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ FmaxW(dst, lhs, rhs);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ FmaxD(dst, lhs, rhs);
break;
default:
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 2d69533f21..9ea55ec8d7 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -612,32 +612,30 @@ void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) {
DCHECK_EQ(8u, instruction->GetVectorLength());
__ Min_sH(dst, lhs, rhs);
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Min_uW(dst, lhs, rhs);
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Min_uW(dst, lhs, rhs);
- } else {
- __ Min_sW(dst, lhs, rhs);
- }
+ __ Min_sW(dst, lhs, rhs);
+ break;
+ case DataType::Type::kUint64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Min_uD(dst, lhs, rhs);
break;
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Min_uD(dst, lhs, rhs);
- } else {
- __ Min_sD(dst, lhs, rhs);
- }
+ __ Min_sD(dst, lhs, rhs);
break;
// When one of arguments is NaN, fmin.df returns other argument, but Java expects a NaN value.
// TODO: Fix min(x, NaN) cases for float and double.
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ FminW(dst, lhs, rhs);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ FminD(dst, lhs, rhs);
break;
default:
@@ -672,32 +670,30 @@ void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) {
DCHECK_EQ(8u, instruction->GetVectorLength());
__ Max_sH(dst, lhs, rhs);
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Max_uW(dst, lhs, rhs);
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Max_uW(dst, lhs, rhs);
- } else {
- __ Max_sW(dst, lhs, rhs);
- }
+ __ Max_sW(dst, lhs, rhs);
+ break;
+ case DataType::Type::kUint64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Max_uD(dst, lhs, rhs);
break;
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ Max_uD(dst, lhs, rhs);
- } else {
- __ Max_sD(dst, lhs, rhs);
- }
+ __ Max_sD(dst, lhs, rhs);
break;
// When one of arguments is NaN, fmax.df returns other argument, but Java expects a NaN value.
// TODO: Fix max(x, NaN) cases for float and double.
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ FmaxW(dst, lhs, rhs);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ FmaxD(dst, lhs, rhs);
break;
default:
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index ad8128a5b1..f2ffccc887 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -92,8 +92,8 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i
__ pshufd(dst, dst, Immediate(0));
break;
case DataType::Type::kInt64: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
DCHECK_EQ(2u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
__ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
__ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
__ punpckldq(dst, tmp);
@@ -101,13 +101,13 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i
break;
}
case DataType::Type::kFloat32:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
__ shufps(dst, dst, Immediate(0));
break;
case DataType::Type::kFloat64:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
__ shufpd(dst, dst, Immediate(0));
break;
default:
@@ -160,8 +160,8 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr
__ movd(locations->Out().AsRegister<Register>(), src);
break;
case DataType::Type::kInt64: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
DCHECK_EQ(2u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
__ movd(locations->Out().AsRegisterPairLow<Register>(), src);
__ pshufd(tmp, src, Immediate(1));
__ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
@@ -640,23 +640,21 @@ void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
DCHECK_EQ(8u, instruction->GetVectorLength());
__ pminsw(dst, src);
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pminud(dst, src);
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ pminud(dst, src);
- } else {
- __ pminsd(dst, src);
- }
+ __ pminsd(dst, src);
break;
// Next cases are sloppy wrt 0.0 vs -0.0.
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ minps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ minpd(dst, src);
break;
default:
@@ -691,23 +689,21 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
DCHECK_EQ(8u, instruction->GetVectorLength());
__ pmaxsw(dst, src);
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pmaxud(dst, src);
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ pmaxud(dst, src);
- } else {
- __ pmaxsd(dst, src);
- }
+ __ pmaxsd(dst, src);
break;
// Next cases are sloppy wrt 0.0 vs -0.0.
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ maxps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ maxpd(dst, src);
break;
default:
@@ -1022,8 +1018,8 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction
__ movd(dst, locations->InAt(0).AsRegister<Register>());
break;
case DataType::Type::kInt64: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
DCHECK_EQ(2u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
__ xorps(tmp, tmp);
__ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
__ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 107030e6c2..e2b0485f89 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -623,23 +623,21 @@ void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
DCHECK_EQ(8u, instruction->GetVectorLength());
__ pminsw(dst, src);
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pminud(dst, src);
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ pminud(dst, src);
- } else {
- __ pminsd(dst, src);
- }
+ __ pminsd(dst, src);
break;
// Next cases are sloppy wrt 0.0 vs -0.0.
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ minps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ minpd(dst, src);
break;
default:
@@ -674,23 +672,21 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
DCHECK_EQ(8u, instruction->GetVectorLength());
__ pmaxsw(dst, src);
break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pmaxud(dst, src);
+ break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- if (instruction->IsUnsigned()) {
- __ pmaxud(dst, src);
- } else {
- __ pmaxsd(dst, src);
- }
+ __ pmaxsd(dst, src);
break;
// Next cases are sloppy wrt 0.0 vs -0.0.
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ maxps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- DCHECK(!instruction->IsUnsigned());
__ maxpd(dst, src);
break;
default:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 68532386e1..5fede80bc7 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1061,6 +1061,11 @@ void CodeGeneratorX86::GenerateFrameEntry() {
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+ if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+ __ addw(Address(kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()),
+ Immediate(1));
+ }
+
if (!skip_overflow_check) {
size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
__ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
@@ -1129,9 +1134,11 @@ Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
+ case DataType::Type::kUint32:
case DataType::Type::kInt32:
return Location::RegisterLocation(EAX);
+ case DataType::Type::kUint64:
case DataType::Type::kInt64:
return Location::RegisterPairLocation(EAX, EDX);
@@ -1201,6 +1208,8 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type ty
}
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unexpected parameter type " << type;
break;
@@ -1347,13 +1356,22 @@ void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* loc
}
void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
- DCHECK(!successor->IsExitBlock());
+ if (successor->IsExitBlock()) {
+ DCHECK(got->GetPrevious()->AlwaysThrows());
+ return; // no code needed
+ }
HBasicBlock* block = got->GetBlock();
HInstruction* previous = got->GetPrevious();
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+ if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
+ __ pushl(EAX);
+ __ movl(EAX, Address(ESP, kX86WordSize));
+ __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1));
+ __ popl(EAX);
+ }
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
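
The back-edge counting sequence above has no scratch register on x86, so it pushes EAX, reloads the ArtMethod* from the stack, bumps the counter, and pops. Note the offset: assuming the current method occupies the first stack slot of the frame (which the movl implies), the pushl shifts that slot one word away from ESP. A tiny worked example of the offset arithmetic:

#include <cstdio>

int main() {
  constexpr int kX86WordSize = 4;
  int esp = 100;          // byte address ESP points at; method slot at [ESP]
  int method_slot = esp;
  esp -= kX86WordSize;    // pushl EAX moves ESP down one word
  std::printf("method now at [ESP + %d]\n", method_slot - esp);  // prints 4
}
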
@@ -4830,6 +4848,8 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << load_type;
UNREACHABLE();
@@ -5003,6 +5023,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << field_type;
UNREACHABLE();
@@ -5306,6 +5328,8 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
@@ -5557,6 +5581,8 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
@@ -6219,8 +6245,13 @@ void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
SlowPathCode* slow_path, Register class_reg) {
- __ cmpb(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
- Immediate(enum_cast<>(ClassStatus::kInitialized)));
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+ __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
__ j(kBelow, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
// No need for memory fence, thanks to the X86 memory model.
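
The initialization check no longer compares the raw status byte: the class status now shares a 32-bit word with SubtypeCheckBits, so the code derives which byte holds the status bits and shifts the expected value into position. A worked example of the offset math with made-up numbers (the real lsb position comes from SubtypeCheckBits::BitStructSizeOf(), and the real enum value from ClassStatus::kInitialized):

#include <cstdint>
#include <cstdio>

constexpr size_t kBitsPerByte = 8;

constexpr size_t StatusByteOffset(size_t status_offset, size_t status_lsb) {
  return status_offset + status_lsb / kBitsPerByte;  // byte containing the status
}
constexpr uint32_t ShiftedValue(uint32_t value, size_t status_lsb) {
  return value << (status_lsb % kBitsPerByte);       // align within that byte
}

int main() {
  // Hypothetical: status bits start at bit 28 of the word at offset 112,
  // and kInitialized is 14. The cmpb then targets byte 115 with value 0xe0.
  std::printf("byte %zu, value 0x%x\n",
              StatusByteOffset(112, 28), ShiftedValue(14u, 28));
}

Because class statuses are ordered and the comparison uses kBelow, a single byte compare still distinguishes initialized from every earlier status.
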
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 1f8d822507..ae35ab5983 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1268,6 +1268,12 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
&& !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+ if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+ __ addw(Address(CpuRegister(kMethodRegisterArgument),
+ ArtMethod::HotnessCountOffset().Int32Value()),
+ Immediate(1));
+ }
+
if (!skip_overflow_check) {
size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
__ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
@@ -1449,13 +1455,21 @@ void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary*
}
void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
- DCHECK(!successor->IsExitBlock());
+ if (successor->IsExitBlock()) {
+ DCHECK(got->GetPrevious()->AlwaysThrows());
+ return; // no code needed
+ }
HBasicBlock* block = got->GetBlock();
HInstruction* previous = got->GetPrevious();
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+ if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0));
+ __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
+ Immediate(1));
+ }
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
@@ -2259,7 +2273,9 @@ Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Ty
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
+ case DataType::Type::kUint32:
case DataType::Type::kInt32:
+ case DataType::Type::kUint64:
case DataType::Type::kInt64:
return Location::RegisterLocation(RAX);
@@ -2328,6 +2344,8 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type
}
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unexpected parameter type " << type;
break;
@@ -4293,6 +4311,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << load_type;
UNREACHABLE();
@@ -4456,6 +4476,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << field_type;
UNREACHABLE();
@@ -4749,6 +4771,8 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
@@ -4988,6 +5012,8 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
break;
}
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
@@ -5425,8 +5451,13 @@ void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
SlowPathCode* slow_path, CpuRegister class_reg) {
- __ cmpb(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
- Immediate(enum_cast<>(ClassStatus::kInitialized)));
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+ __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
__ j(kBelow, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
// No need for memory fence, thanks to the x86-64 memory model.
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index d8ebac95a8..f4760d661f 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -34,7 +34,9 @@ void CodeSinking::Run() {
// TODO(ngeoffray): we do not profile branches yet, so use throw instructions
// as an indicator of an uncommon branch.
for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) {
- if (exit_predecessor->GetLastInstruction()->IsThrow()) {
+ HInstruction* last = exit_predecessor->GetLastInstruction();
+ // Any predecessor of the exit block that does not return must end in a throwing instruction.
+ if (!last->IsReturn() && !last->IsReturnVoid()) {
SinkCodeToUncommonBranch(exit_predecessor);
}
}
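
The code-sinking trigger flips from an allow-list (only explicit HThrow) to a deny-list: any exit predecessor that does not return is treated as an uncommon branch, which now also catches blocks ending in a Goto right after an always-throwing invoke. A sketch of the new predicate over a toy instruction tag:

#include <cstdio>

enum class Kind { kReturn, kReturnVoid, kThrow, kGoto };

bool IsUncommonExitPredecessor(Kind last) {
  return last != Kind::kReturn && last != Kind::kReturnVoid;
}

int main() {
  std::printf("%d %d\n",
              IsUncommonExitPredecessor(Kind::kThrow),   // 1, as before
              IsUncommonExitPredecessor(Kind::kGoto));   // 1, new behavior
}
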
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 6eda289861..ba4040acad 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -74,8 +74,8 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
class CodegenTest : public OptimizingUnitTest {
protected:
- void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0);
- void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected);
+ void TestCode(const std::vector<uint16_t>& data, bool has_result = false, int32_t expected = 0);
+ void TestCodeLong(const std::vector<uint16_t>& data, bool has_result, int64_t expected);
void TestComparison(IfCondition condition,
int64_t i,
int64_t j,
@@ -83,7 +83,7 @@ class CodegenTest : public OptimizingUnitTest {
const CodegenTargetConfig target_config);
};
-void CodegenTest::TestCode(const uint16_t* data, bool has_result, int32_t expected) {
+void CodegenTest::TestCode(const std::vector<uint16_t>& data, bool has_result, int32_t expected) {
for (const CodegenTargetConfig& target_config : GetTargetConfigs()) {
ResetPoolAndAllocator();
HGraph* graph = CreateCFG(data);
@@ -93,7 +93,8 @@ void CodegenTest::TestCode(const uint16_t* data, bool has_result, int32_t expect
}
}
-void CodegenTest::TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) {
+void CodegenTest::TestCodeLong(const std::vector<uint16_t>& data,
+ bool has_result, int64_t expected) {
for (const CodegenTargetConfig& target_config : GetTargetConfigs()) {
ResetPoolAndAllocator();
HGraph* graph = CreateCFG(data, DataType::Type::kInt64);
@@ -104,12 +105,12 @@ void CodegenTest::TestCodeLong(const uint16_t* data, bool has_result, int64_t ex
}
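
Switching the helpers from const uint16_t* to const std::vector<uint16_t>& lets CreateCFG learn the code-item length from the container instead of a convention baked into raw arrays. This presumes the *_CODE_ITEM macros expand to something that brace-initializes a vector; a hedged sketch of that shape (the real macros live in optimizing_unit_test.h):

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative only: a code-item macro that yields a std::vector, with a
// made-up 7-word header before the instruction words.
#define SKETCH_CODE_ITEM(...) std::vector<uint16_t>({0, 0, 0, 0, 0, 0, 0, __VA_ARGS__})

int main() {
  const std::vector<uint16_t> data = SKETCH_CODE_ITEM(0x0e00 /* return-void */);
  std::printf("%zu code units\n", data.size());  // the size travels with the data
}
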
TEST_F(CodegenTest, ReturnVoid) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID);
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID);
TestCode(data);
}
TEST_F(CodegenTest, CFG1) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x100,
Instruction::RETURN_VOID);
@@ -117,7 +118,7 @@ TEST_F(CodegenTest, CFG1) {
}
TEST_F(CodegenTest, CFG2) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x100,
Instruction::GOTO | 0x100,
Instruction::RETURN_VOID);
@@ -126,21 +127,21 @@ TEST_F(CodegenTest, CFG2) {
}
TEST_F(CodegenTest, CFG3) {
- const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x200,
Instruction::RETURN_VOID,
Instruction::GOTO | 0xFF00);
TestCode(data1);
- const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_16, 3,
Instruction::RETURN_VOID,
Instruction::GOTO_16, 0xFFFF);
TestCode(data2);
- const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_32, 4, 0,
Instruction::RETURN_VOID,
Instruction::GOTO_32, 0xFFFF, 0xFFFF);
@@ -149,7 +150,7 @@ TEST_F(CodegenTest, CFG3) {
}
TEST_F(CodegenTest, CFG4) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::RETURN_VOID,
Instruction::GOTO | 0x100,
Instruction::GOTO | 0xFE00);
@@ -158,7 +159,7 @@ TEST_F(CodegenTest, CFG4) {
}
TEST_F(CodegenTest, CFG5) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x100,
@@ -168,7 +169,7 @@ TEST_F(CodegenTest, CFG5) {
}
TEST_F(CodegenTest, IntConstant) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN_VOID);
@@ -176,7 +177,7 @@ TEST_F(CodegenTest, IntConstant) {
}
TEST_F(CodegenTest, Return1) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN | 0);
@@ -184,7 +185,7 @@ TEST_F(CodegenTest, Return1) {
}
TEST_F(CodegenTest, Return2) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 0 | 1 << 8,
Instruction::RETURN | 1 << 8);
@@ -193,7 +194,7 @@ TEST_F(CodegenTest, Return2) {
}
TEST_F(CodegenTest, Return3) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 1 << 8 | 1 << 12,
Instruction::RETURN | 1 << 8);
@@ -202,7 +203,7 @@ TEST_F(CodegenTest, Return3) {
}
TEST_F(CodegenTest, ReturnIf1) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 1 << 8 | 1 << 12,
Instruction::IF_EQ, 3,
@@ -213,7 +214,7 @@ TEST_F(CodegenTest, ReturnIf1) {
}
TEST_F(CodegenTest, ReturnIf2) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 1 << 8 | 1 << 12,
Instruction::IF_EQ | 0 << 4 | 1 << 8, 3,
@@ -224,17 +225,17 @@ TEST_F(CodegenTest, ReturnIf2) {
}
// Exercise bit-wise (one's complement) not-int instruction.
-#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
-TEST_F(CodegenTest, TEST_NAME) { \
- const int32_t input = INPUT; \
- const uint16_t input_lo = Low16Bits(input); \
- const uint16_t input_hi = High16Bits(input); \
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( \
- Instruction::CONST | 0 << 8, input_lo, input_hi, \
- Instruction::NOT_INT | 1 << 8 | 0 << 12 , \
- Instruction::RETURN | 1 << 8); \
- \
- TestCode(data, true, EXPECTED_OUTPUT); \
+#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
+TEST_F(CodegenTest, TEST_NAME) { \
+ const int32_t input = INPUT; \
+ const uint16_t input_lo = Low16Bits(input); \
+ const uint16_t input_hi = High16Bits(input); \
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( \
+ Instruction::CONST | 0 << 8, input_lo, input_hi, \
+ Instruction::NOT_INT | 1 << 8 | 0 << 12 , \
+ Instruction::RETURN | 1 << 8); \
+ \
+ TestCode(data, true, EXPECTED_OUTPUT); \
}
NOT_INT_TEST(ReturnNotIntMinus2, -2, 1)
@@ -256,7 +257,7 @@ TEST_F(CodegenTest, TEST_NAME) { \
const uint16_t word1 = High16Bits(Low32Bits(input)); \
const uint16_t word2 = Low16Bits(High32Bits(input)); \
const uint16_t word3 = High16Bits(High32Bits(input)); /* MSW. */ \
- const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( \
+ const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( \
Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3, \
Instruction::NOT_LONG | 2 << 8 | 0 << 12, \
Instruction::RETURN_WIDE | 2 << 8); \
@@ -306,7 +307,7 @@ TEST_F(CodegenTest, IntToLongOfLongToInt) {
const uint16_t word1 = High16Bits(Low32Bits(input));
const uint16_t word2 = Low16Bits(High32Bits(input));
const uint16_t word3 = High16Bits(High32Bits(input)); // MSW.
- const uint16_t data[] = FIVE_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = FIVE_REGISTERS_CODE_ITEM(
Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3,
Instruction::CONST_WIDE | 2 << 8, 1, 0, 0, 0,
Instruction::ADD_LONG | 0, 0 << 8 | 2, // v0 <- 2^32 + 1
@@ -318,7 +319,7 @@ TEST_F(CodegenTest, IntToLongOfLongToInt) {
}
TEST_F(CodegenTest, ReturnAdd1) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::ADD_INT, 1 << 8 | 0,
@@ -328,7 +329,7 @@ TEST_F(CodegenTest, ReturnAdd1) {
}
TEST_F(CodegenTest, ReturnAdd2) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::ADD_INT_2ADDR | 1 << 12,
@@ -338,7 +339,7 @@ TEST_F(CodegenTest, ReturnAdd2) {
}
TEST_F(CodegenTest, ReturnAdd3) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::ADD_INT_LIT8, 3 << 8 | 0,
Instruction::RETURN);
@@ -347,7 +348,7 @@ TEST_F(CodegenTest, ReturnAdd3) {
}
TEST_F(CodegenTest, ReturnAdd4) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::ADD_INT_LIT16, 3,
Instruction::RETURN);
@@ -356,7 +357,7 @@ TEST_F(CodegenTest, ReturnAdd4) {
}
TEST_F(CodegenTest, ReturnMulInt) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::MUL_INT, 1 << 8 | 0,
@@ -366,7 +367,7 @@ TEST_F(CodegenTest, ReturnMulInt) {
}
TEST_F(CodegenTest, ReturnMulInt2addr) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::MUL_INT_2ADDR | 1 << 12,
@@ -376,7 +377,7 @@ TEST_F(CodegenTest, ReturnMulInt2addr) {
}
TEST_F(CodegenTest, ReturnMulLong) {
- const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM(
Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
Instruction::MUL_LONG, 2 << 8 | 0,
@@ -386,7 +387,7 @@ TEST_F(CodegenTest, ReturnMulLong) {
}
TEST_F(CodegenTest, ReturnMulLong2addr) {
- const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM(
Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
Instruction::MUL_LONG_2ADDR | 2 << 12,
@@ -396,7 +397,7 @@ TEST_F(CodegenTest, ReturnMulLong2addr) {
}
TEST_F(CodegenTest, ReturnMulIntLit8) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::MUL_INT_LIT8, 3 << 8 | 0,
Instruction::RETURN);
@@ -405,7 +406,7 @@ TEST_F(CodegenTest, ReturnMulIntLit8) {
}
TEST_F(CodegenTest, ReturnMulIntLit16) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::MUL_INT_LIT16, 3,
Instruction::RETURN);
@@ -578,7 +579,7 @@ TEST_F(CodegenTest, MaterializedCondition2) {
}
TEST_F(CodegenTest, ReturnDivIntLit8) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::DIV_INT_LIT8, 3 << 8 | 0,
Instruction::RETURN);
@@ -587,7 +588,7 @@ TEST_F(CodegenTest, ReturnDivIntLit8) {
}
TEST_F(CodegenTest, ReturnDivInt2Addr) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0,
Instruction::CONST_4 | 2 << 12 | 1 << 8,
Instruction::DIV_INT_2ADDR | 1 << 12,
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index e1980e080e..d27104752b 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -36,7 +36,7 @@ class ConstantFoldingTest : public OptimizingUnitTest {
public:
ConstantFoldingTest() : graph_(nullptr) { }
- void TestCode(const uint16_t* data,
+ void TestCode(const std::vector<uint16_t>& data,
const std::string& expected_before,
const std::string& expected_after_cf,
const std::string& expected_after_dce,
@@ -100,7 +100,7 @@ class ConstantFoldingTest : public OptimizingUnitTest {
* return v1 2. return v1
*/
TEST_F(ConstantFoldingTest, IntConstantFoldingNegation) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 << 8 | 1 << 12,
Instruction::NEG_INT | 1 << 8 | 0 << 12,
Instruction::RETURN | 1 << 8);
@@ -161,7 +161,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingNegation) {
const uint16_t word1 = High16Bits(Low32Bits(input));
const uint16_t word2 = Low16Bits(High32Bits(input));
const uint16_t word3 = High16Bits(High32Bits(input)); // MSW.
- const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM(
Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3,
Instruction::NEG_LONG | 2 << 8 | 0 << 12,
Instruction::RETURN_WIDE | 2 << 8);
@@ -219,7 +219,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingNegation) {
* return v2 4. return v2
*/
TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition1) {
- const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 << 8 | 1 << 12,
Instruction::CONST_4 | 1 << 8 | 2 << 12,
Instruction::ADD_INT | 2 << 8, 0 | 1 << 8,
@@ -284,7 +284,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition1) {
* return v2 8. return v2
*/
TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition2) {
- const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 << 8 | 1 << 12,
Instruction::CONST_4 | 1 << 8 | 2 << 12,
Instruction::ADD_INT_2ADDR | 0 << 8 | 1 << 12,
@@ -369,7 +369,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition2) {
* return v2 4. return v2
*/
TEST_F(ConstantFoldingTest, IntConstantFoldingOnSubtraction) {
- const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 << 8 | 3 << 12,
Instruction::CONST_4 | 1 << 8 | 2 << 12,
Instruction::SUB_INT | 2 << 8, 0 | 1 << 8,
@@ -432,7 +432,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnSubtraction) {
* return (v4, v5) 6. return-wide v4
*/
TEST_F(ConstantFoldingTest, LongConstantFoldingOnAddition) {
- const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM(
Instruction::CONST_WIDE_16 | 0 << 8, 1,
Instruction::CONST_WIDE_16 | 2 << 8, 2,
Instruction::ADD_LONG | 4 << 8, 0 | 2 << 8,
@@ -496,7 +496,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnAddition) {
* return (v4, v5) 6. return-wide v4
*/
TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) {
- const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM(
Instruction::CONST_WIDE_16 | 0 << 8, 3,
Instruction::CONST_WIDE_16 | 2 << 8, 2,
Instruction::SUB_LONG | 4 << 8, 0 | 2 << 8,
@@ -569,7 +569,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) {
* return v2 13. return v2
*/
TEST_F(ConstantFoldingTest, IntConstantFoldingAndJumps) {
- const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 << 8 | 1 << 12,
Instruction::CONST_4 | 1 << 8 | 2 << 12,
Instruction::ADD_INT | 2 << 8, 0 | 1 << 8,
@@ -672,7 +672,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingAndJumps) {
* return-void 7. return
*/
TEST_F(ConstantFoldingTest, ConstantCondition) {
- const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 1 << 8 | 1 << 12,
Instruction::CONST_4 | 0 << 8 | 0 << 12,
Instruction::IF_GEZ | 1 << 8, 3,
diff --git a/compiler/optimizing/data_type-inl.h b/compiler/optimizing/data_type-inl.h
index e389bad3ad..e2cf7a80fe 100644
--- a/compiler/optimizing/data_type-inl.h
+++ b/compiler/optimizing/data_type-inl.h
@@ -53,7 +53,9 @@ constexpr char DataType::TypeId(DataType::Type type) {
case DataType::Type::kInt8: return 'b'; // Java byte (B).
case DataType::Type::kUint16: return 'c'; // Java char (C).
case DataType::Type::kInt16: return 's'; // Java short (S).
+ case DataType::Type::kUint32: return 'u'; // Picked 'u' for unsigned.
case DataType::Type::kInt32: return 'i'; // Java int (I).
+ case DataType::Type::kUint64: return 'w'; // Picked 'w' for long unsigned.
case DataType::Type::kInt64: return 'j'; // Java long (J).
case DataType::Type::kFloat32: return 'f'; // Java float (F).
case DataType::Type::kFloat64: return 'd'; // Java double (D).
diff --git a/compiler/optimizing/data_type.cc b/compiler/optimizing/data_type.cc
index 3c99a76c17..cb354f46cc 100644
--- a/compiler/optimizing/data_type.cc
+++ b/compiler/optimizing/data_type.cc
@@ -25,7 +25,9 @@ static const char* kTypeNames[] = {
"Int8",
"Uint16",
"Int16",
+ "Uint32",
"Int32",
+ "Uint64",
"Int64",
"Float32",
"Float64",
diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h
index 548fe28cee..4a6c91459f 100644
--- a/compiler/optimizing/data_type.h
+++ b/compiler/optimizing/data_type.h
@@ -34,7 +34,9 @@ class DataType {
kInt8,
kUint16,
kInt16,
+ kUint32,
kInt32,
+ kUint64,
kInt64,
kFloat32,
kFloat64,
@@ -55,9 +57,11 @@ class DataType {
case Type::kUint16:
case Type::kInt16:
return 1;
+ case Type::kUint32:
case Type::kInt32:
case Type::kFloat32:
return 2;
+ case Type::kUint64:
case Type::kInt64:
case Type::kFloat64:
return 3;
@@ -80,9 +84,11 @@ class DataType {
case Type::kUint16:
case Type::kInt16:
return 2;
+ case Type::kUint32:
case Type::kInt32:
case Type::kFloat32:
return 4;
+ case Type::kUint64:
case Type::kInt64:
case Type::kFloat64:
return 8;
@@ -107,7 +113,9 @@ class DataType {
case Type::kInt8:
case Type::kUint16:
case Type::kInt16:
+ case Type::kUint32:
case Type::kInt32:
+ case Type::kUint64:
case Type::kInt64:
return true;
default:
@@ -120,11 +128,12 @@ class DataType {
}
static bool Is64BitType(Type type) {
- return type == Type::kInt64 || type == Type::kFloat64;
+ return type == Type::kUint64 || type == Type::kInt64 || type == Type::kFloat64;
}
static bool IsUnsignedType(Type type) {
- return type == Type::kBool || type == Type::kUint8 || type == Type::kUint16;
+ return type == Type::kBool || type == Type::kUint8 || type == Type::kUint16 ||
+ type == Type::kUint32 || type == Type::kUint64;
}
// Return the general kind of `type`, fusing integer-like types as Type::kInt.
@@ -133,10 +142,14 @@ class DataType {
case Type::kBool:
case Type::kUint8:
case Type::kInt8:
- case Type::kInt16:
case Type::kUint16:
+ case Type::kInt16:
+ case Type::kUint32:
case Type::kInt32:
return Type::kInt32;
+ case Type::kUint64:
+ case Type::kInt64:
+ return Type::kInt64;
default:
return type;
}
@@ -154,8 +167,12 @@ class DataType {
return std::numeric_limits<uint16_t>::min();
case Type::kInt16:
return std::numeric_limits<int16_t>::min();
+ case Type::kUint32:
+ return std::numeric_limits<uint32_t>::min();
case Type::kInt32:
return std::numeric_limits<int32_t>::min();
+ case Type::kUint64:
+ return std::numeric_limits<uint64_t>::min();
case Type::kInt64:
return std::numeric_limits<int64_t>::min();
default:
@@ -176,8 +193,12 @@ class DataType {
return std::numeric_limits<uint16_t>::max();
case Type::kInt16:
return std::numeric_limits<int16_t>::max();
+ case Type::kUint32:
+ return std::numeric_limits<uint32_t>::max();
case Type::kInt32:
return std::numeric_limits<int32_t>::max();
+ case Type::kUint64:
+ return std::numeric_limits<uint64_t>::max();
case Type::kInt64:
return std::numeric_limits<int64_t>::max();
default:
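
The two new types slot in next to their signed counterparts and flow through every helper: same size and size-shift as kInt32/kInt64, classified as unsigned, and fused back to the signed kind by Kind(). A compile-time check of those invariants on a mini re-implementation (names mirror the header, the enum is cut down):

#include <cstddef>

enum class Type { kUint32, kInt32, kUint64, kInt64 };

constexpr size_t Size(Type t) {
  return (t == Type::kUint32 || t == Type::kInt32) ? 4u : 8u;
}
constexpr bool IsUnsignedType(Type t) {
  return t == Type::kUint32 || t == Type::kUint64;
}
constexpr Type Kind(Type t) {
  return Size(t) == 4u ? Type::kInt32 : Type::kInt64;  // fuse to the signed kind
}

static_assert(Size(Type::kUint64) == 8u, "Uint64 is 64-bit");
static_assert(IsUnsignedType(Type::kUint32), "Uint32 is unsigned");
static_assert(Kind(Type::kUint32) == Type::kInt32, "fused like Int32");

int main() { return 0; }
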
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 3cc7b0e78d..9fa0f72e80 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -146,6 +146,141 @@ static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstructi
}
}
+static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* throws) {
+ // Test for an if as the last statement.
+ if (!block->EndsWithIf()) {
+ return false;
+ }
+ HIf* ifs = block->GetLastInstruction()->AsIf();
+ // Find either:
+ // if obj == null
+ // throws
+ // else
+ // not_throws
+ // or:
+ // if obj != null
+ // not_throws
+ // else
+ // throws
+ HInstruction* cond = ifs->InputAt(0);
+ HBasicBlock* not_throws = nullptr;
+ if (throws == ifs->IfTrueSuccessor() && cond->IsEqual()) {
+ not_throws = ifs->IfFalseSuccessor();
+ } else if (throws == ifs->IfFalseSuccessor() && cond->IsNotEqual()) {
+ not_throws = ifs->IfTrueSuccessor();
+ } else {
+ return false;
+ }
+ DCHECK(cond->IsEqual() || cond->IsNotEqual());
+ HInstruction* obj = cond->InputAt(1);
+ if (obj->IsNullConstant()) {
+ obj = cond->InputAt(0);
+ } else if (!cond->InputAt(0)->IsNullConstant()) {
+ return false;
+ }
+ // Scan all uses of obj and find null checks under the guard's control dependence.
+ HBoundType* bound = nullptr;
+ const HUseList<HInstruction*>& uses = obj->GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end;) {
+ HInstruction* user = it->GetUser();
+ ++it; // increment before possibly replacing
+ if (user->IsNullCheck()) {
+ HBasicBlock* user_block = user->GetBlock();
+ if (user_block != block &&
+ user_block != throws &&
+ block->Dominates(user_block)) {
+ if (bound == nullptr) {
+ ReferenceTypeInfo ti = obj->GetReferenceTypeInfo();
+ bound = new (obj->GetBlock()->GetGraph()->GetAllocator()) HBoundType(obj);
+ bound->SetUpperBound(ti, /*can_be_null*/ false);
+ bound->SetReferenceTypeInfo(ti);
+ bound->SetCanBeNull(false);
+ not_throws->InsertInstructionBefore(bound, not_throws->GetFirstInstruction());
+ }
+ user->ReplaceWith(bound);
+ user_block->RemoveInstruction(user);
+ }
+ }
+ }
+ return bound != nullptr;
+}
+
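
RemoveNonNullControlDependences rewrites null checks that are dominated by an explicit "if (obj == null) throw" guard: instead of re-checking, dominated users see an HBoundType pinned to can_be_null = false. A source-level analog of the transformation (C++ standing in for the IR; the structure, not the API, is the point):

#include <cstdio>
#include <stdexcept>

struct Obj { int f = 42; };

int Before(Obj* obj) {
  if (obj == nullptr) throw std::runtime_error("NPE");  // the guard block
  if (obj == nullptr) throw std::runtime_error("NPE");  // dominated HNullCheck
  return obj->f;
}

int After(Obj* obj) {
  if (obj == nullptr) throw std::runtime_error("NPE");
  Obj* bound = obj;  // HBoundType: same value, known non-null on this path
  return bound->f;   // the dominated null check has been removed
}

int main() {
  Obj o;
  std::printf("%d %d\n", Before(&o), After(&o));
}
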
+// Simplify the pattern:
+//
+// B1
+// / \
+// | foo() // always throws
+// \ goto B2
+// \ /
+// B2
+//
+// Into:
+//
+// B1
+// / \
+// | foo()
+// | goto Exit
+// | |
+// B2 Exit
+//
+// Rationale:
+// Removal of the never-taken edge to B2 may expose
+// other optimization opportunities, such as code sinking.
+bool HDeadCodeElimination::SimplifyAlwaysThrows() {
+ // Make sure exceptions go to exit.
+ if (graph_->HasTryCatch()) {
+ return false;
+ }
+ HBasicBlock* exit = graph_->GetExitBlock();
+ if (exit == nullptr) {
+ return false;
+ }
+
+ bool rerun_dominance_and_loop_analysis = false;
+
+ // Order does not matter, just pick one.
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ HInstruction* first = block->GetFirstInstruction();
+ HInstruction* last = block->GetLastInstruction();
+ // Ensure the block contains exactly one throwing instruction followed by the goto.
+ if (first->AlwaysThrows() &&
+ first->GetNext() == last &&
+ last->IsGoto() &&
+ block->GetPhis().IsEmpty() &&
+ block->GetPredecessors().size() == 1u) {
+ DCHECK_EQ(block->GetSuccessors().size(), 1u);
+ HBasicBlock* pred = block->GetSinglePredecessor();
+ HBasicBlock* succ = block->GetSingleSuccessor();
+ // Ensure no computations are merged through the throwing block.
+ // This does not prevent the optimization per se, but would
+ // require an elaborate clean up of the SSA graph.
+ if (succ != exit &&
+ !block->Dominates(pred) &&
+ pred->Dominates(succ) &&
+ succ->GetPredecessors().size() > 1u &&
+ succ->GetPhis().IsEmpty()) {
+ block->ReplaceSuccessor(succ, exit);
+ rerun_dominance_and_loop_analysis = true;
+ MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke);
+ // Perform a quick follow-up optimization on object != null control dependences
+ // that is much cheaper to perform now than in a later phase.
+ if (RemoveNonNullControlDependences(pred, block)) {
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck);
+ }
+ }
+ }
+ }
+
+ // We need to re-analyze the graph in order to run DCE afterwards.
+ if (rerun_dominance_and_loop_analysis) {
+ graph_->ClearLoopInformation();
+ graph_->ClearDominanceInformation();
+ graph_->BuildDominatorTree();
+ return true;
+ }
+ return false;
+}
+
// Simplify the pattern:
//
// B1 B2 ...
@@ -381,6 +516,7 @@ void HDeadCodeElimination::Run() {
// Simplify graph to generate more dead block patterns.
ConnectSuccessiveBlocks();
bool did_any_simplification = false;
+ did_any_simplification |= SimplifyAlwaysThrows();
did_any_simplification |= SimplifyIfs();
did_any_simplification |= RemoveDeadBlocks();
if (did_any_simplification) {
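
SimplifyAlwaysThrows only retargets an edge when the throwing block has exactly the shape "throw-like instruction; goto", a single predecessor, and no phis to patch in the merged successor; those guards keep the SSA graph valid without an elaborate cleanup, and dominance/loop information is rebuilt once if anything changed. A sketch of the qualifying-shape check over a toy block summary:

#include <cstdio>

struct BlockShape {
  bool first_always_throws;
  bool first_next_is_last;   // block is exactly: throw-like; goto
  bool last_is_goto;
  bool has_phis;
  int num_predecessors;
};

bool QualifiesForRedirectToExit(const BlockShape& b) {
  return b.first_always_throws && b.first_next_is_last && b.last_is_goto &&
         !b.has_phis && b.num_predecessors == 1;
}

int main() {
  BlockShape b{true, true, true, false, 1};
  std::printf("%d\n", QualifiesForRedirectToExit(b));  // 1: edge moves to Exit
}
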
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 84fd890eee..92a7f562e1 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -40,6 +40,7 @@ class HDeadCodeElimination : public HOptimization {
void MaybeRecordSimplifyIf();
bool RemoveDeadBlocks();
void RemoveDeadInstructions();
+ bool SimplifyAlwaysThrows();
bool SimplifyIfs();
void ConnectSuccessiveBlocks();
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 929572ee3b..adb6ce1187 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -29,12 +29,12 @@ namespace art {
class DeadCodeEliminationTest : public OptimizingUnitTest {
protected:
- void TestCode(const uint16_t* data,
+ void TestCode(const std::vector<uint16_t>& data,
const std::string& expected_before,
const std::string& expected_after);
};
-void DeadCodeEliminationTest::TestCode(const uint16_t* data,
+void DeadCodeEliminationTest::TestCode(const std::vector<uint16_t>& data,
const std::string& expected_before,
const std::string& expected_after) {
HGraph* graph = CreateCFG(data);
@@ -73,7 +73,7 @@ void DeadCodeEliminationTest::TestCode(const uint16_t* data,
* return-void 7. return
*/
TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) {
- const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 1 << 8 | 1 << 12,
Instruction::CONST_4 | 0 << 8 | 0 << 12,
Instruction::IF_GEZ | 1 << 8, 3,
@@ -135,7 +135,7 @@ TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) {
* return 13. return-void
*/
TEST_F(DeadCodeEliminationTest, AdditionsAndInconditionalJumps) {
- const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 << 8 | 0 << 12,
Instruction::CONST_4 | 1 << 8 | 1 << 12,
Instruction::ADD_INT | 2 << 8, 0 | 1 << 8,
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 572466eec8..1d72ba116e 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -26,10 +26,12 @@ namespace art {
class OptimizerTest : public OptimizingUnitTest {
protected:
- void TestCode(const uint16_t* data, const uint32_t* blocks, size_t blocks_length);
+ void TestCode(const std::vector<uint16_t>& data, const uint32_t* blocks, size_t blocks_length);
};
-void OptimizerTest::TestCode(const uint16_t* data, const uint32_t* blocks, size_t blocks_length) {
+void OptimizerTest::TestCode(const std::vector<uint16_t>& data,
+ const uint32_t* blocks,
+ size_t blocks_length) {
HGraph* graph = CreateCFG(data);
ASSERT_EQ(graph->GetBlocks().size(), blocks_length);
for (size_t i = 0, e = blocks_length; i < e; ++i) {
@@ -49,7 +51,7 @@ void OptimizerTest::TestCode(const uint16_t* data, const uint32_t* blocks, size_
}
TEST_F(OptimizerTest, ReturnVoid) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::RETURN_VOID); // Block number 1
const uint32_t dominators[] = {
@@ -62,7 +64,7 @@ TEST_F(OptimizerTest, ReturnVoid) {
}
TEST_F(OptimizerTest, CFG1) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x100, // Block number 1
Instruction::RETURN_VOID); // Block number 2
@@ -77,7 +79,7 @@ TEST_F(OptimizerTest, CFG1) {
}
TEST_F(OptimizerTest, CFG2) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x100, // Block number 1
Instruction::GOTO | 0x100, // Block number 2
Instruction::RETURN_VOID); // Block number 3
@@ -94,7 +96,7 @@ TEST_F(OptimizerTest, CFG2) {
}
TEST_F(OptimizerTest, CFG3) {
- const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x200, // Block number 1
Instruction::RETURN_VOID, // Block number 2
Instruction::GOTO | 0xFF00); // Block number 3
@@ -109,14 +111,14 @@ TEST_F(OptimizerTest, CFG3) {
TestCode(data1, dominators, sizeof(dominators) / sizeof(int));
- const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_16, 3,
Instruction::RETURN_VOID,
Instruction::GOTO_16, 0xFFFF);
TestCode(data2, dominators, sizeof(dominators) / sizeof(int));
- const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_32, 4, 0,
Instruction::RETURN_VOID,
Instruction::GOTO_32, 0xFFFF, 0xFFFF);
@@ -125,7 +127,7 @@ TEST_F(OptimizerTest, CFG3) {
}
TEST_F(OptimizerTest, CFG4) {
- const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM(
Instruction::NOP,
Instruction::GOTO | 0xFF00);
@@ -138,14 +140,14 @@ TEST_F(OptimizerTest, CFG4) {
TestCode(data1, dominators, sizeof(dominators) / sizeof(int));
- const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_32, 0, 0);
TestCode(data2, dominators, sizeof(dominators) / sizeof(int));
}
TEST_F(OptimizerTest, CFG5) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::RETURN_VOID, // Block number 1
Instruction::GOTO | 0x100, // Dead block
Instruction::GOTO | 0xFE00); // Block number 2
@@ -162,7 +164,7 @@ TEST_F(OptimizerTest, CFG5) {
}
TEST_F(OptimizerTest, CFG6) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x100,
@@ -181,7 +183,7 @@ TEST_F(OptimizerTest, CFG6) {
}
TEST_F(OptimizerTest, CFG7) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3, // Block number 1
Instruction::GOTO | 0x100, // Block number 2
@@ -201,7 +203,7 @@ TEST_F(OptimizerTest, CFG7) {
}
TEST_F(OptimizerTest, CFG8) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3, // Block number 1
Instruction::GOTO | 0x200, // Block number 2
@@ -222,7 +224,7 @@ TEST_F(OptimizerTest, CFG8) {
}
TEST_F(OptimizerTest, CFG9) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3, // Block number 1
Instruction::GOTO | 0x200, // Block number 2
@@ -243,7 +245,7 @@ TEST_F(OptimizerTest, CFG9) {
}
TEST_F(OptimizerTest, CFG10) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 6, // Block number 1
Instruction::IF_EQ, 3, // Block number 2
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index b799fb4688..75b8e9609e 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -31,7 +31,7 @@ class FindLoopsTest : public OptimizingUnitTest {};
TEST_F(FindLoopsTest, CFG1) {
// Constant is not used.
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN_VOID);
@@ -42,7 +42,7 @@ TEST_F(FindLoopsTest, CFG1) {
}
TEST_F(FindLoopsTest, CFG2) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN);
@@ -53,7 +53,7 @@ TEST_F(FindLoopsTest, CFG2) {
}
TEST_F(FindLoopsTest, CFG3) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::ADD_INT_2ADDR | 1 << 12,
@@ -67,7 +67,7 @@ TEST_F(FindLoopsTest, CFG3) {
}
TEST_F(FindLoopsTest, CFG4) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -82,7 +82,7 @@ TEST_F(FindLoopsTest, CFG4) {
}
TEST_F(FindLoopsTest, CFG5) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -126,7 +126,7 @@ TEST_F(FindLoopsTest, Loop1) {
// while (a == a) {
// }
// return;
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0xFE00,
@@ -150,7 +150,7 @@ TEST_F(FindLoopsTest, Loop2) {
// while (a == a) {
// }
// return a;
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::GOTO | 0x400,
Instruction::IF_EQ, 4,
@@ -173,7 +173,7 @@ TEST_F(FindLoopsTest, Loop2) {
TEST_F(FindLoopsTest, Loop3) {
// Make sure we create a preheader of a loop when a header originally has two
// incoming blocks and one back edge.
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x100,
@@ -197,7 +197,7 @@ TEST_F(FindLoopsTest, Loop3) {
TEST_F(FindLoopsTest, Loop4) {
// Test loop with originally two back edges.
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 6,
Instruction::IF_EQ, 3,
@@ -221,7 +221,7 @@ TEST_F(FindLoopsTest, Loop4) {
TEST_F(FindLoopsTest, Loop5) {
// Test loop with two exit edges.
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 6,
Instruction::IF_EQ, 3,
@@ -244,7 +244,7 @@ TEST_F(FindLoopsTest, Loop5) {
}
TEST_F(FindLoopsTest, InnerLoop) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 6,
Instruction::IF_EQ, 3,
@@ -273,7 +273,7 @@ TEST_F(FindLoopsTest, InnerLoop) {
}
TEST_F(FindLoopsTest, TwoLoops) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0xFE00, // first loop
@@ -301,7 +301,7 @@ TEST_F(FindLoopsTest, TwoLoops) {
}
TEST_F(FindLoopsTest, NonNaturalLoop) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x0100,
@@ -317,7 +317,7 @@ TEST_F(FindLoopsTest, NonNaturalLoop) {
}
TEST_F(FindLoopsTest, DoWhileLoop) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::GOTO | 0x0100,
Instruction::IF_EQ, 0xFFFF,
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index b1ac027a68..c88baa8610 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -31,7 +31,15 @@ namespace art {
using android::base::StringPrintf;
static bool IsAllowedToJumpToExitBlock(HInstruction* instruction) {
- return instruction->IsThrow() || instruction->IsReturn() || instruction->IsReturnVoid();
+ // Anything that returns is allowed to jump into the exit block.
+ if (instruction->IsReturn() || instruction->IsReturnVoid()) {
+ return true;
+ }
+ // Anything that always throws is allowed to jump into the exit block.
+ if (instruction->IsGoto() && instruction->GetPrevious() != nullptr) {
+ instruction = instruction->GetPrevious();
+ }
+ return instruction->AlwaysThrows();
}
static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) {
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index 9ca3e4953a..08bfa5d80f 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -22,7 +22,7 @@ namespace art {
class GraphCheckerTest : public OptimizingUnitTest {
protected:
HGraph* CreateSimpleCFG();
- void TestCode(const uint16_t* data);
+ void TestCode(const std::vector<uint16_t>& data);
};
/**
@@ -48,7 +48,7 @@ HGraph* GraphCheckerTest::CreateSimpleCFG() {
return graph;
}
-void GraphCheckerTest::TestCode(const uint16_t* data) {
+void GraphCheckerTest::TestCode(const std::vector<uint16_t>& data) {
HGraph* graph = CreateCFG(data);
ASSERT_NE(graph, nullptr);
@@ -58,14 +58,14 @@ void GraphCheckerTest::TestCode(const uint16_t* data) {
}
TEST_F(GraphCheckerTest, ReturnVoid) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::RETURN_VOID);
TestCode(data);
}
TEST_F(GraphCheckerTest, CFG1) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x100,
Instruction::RETURN_VOID);
@@ -73,7 +73,7 @@ TEST_F(GraphCheckerTest, CFG1) {
}
TEST_F(GraphCheckerTest, CFG2) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x100,
@@ -83,7 +83,7 @@ TEST_F(GraphCheckerTest, CFG2) {
}
TEST_F(GraphCheckerTest, CFG3) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x100,
@@ -128,7 +128,7 @@ TEST_F(GraphCheckerTest, BlockEndingWithNonBranchInstruction) {
TEST_F(GraphCheckerTest, SSAPhi) {
// This code creates one Phi function during the conversion to SSA form.
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::CONST_4 | 4 << 12 | 0,
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 12c69889ab..6144162f68 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -533,20 +533,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE {
VisitVecBinaryOperation(hadd);
- StartAttributeStream("unsigned") << std::boolalpha << hadd->IsUnsigned() << std::noboolalpha;
StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
}
- void VisitVecMin(HVecMin* min) OVERRIDE {
- VisitVecBinaryOperation(min);
- StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha;
- }
-
- void VisitVecMax(HVecMax* max) OVERRIDE {
- VisitVecBinaryOperation(max);
- StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha;
- }
-
void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE {
VisitVecOperation(instruction);
StartAttributeStream("kind") << instruction->GetOpKind();
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index b2ad8ec400..035e5ce3e1 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -392,6 +392,35 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
return single_impl;
}
+static bool AlwaysThrows(ArtMethod* method)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ CodeItemDataAccessor accessor(method->DexInstructionData());
+ // Skip native methods, methods with try blocks, and methods that are too large.
+ if (!accessor.HasCodeItem() ||
+ accessor.TriesSize() != 0 ||
+ accessor.InsnsSizeInCodeUnits() > kMaximumNumberOfTotalInstructions) {
+ return false;
+ }
+ // Scan for exits.
+ bool throw_seen = false;
+ for (const DexInstructionPcPair& pair : accessor) {
+ switch (pair.Inst().Opcode()) {
+ case Instruction::RETURN:
+ case Instruction::RETURN_VOID:
+ case Instruction::RETURN_WIDE:
+ case Instruction::RETURN_OBJECT:
+ case Instruction::RETURN_VOID_NO_BARRIER:
+ return false; // found regular control flow back to the caller
+ case Instruction::THROW:
+ throw_seen = true;
+ break;
+ default:
+ break;
+ }
+ }
+ return throw_seen;
+}
+
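
The scan above is conservative: a method "always throws" only if its bytecode contains at least one throw and no return of any flavor. A toy version of the same walk, over a cut-down opcode enum:

#include <cstdio>
#include <vector>

enum class Op { kReturn, kReturnVoid, kThrow, kConst, kInvoke };

bool AlwaysThrowsToy(const std::vector<Op>& code) {
  bool throw_seen = false;
  for (Op op : code) {
    switch (op) {
      case Op::kReturn:
      case Op::kReturnVoid:
        return false;        // regular control flow back to the caller
      case Op::kThrow:
        throw_seen = true;
        break;
      default:
        break;
    }
  }
  return throw_seen;
}

int main() {
  std::printf("%d\n", AlwaysThrowsToy({Op::kConst, Op::kThrow}));      // 1
  std::printf("%d\n", AlwaysThrowsToy({Op::kConst, Op::kReturnVoid})); // 0
}
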
bool HInliner::TryInline(HInvoke* invoke_instruction) {
if (invoke_instruction->IsInvokeUnresolved() ||
invoke_instruction->IsInvokePolymorphic()) {
@@ -431,20 +460,29 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
}
if (actual_method != nullptr) {
+ // Single target.
bool result = TryInlineAndReplace(invoke_instruction,
actual_method,
ReferenceTypeInfo::CreateInvalid(),
/* do_rtp */ true,
cha_devirtualize);
- if (result && !invoke_instruction->IsInvokeStaticOrDirect()) {
- if (cha_devirtualize) {
- // Add dependency due to devirtulization. We've assumed resolved_method
- // has single implementation.
- outermost_graph_->AddCHASingleImplementationDependency(resolved_method);
- MaybeRecordStat(stats_, MethodCompilationStat::kCHAInline);
- } else {
- MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface);
+ if (result) {
+ // Successfully inlined.
+ if (!invoke_instruction->IsInvokeStaticOrDirect()) {
+ if (cha_devirtualize) {
+ // Add dependency due to devirtualization. We've assumed resolved_method
+ // has single implementation.
+ outermost_graph_->AddCHASingleImplementationDependency(resolved_method);
+ MaybeRecordStat(stats_, MethodCompilationStat::kCHAInline);
+ } else {
+ MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface);
+ }
}
+ } else if (!cha_devirtualize && AlwaysThrows(actual_method)) {
+ // Set always throws property for non-inlined method call with single target
+ // (unless it was obtained through CHA, because that would imply we have
+ // to add the CHA dependency, which seems not worth it).
+ invoke_instruction->SetAlwaysThrows(true);
}
return result;
}
@@ -1381,7 +1419,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile());
- CodeItemDataAccessor accessor(method);
+ CodeItemDataAccessor accessor(method->DexInstructionData());
if (!accessor.HasCodeItem()) {
LOG_FAIL_NO_STAT()
@@ -1660,7 +1698,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
const DexFile& callee_dex_file = *resolved_method->GetDexFile();
uint32_t method_index = resolved_method->GetDexMethodIndex();
- CodeItemDebugInfoAccessor code_item_accessor(callee_dex_file, code_item);
+ CodeItemDebugInfoAccessor code_item_accessor(resolved_method->DexInstructionDebugInfo());
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
Handle<mirror::DexCache> dex_cache = NewHandleIfDifferent(resolved_method->GetDexCache(),
caller_compilation_unit_.GetDexCache(),
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index e81d97b0a8..02465d37ba 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -18,7 +18,7 @@
#define ART_COMPILER_OPTIMIZING_INLINER_H_
#include "dex/dex_file_types.h"
-#include "invoke_type.h"
+#include "dex/invoke_type.h"
#include "jit/profile_compilation_info.h"
#include "optimization.h"
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 72a93c1f77..64a1eccf60 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -49,7 +49,7 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph,
const DexCompilationUnit* outer_compilation_unit,
CompilerDriver* compiler_driver,
CodeGenerator* code_generator,
- const uint8_t* interpreter_metadata,
+ ArrayRef<const uint8_t> interpreter_metadata,
OptimizingCompilerStats* compiler_stats,
VariableSizedHandleScope* handles,
ScopedArenaAllocator* local_allocator)
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 708a09711a..4428c53277 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
+#include "base/array_ref.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "data_type.h"
@@ -57,7 +58,7 @@ class HInstructionBuilder : public ValueObject {
const DexCompilationUnit* outer_compilation_unit,
CompilerDriver* compiler_driver,
CodeGenerator* code_generator,
- const uint8_t* interpreter_metadata,
+ ArrayRef<const uint8_t> interpreter_metadata,
OptimizingCompilerStats* compiler_stats,
VariableSizedHandleScope* handles,
ScopedArenaAllocator* local_allocator);
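
Passing ArrayRef<const uint8_t> instead of a bare const uint8_t* bundles the pointer with its length, so an empty metadata blob is representable without a sentinel. A hedged mini version of the view type (the real one is art::ArrayRef in base/array_ref.h; only the relevant surface is sketched):

#include <cstddef>
#include <cstdint>
#include <cstdio>

template <typename T>
class ArrayRefSketch {
 public:
  ArrayRefSketch() : data_(nullptr), size_(0) {}
  ArrayRefSketch(T* data, size_t size) : data_(data), size_(size) {}
  T* data() const { return data_; }
  size_t size() const { return size_; }
  bool empty() const { return size_ == 0; }
 private:
  T* data_;
  size_t size_;
};

int main() {
  const uint8_t bytes[] = {1, 2, 3};
  ArrayRefSketch<const uint8_t> metadata(bytes, 3);
  std::printf("%zu\n", metadata.size());  // a raw pointer carries no length
}
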
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 6928b70df7..acb830e524 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -19,9 +19,9 @@
#include "art_field-inl.h"
#include "art_method-inl.h"
#include "class_linker.h"
+#include "dex/invoke_type.h"
#include "driver/compiler_driver.h"
#include "driver/compiler_options.h"
-#include "invoke_type.h"
#include "mirror/dex_cache-inl.h"
#include "nodes.h"
#include "scoped_thread_state_change-inl.h"
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index ca1b451e6b..2f8e33f941 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2011,6 +2011,14 @@ void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 99b8b5df74..830d0403e4 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2811,6 +2811,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 113c9de5a2..cafa5228d9 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2835,6 +2835,15 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAtan2(HInvoke* invoke) {
GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+// static double java.lang.Math.pow(double a, double b)
+void IntrinsicLocationsBuilderMIPS::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathPow(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
// static double java.lang.Math.cbrt(double a)
void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) {
CreateFPToFPCallLocations(allocator_, invoke);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 521bad27e2..89f1818be2 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2416,6 +2416,15 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAtan2(HInvoke* invoke) {
GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+// static double java.lang.Math.pow(double a, double b)
+void IntrinsicLocationsBuilderMIPS64::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathPow(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
// static double java.lang.Math.cbrt(double a)
void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) {
CreateFPToFPCallLocations(allocator_, invoke);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index baa410b884..46b7f3f1ce 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1105,6 +1105,14 @@ void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 6dd8b8e1f5..6483b7cb2a 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -897,6 +897,14 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index 43b63a73ef..9fa5b74c62 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -35,11 +35,12 @@ namespace art {
class LinearizeTest : public OptimizingUnitTest {
protected:
template <size_t number_of_blocks>
- void TestCode(const uint16_t* data, const uint32_t (&expected_order)[number_of_blocks]);
+ void TestCode(const std::vector<uint16_t>& data,
+ const uint32_t (&expected_order)[number_of_blocks]);
};
template <size_t number_of_blocks>
-void LinearizeTest::TestCode(const uint16_t* data,
+void LinearizeTest::TestCode(const std::vector<uint16_t>& data,
const uint32_t (&expected_order)[number_of_blocks]) {
HGraph* graph = CreateCFG(data);
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
@@ -68,7 +69,7 @@ TEST_F(LinearizeTest, CFG1) {
// + / \ +
// Block4 Block8
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 5,
Instruction::IF_EQ, 0xFFFE,
@@ -93,7 +94,7 @@ TEST_F(LinearizeTest, CFG2) {
// + / \ +
// Block5 Block8
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::RETURN_VOID,
@@ -119,7 +120,7 @@ TEST_F(LinearizeTest, CFG3) {
// Block6 + Block9
// | +
// Block4 ++
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::RETURN_VOID,
@@ -149,7 +150,7 @@ TEST_F(LinearizeTest, CFG4) {
// + / \ +
// Block5 Block11
*/
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 7,
Instruction::IF_EQ, 0xFFFE,
@@ -179,7 +180,7 @@ TEST_F(LinearizeTest, CFG5) {
// +/ \ +
// Block6 Block11
*/
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::RETURN_VOID,
@@ -205,7 +206,7 @@ TEST_F(LinearizeTest, CFG6) {
// Block5 <- Block9 Block6 +
// |
// Block7
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::GOTO | 0x0100,
Instruction::IF_EQ, 0x0004,
@@ -233,7 +234,7 @@ TEST_F(LinearizeTest, CFG7) {
// |
// Block7
//
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::GOTO | 0x0100,
Instruction::IF_EQ, 0x0005,
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index e45d7c820c..66660662e4 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -31,10 +31,10 @@ namespace art {
class LiveRangesTest : public OptimizingUnitTest {
public:
- HGraph* BuildGraph(const uint16_t* data);
+ HGraph* BuildGraph(const std::vector<uint16_t>& data);
};
-HGraph* LiveRangesTest::BuildGraph(const uint16_t* data) {
+HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) {
HGraph* graph = CreateCFG(data);
// Suspend checks implementation may change in the future, and this test relies
// on how instructions are ordered.
@@ -57,7 +57,7 @@ TEST_F(LiveRangesTest, CFG1) {
* |
* 12: exit
*/
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN);
@@ -102,7 +102,7 @@ TEST_F(LiveRangesTest, CFG2) {
* |
* 26: exit
*/
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x100,
@@ -151,7 +151,7 @@ TEST_F(LiveRangesTest, CFG3) {
* |
* 28: exit
*/
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -225,7 +225,7 @@ TEST_F(LiveRangesTest, Loop1) {
* 30: exit
*/
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -304,7 +304,7 @@ TEST_F(LiveRangesTest, Loop2) {
* We want to make sure the phi at 10 has a lifetime hole after the add at 20.
*/
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 6,
Instruction::ADD_INT, 0, 0,
@@ -378,7 +378,7 @@ TEST_F(LiveRangesTest, CFG4) {
*
* We want to make sure the constant0 has a lifetime hole after the 16: add.
*/
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::IF_EQ, 5,
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 35bc4ff8b3..6621a03568 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -31,7 +31,7 @@ namespace art {
class LivenessTest : public OptimizingUnitTest {
protected:
- void TestCode(const uint16_t* data, const char* expected);
+ void TestCode(const std::vector<uint16_t>& data, const char* expected);
};
static void DumpBitVector(BitVector* vector,
@@ -46,7 +46,7 @@ static void DumpBitVector(BitVector* vector,
buffer << ")\n";
}
-void LivenessTest::TestCode(const uint16_t* data, const char* expected) {
+void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expected) {
HGraph* graph = CreateCFG(data);
// `Inline` conditions into ifs.
PrepareForRegisterAllocation(graph).Run();
@@ -86,7 +86,7 @@ TEST_F(LivenessTest, CFG1) {
" kill: (0)\n";
// Constant is not used.
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN_VOID);
@@ -108,7 +108,7 @@ TEST_F(LivenessTest, CFG2) {
" live out: (0)\n"
" kill: (0)\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN);
@@ -134,7 +134,7 @@ TEST_F(LivenessTest, CFG3) {
" live out: (000)\n"
" kill: (000)\n";
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::ADD_INT_2ADDR | 1 << 12,
@@ -181,7 +181,7 @@ TEST_F(LivenessTest, CFG4) {
" live out: (0000)\n"
" kill: (0000)\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -228,7 +228,7 @@ TEST_F(LivenessTest, CFG5) {
" live out: (000)\n"
" kill: (000)\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -273,7 +273,7 @@ TEST_F(LivenessTest, Loop1) {
" kill: (000)\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -318,7 +318,7 @@ TEST_F(LivenessTest, Loop3) {
" live out: (0000)\n"
" kill: (0000)\n";
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -370,7 +370,7 @@ TEST_F(LivenessTest, Loop4) {
" live out: (000)\n"
" kill: (000)\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::GOTO | 0x500,
Instruction::IF_EQ, 5,
@@ -425,7 +425,7 @@ TEST_F(LivenessTest, Loop5) {
" live out: (0001)\n"
" kill: (0001)\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -475,7 +475,7 @@ TEST_F(LivenessTest, Loop6) {
" live out: (0000)\n"
" kill: (0000)\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 8,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -530,7 +530,7 @@ TEST_F(LivenessTest, Loop7) {
" live out: (00000)\n"
" kill: (00000)\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 8,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -580,7 +580,7 @@ TEST_F(LivenessTest, Loop8) {
" live out: (000)\n"
" kill: (000)\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 6,
Instruction::ADD_INT, 0, 0,
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 88326d321b..8b4eae1780 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -25,6 +25,51 @@
#include <iostream>
+/**
+ * The general algorithm of load-store elimination (LSE).
+ * Load-store analysis in the previous pass collects a list of heap locations
+ * and does alias analysis of those heap locations.
+ * LSE keeps track of a list of heap values corresponding to the heap
+ * locations. It visits basic blocks in reverse post order; for each basic
+ * block, it visits the instructions sequentially and processes them as
+ * follows:
+ * - If the instruction is a load, and the heap location for that load has a
+ * valid heap value, the load can be eliminated. In order to maintain the
+ * validity of all heap locations during the optimization phase, the real
+ * elimination is delayed till the end of LSE.
+ * - If the instruction is a store, it updates the heap value for the heap
+ * location of the store with the store instruction. The real heap value
+ * can be fetched from the store instruction. Heap values are invalidated
+ * for heap locations that may alias with the store instruction's heap
+ *   location. The store instruction can be eliminated unless the stored
+ *   value is later needed, e.g. by a load from the same/aliased heap
+ *   location, or the heap location persists at method return/deoptimization.
+ *   The store instruction is also needed if it is no longer used to track
+ *   the heap value, e.g. when it fails to merge with the heap values from
+ *   other predecessors.
+ * - A store that stores the same value as the heap value is eliminated.
+ * - The list of heap values is merged at basic block entry from the basic
+ *   block's predecessors. The algorithm is single-pass, so loop side effects
+ *   are used as a best effort to decide whether a heap location is written
+ *   inside the loop.
+ * - A special type of object, called a singleton, is instantiated in the
+ *   method and has a single name, i.e. no aliases. Since they have no
+ *   aliases, singletons have exclusive heap locations, which helps narrow
+ *   down the life span of a heap location so that it does not always need
+ *   to participate in merging heap values. The allocation of a singleton
+ *   can be eliminated if that singleton is not used and does not persist
+ *   at method return/deoptimization.
+ * - For newly instantiated instances, their heap values are initialized to
+ *   language-defined default values.
+ * - Some instructions such as invokes are treated as loading and invalidating
+ * all the heap values, depending on the instruction's side effects.
+ * - Finalizable objects are considered to persist at method
+ *   return/deoptimization.
+ * - Currently this LSE algorithm doesn't handle SIMD graphs, e.g. those with
+ *   VecLoad and VecStore instructions.
+ * - Currently this LSE algorithm doesn't handle graphs with try-catch, due
+ *   to the special block merging structure.
+ */
+
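As a hedged, source-level sketch of what the pass described above eliminates, with `Point` and `Demo` invented for illustration and C++ standing in for the compiled dex code, assuming `p` is a non-escaping, non-finalizable singleton:

#include <cstdio>

struct Point { int x; };

int Demo() {
  Point* p = new Point();  // removable singleton allocation
  p->x = 1;                // store tracked as the heap value for p->x
  int a = p->x;            // load eliminated: replaced by the tracked value 1
  p->x = a;                // stores the same value as the heap value: removed
  delete p;
  return a;                // p's heap values do not persist at method return
}

int main() { std::printf("%d\n", Demo()); }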
namespace art {
// An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
@@ -59,8 +104,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
removed_loads_(allocator_.Adapter(kArenaAllocLSE)),
substitute_instructions_for_loads_(allocator_.Adapter(kArenaAllocLSE)),
possibly_removed_stores_(allocator_.Adapter(kArenaAllocLSE)),
- singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
- singleton_new_arrays_(allocator_.Adapter(kArenaAllocLSE)) {
+ singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) {
}
void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -88,19 +132,26 @@ class LSEVisitor : public HGraphDelegateVisitor {
return type_conversion;
}
- // Find an instruction's substitute if it should be removed.
+ // Find an instruction's substitute if it's a removed load.
// Return the same instruction if it should not be removed.
HInstruction* FindSubstitute(HInstruction* instruction) {
+ if (!IsLoad(instruction)) {
+ return instruction;
+ }
size_t size = removed_loads_.size();
for (size_t i = 0; i < size; i++) {
if (removed_loads_[i] == instruction) {
- return substitute_instructions_for_loads_[i];
+ HInstruction* substitute = substitute_instructions_for_loads_[i];
+ // The substitute list is a flat hierarchy.
+ DCHECK_EQ(FindSubstitute(substitute), substitute);
+ return substitute;
}
}
return instruction;
}
void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
+ DCHECK(IsLoad(load));
DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
"Unexpected heap_value that has a substitute " << heap_value->DebugName();
removed_loads_.push_back(load);
@@ -207,28 +258,59 @@ class LSEVisitor : public HGraphDelegateVisitor {
new_instance->GetBlock()->RemoveInstruction(new_instance);
}
}
- for (HInstruction* new_array : singleton_new_arrays_) {
- size_t removed = HConstructorFence::RemoveConstructorFences(new_array);
- MaybeRecordStat(stats_,
- MethodCompilationStat::kConstructorFenceRemovedLSE,
- removed);
+ }
- if (!new_array->HasNonEnvironmentUses()) {
- new_array->RemoveEnvironmentUsers();
- new_array->GetBlock()->RemoveInstruction(new_array);
- }
+ private:
+ static bool IsLoad(HInstruction* instruction) {
+ if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) {
+ return false;
}
+ // An unresolved load is not treated as a load.
+ return instruction->IsInstanceFieldGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsArrayGet();
}
- private:
- // If heap_values[index] is an instance field store, need to keep the store.
- // This is necessary if a heap value is killed due to merging, or loop side
- // effects (which is essentially merging also), since a load later from the
- // location won't be eliminated.
+ static bool IsStore(HInstruction* instruction) {
+ if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) {
+ return false;
+ }
+ // An unresolved store is not treated as a store.
+ return instruction->IsInstanceFieldSet() ||
+ instruction->IsArraySet() ||
+ instruction->IsStaticFieldSet();
+ }
+
+ // Returns the real heap value by finding its substitute or by "peeling"
+ // a store instruction.
+ HInstruction* GetRealHeapValue(HInstruction* heap_value) {
+ if (IsLoad(heap_value)) {
+ return FindSubstitute(heap_value);
+ }
+ if (!IsStore(heap_value)) {
+ return heap_value;
+ }
+
+ // We track store instructions as heap values; they may be eliminated if
+ // the stores are later found to be unnecessary. The real stored value
+ // needs to be fetched from the store instruction.
+ if (heap_value->IsInstanceFieldSet()) {
+ heap_value = heap_value->AsInstanceFieldSet()->GetValue();
+ } else if (heap_value->IsStaticFieldSet()) {
+ heap_value = heap_value->AsStaticFieldSet()->GetValue();
+ } else {
+ DCHECK(heap_value->IsArraySet());
+ heap_value = heap_value->AsArraySet()->GetValue();
+ }
+ // heap_value may already be a removed load.
+ return FindSubstitute(heap_value);
+ }
+
+ // If heap_value is a store, need to keep the store.
+ // This is necessary if a heap value is killed or replaced by another value,
+ // so that the store is no longer used to track the heap value.
void KeepIfIsStore(HInstruction* heap_value) {
- if (heap_value == kDefaultHeapValue ||
- heap_value == kUnknownHeapValue ||
- !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) {
+ if (!IsStore(heap_value)) {
return;
}
auto idx = std::find(possibly_removed_stores_.begin(),
@@ -239,26 +321,41 @@ class LSEVisitor : public HGraphDelegateVisitor {
}
}
+ // If a heap location X may alias with heap location at `loc_index`
+ // and heap_values of that heap location X holds a store, keep that store.
+ // It's needed for a dependent load that's not eliminated since any store
+ // that may put a value into the load's heap location needs to be kept.
+ void KeepStoresIfAliasedToLocation(ScopedArenaVector<HInstruction*>& heap_values,
+ size_t loc_index) {
+ for (size_t i = 0; i < heap_values.size(); i++) {
+ if ((i == loc_index) || heap_location_collector_.MayAlias(i, loc_index)) {
+ KeepIfIsStore(heap_values[i]);
+ }
+ }
+ }
+
void HandleLoopSideEffects(HBasicBlock* block) {
DCHECK(block->IsLoopHeader());
int block_id = block->GetBlockId();
ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+ HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+ ScopedArenaVector<HInstruction*>& pre_header_heap_values =
+ heap_values_for_[pre_header->GetBlockId()];
- // Don't eliminate loads in irreducible loops. This is safe for singletons, because
- // they are always used by the non-eliminated loop-phi.
+ // Don't eliminate loads in irreducible loops.
+ // Also keep the stores before the loop.
if (block->GetLoopInformation()->IsIrreducible()) {
if (kIsDebugBuild) {
for (size_t i = 0; i < heap_values.size(); i++) {
DCHECK_EQ(heap_values[i], kUnknownHeapValue);
}
}
+ for (size_t i = 0; i < heap_values.size(); i++) {
+ KeepIfIsStore(pre_header_heap_values[i]);
+ }
return;
}
- HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
- ScopedArenaVector<HInstruction*>& pre_header_heap_values =
- heap_values_for_[pre_header->GetBlockId()];
-
// Inherit the values from pre-header.
for (size_t i = 0; i < heap_values.size(); i++) {
heap_values[i] = pre_header_heap_values[i];
@@ -270,18 +367,17 @@ class LSEVisitor : public HGraphDelegateVisitor {
for (size_t i = 0; i < heap_values.size(); i++) {
HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
ReferenceInfo* ref_info = location->GetReferenceInfo();
- if (ref_info->IsSingletonAndRemovable() &&
- !location->IsValueKilledByLoopSideEffects()) {
- // A removable singleton's field that's not stored into inside a loop is
+ if (ref_info->IsSingleton() && !location->IsValueKilledByLoopSideEffects()) {
+ // A singleton's field that's not stored into inside a loop is
// invariant throughout the loop. Nothing to do.
} else {
- // heap value is killed by loop side effects (stored into directly, or
- // due to aliasing). Or the heap value may be needed after method return
- // or deoptimization.
+ // heap value is killed by loop side effects.
KeepIfIsStore(pre_header_heap_values[i]);
heap_values[i] = kUnknownHeapValue;
}
}
+ } else {
+ // The loop doesn't kill any value.
}
}
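A hedged source-level illustration of the loop handling above (names invented; C++ standing in for dex): the pre-header store must be kept whenever the loop's side effects may overwrite the same location.

struct Point { int x; };

void Demo(Point* p, int n) {
  p->x = 1;                      // pre-header store: kept, needed at the loop header
  for (int i = 0; i < n; ++i) {
    p->x += 2;                   // store inside the loop kills the heap value
  }
}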
@@ -300,45 +396,73 @@ class LSEVisitor : public HGraphDelegateVisitor {
ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()];
for (size_t i = 0; i < heap_values.size(); i++) {
HInstruction* merged_value = nullptr;
+ // If we can merge the store itself from the predecessors, we keep
+ // the store as the heap value as long as possible. In case we cannot
+ // merge the store, we try to merge the values of the stores.
+ HInstruction* merged_store_value = nullptr;
// Whether merged_value is a result that's merged from all predecessors.
bool from_all_predecessors = true;
ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
+ HInstruction* ref = ref_info->GetReference();
HInstruction* singleton_ref = nullptr;
if (ref_info->IsSingleton()) {
- // We do more analysis of liveness when merging heap values for such
- // cases since stores into such references may potentially be eliminated.
- singleton_ref = ref_info->GetReference();
+ // We do more analysis based on singleton's liveness when merging
+ // heap values for such cases.
+ singleton_ref = ref;
}
for (HBasicBlock* predecessor : predecessors) {
HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i];
+ if (!IsStore(pred_value)) {
+ pred_value = FindSubstitute(pred_value);
+ }
+ DCHECK(pred_value != nullptr);
+ HInstruction* pred_store_value = GetRealHeapValue(pred_value);
if ((singleton_ref != nullptr) &&
!singleton_ref->GetBlock()->Dominates(predecessor)) {
- // singleton_ref is not live in this predecessor. Skip this predecessor since
- // it does not really have the location.
+ // singleton_ref is not live in this predecessor. No need to merge
+ // since singleton_ref is not live at the beginning of this block.
DCHECK_EQ(pred_value, kUnknownHeapValue);
from_all_predecessors = false;
- continue;
+ break;
}
if (merged_value == nullptr) {
// First seen heap value.
+ DCHECK(pred_value != nullptr);
merged_value = pred_value;
} else if (pred_value != merged_value) {
// There are conflicting values.
merged_value = kUnknownHeapValue;
+ // We may still be able to merge store values.
+ }
+
+ // Conflicting stores may be storing the same value. We do another merge
+ // of real stored values.
+ if (merged_store_value == nullptr) {
+ // First seen store value.
+ DCHECK(pred_store_value != nullptr);
+ merged_store_value = pred_store_value;
+ } else if (pred_store_value != merged_store_value) {
+ // There are conflicting store values.
+ merged_store_value = kUnknownHeapValue;
+ // There must be conflicting stores also.
+ DCHECK_EQ(merged_value, kUnknownHeapValue);
+ // No need to merge anymore.
break;
}
}
- if (ref_info->IsSingleton()) {
- if (ref_info->IsSingletonAndNonRemovable() ||
- (merged_value == kUnknownHeapValue &&
- !block->IsSingleReturnOrReturnVoidAllowingPhis())) {
- // The heap value may be needed after method return or deoptimization,
- // or there are conflicting heap values from different predecessors and
- // this block is not a single return,
- // keep the last store in each predecessor since future loads may not
- // be eliminated.
+ if (merged_value == nullptr) {
+ DCHECK(!from_all_predecessors);
+ DCHECK(singleton_ref != nullptr);
+ }
+ if (from_all_predecessors) {
+ if (ref_info->IsSingletonAndRemovable() &&
+ block->IsSingleReturnOrReturnVoidAllowingPhis()) {
+ // Values in the singleton are not needed anymore.
+ } else if (!IsStore(merged_value)) {
+ // We no longer track the merged value as a store. We have to
+ // keep the stores in the predecessors live here.
for (HBasicBlock* predecessor : predecessors) {
ScopedArenaVector<HInstruction*>& pred_values =
heap_values_for_[predecessor->GetBlockId()];
@@ -346,18 +470,33 @@ class LSEVisitor : public HGraphDelegateVisitor {
}
}
} else {
- // Currenctly we don't eliminate stores to non-singletons.
+ DCHECK(singleton_ref != nullptr);
+ // singleton_ref does not exist at the beginning of the block. There is
+ // no need to keep the stores.
}
- if ((merged_value == nullptr) || !from_all_predecessors) {
+ if (!from_all_predecessors) {
DCHECK(singleton_ref != nullptr);
DCHECK((singleton_ref->GetBlock() == block) ||
- !singleton_ref->GetBlock()->Dominates(block));
+ !singleton_ref->GetBlock()->Dominates(block))
+ << "method: " << GetGraph()->GetMethodName();
// singleton_ref is not defined before block or defined only in some of its
// predecessors, so block doesn't really have the location at its entry.
heap_values[i] = kUnknownHeapValue;
- } else {
+ } else if (predecessors.size() == 1) {
+ // Inherit heap value from the single predecessor.
+ DCHECK_EQ(heap_values_for_[predecessors[0]->GetBlockId()][i], merged_value);
heap_values[i] = merged_value;
+ } else {
+ DCHECK(merged_value == kUnknownHeapValue ||
+ merged_value == kDefaultHeapValue ||
+ merged_value->GetBlock()->Dominates(block));
+ if (merged_value != kUnknownHeapValue) {
+ heap_values[i] = merged_value;
+ } else {
+ // Stores in different predecessors may be storing the same value.
+ heap_values[i] = merged_store_value;
+ }
}
}
}
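To illustrate the two-level merge above with a hedged, invented example: the predecessor stores are distinct instructions, so merged_value degrades to unknown, but both store the same constant, so merged_store_value survives and the load can still be replaced.

struct Point { int x; };

int Demo(Point* p, bool c) {
  if (c) {
    p->x = 1;   // store A: heap value in the first predecessor
  } else {
    p->x = 1;   // store B: heap value in the second predecessor
  }
  // merged_value is unknown (A != B), but merged_store_value is 1.
  return p->x;  // replaceable by the merged store value 1
}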
@@ -423,23 +562,12 @@ class LSEVisitor : public HGraphDelegateVisitor {
heap_values[idx] = constant;
return;
}
- if (heap_value != kUnknownHeapValue) {
- if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) {
- HInstruction* store = heap_value;
- // This load must be from a singleton since it's from the same
- // field/element that a "removed" store puts the value. That store
- // must be to a singleton's field/element.
- DCHECK(ref_info->IsSingleton());
- // Get the real heap value of the store.
- heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2);
- // heap_value may already have a substitute.
- heap_value = FindSubstitute(heap_value);
- }
- }
+ heap_value = GetRealHeapValue(heap_value);
if (heap_value == kUnknownHeapValue) {
// Load isn't eliminated. Put the load as the value into the HeapLocation.
// This acts like GVN but with better aliasing analysis.
heap_values[idx] = instruction;
+ KeepStoresIfAliasedToLocation(heap_values, idx);
} else {
if (DataType::Kind(heap_value->GetType()) != DataType::Kind(instruction->GetType())) {
// The only situation where the same heap location has different type is when
@@ -452,6 +580,10 @@ class LSEVisitor : public HGraphDelegateVisitor {
DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName();
DCHECK(instruction->IsArrayGet()) << instruction->DebugName();
}
+ // Load isn't eliminated. Put the load as the value into the HeapLocation.
+ // This acts like GVN but with better aliasing analysis.
+ heap_values[idx] = instruction;
+ KeepStoresIfAliasedToLocation(heap_values, idx);
return;
}
AddRemovedLoad(instruction, heap_value);
@@ -460,12 +592,21 @@ class LSEVisitor : public HGraphDelegateVisitor {
}
bool Equal(HInstruction* heap_value, HInstruction* value) {
+ DCHECK(!IsStore(value)) << value->DebugName();
+ if (heap_value == kUnknownHeapValue) {
+ // Don't compare kUnknownHeapValue with other values.
+ return false;
+ }
if (heap_value == value) {
return true;
}
if (heap_value == kDefaultHeapValue && GetDefaultValue(value->GetType()) == value) {
return true;
}
+ HInstruction* real_heap_value = GetRealHeapValue(heap_value);
+ if (real_heap_value != heap_value) {
+ return Equal(real_heap_value, value);
+ }
return false;
}
@@ -476,6 +617,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
size_t vector_length,
int16_t declaring_class_def_index,
HInstruction* value) {
+ DCHECK(!IsStore(value)) << value->DebugName();
// value may already have a substitute.
value = FindSubstitute(value);
HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref);
@@ -486,59 +628,47 @@ class LSEVisitor : public HGraphDelegateVisitor {
ScopedArenaVector<HInstruction*>& heap_values =
heap_values_for_[instruction->GetBlock()->GetBlockId()];
HInstruction* heap_value = heap_values[idx];
- bool same_value = false;
bool possibly_redundant = false;
+
if (Equal(heap_value, value)) {
// Store into the heap location with the same value.
- same_value = true;
- } else if (index != nullptr &&
- heap_location_collector_.GetHeapLocation(idx)->HasAliasedLocations()) {
- // For array element, don't eliminate stores if the location can be aliased
- // (due to either ref or index aliasing).
- } else if (ref_info->IsSingleton()) {
- // Store into a field/element of a singleton. The value cannot be killed due to
- // aliasing/invocation. It can be redundant since future loads can
- // directly get the value set by this instruction. The value can still be killed due to
- // merging or loop side effects. Stores whose values are killed due to merging/loop side
- // effects later will be removed from possibly_removed_stores_ when that is detected.
- // Stores whose values may be needed after method return or deoptimization
- // are also removed from possibly_removed_stores_ when that is detected.
- possibly_redundant = true;
+ // This store can be eliminated right away.
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ } else {
HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
- if (loop_info != nullptr) {
- // instruction is a store in the loop so the loop must does write.
+ if (loop_info == nullptr) {
+ // Store is not in a loop. We try to precisely track the heap value by
+ // the store.
+ possibly_redundant = true;
+ } else if (!loop_info->IsIrreducible()) {
+ // instruction is a store in the loop, so the loop must do a write.
DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
-
- if (loop_info->IsDefinedOutOfTheLoop(original_ref)) {
- DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
- // Keep the store since its value may be needed at the loop header.
- possibly_redundant = false;
- } else {
- // The singleton is created inside the loop. Value stored to it isn't needed at
+ if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(original_ref)) {
+ // original_ref is created inside the loop. Value stored to it isn't needed at
// the loop header. This is true for outer loops also.
+ possibly_redundant = true;
+ } else {
+ // Keep the store since its value may be needed at the loop header.
}
+ } else {
+ // Keep the store inside irreducible loops.
}
}
- if (same_value || possibly_redundant) {
+ if (possibly_redundant) {
possibly_removed_stores_.push_back(instruction);
}
- if (!same_value) {
- if (possibly_redundant) {
- DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet());
- // Put the store as the heap value. If the value is loaded from heap
- // by a load later, this store isn't really redundant.
- heap_values[idx] = instruction;
- } else {
- heap_values[idx] = value;
- }
- }
+ // Put the store as the heap value. If the value is loaded or needed after
+ // return/deoptimization later, this store isn't really redundant.
+ heap_values[idx] = instruction;
+
// This store may kill values in other heap locations due to aliasing.
for (size_t i = 0; i < heap_values.size(); i++) {
if (i == idx) {
continue;
}
- if (heap_values[i] == value) {
+ if (Equal(heap_values[i], value)) {
// Same value should be kept even if aliasing happens.
continue;
}
@@ -547,7 +677,9 @@ class LSEVisitor : public HGraphDelegateVisitor {
continue;
}
if (heap_location_collector_.MayAlias(i, idx)) {
- // Kill heap locations that may alias.
+ // Kill heap locations that may alias. As a result, if the heap value
+ // is a store, the store needs to be kept.
+ KeepIfIsStore(heap_values[i]);
heap_values[i] = kUnknownHeapValue;
}
}
@@ -633,24 +765,35 @@ class LSEVisitor : public HGraphDelegateVisitor {
const ScopedArenaVector<HInstruction*>& heap_values =
heap_values_for_[instruction->GetBlock()->GetBlockId()];
for (HInstruction* heap_value : heap_values) {
- // Filter out fake instructions before checking instruction kind below.
- if (heap_value == kUnknownHeapValue || heap_value == kDefaultHeapValue) {
- continue;
- }
// A store is kept as the heap value for possibly removed stores.
- if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) {
- // Check whether the reference for a store is used by an environment local of
- // HDeoptimize.
+ // The stored value is generally observable after deoptimization, except
+ // for singletons that don't escape after deoptimization.
+ if (IsStore(heap_value)) {
+ if (heap_value->IsStaticFieldSet()) {
+ KeepIfIsStore(heap_value);
+ continue;
+ }
HInstruction* reference = heap_value->InputAt(0);
- DCHECK(heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton());
- for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) {
- HEnvironment* user = use.GetUser();
- if (user->GetHolder() == instruction) {
- // The singleton for the store is visible at this deoptimization
- // point. Need to keep the store so that the heap value is
- // seen by the interpreter.
+ if (heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()) {
+ if (reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable()) {
+ // Finalizable objects always escape.
KeepIfIsStore(heap_value);
+ continue;
+ }
+ // Check whether the reference for a store is used by an environment local of
+ // HDeoptimize. If not, the singleton is not observed after
+ // deoptimization.
+ for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) {
+ HEnvironment* user = use.GetUser();
+ if (user->GetHolder() == instruction) {
+ // The singleton for the store is visible at this deoptimization
+ // point. Need to keep the store so that the heap value is
+ // seen by the interpreter.
+ KeepIfIsStore(heap_value);
+ }
}
+ } else {
+ KeepIfIsStore(heap_value);
}
}
}
@@ -691,9 +834,12 @@ class LSEVisitor : public HGraphDelegateVisitor {
// Singleton references cannot be seen by the callee.
} else {
if (side_effects.DoesAnyRead()) {
+ // Invocation may read the heap value.
KeepIfIsStore(heap_values[i]);
}
if (side_effects.DoesAnyWrite()) {
+ // Keep the store since it's not used to track the heap value anymore.
+ KeepIfIsStore(heap_values[i]);
heap_values[i] = kUnknownHeapValue;
}
}
@@ -758,7 +904,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
return;
}
if (ref_info->IsSingletonAndRemovable()) {
- singleton_new_arrays_.push_back(new_array);
+ singleton_new_instances_.push_back(new_array);
}
ScopedArenaVector<HInstruction*>& heap_values =
heap_values_for_[new_array->GetBlock()->GetBlockId()];
@@ -791,7 +937,6 @@ class LSEVisitor : public HGraphDelegateVisitor {
ScopedArenaVector<HInstruction*> possibly_removed_stores_;
ScopedArenaVector<HInstruction*> singleton_new_instances_;
- ScopedArenaVector<HInstruction*> singleton_new_arrays_;
DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
};
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 3dc1ef7534..899496328e 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -30,46 +30,6 @@
namespace art {
-// TODO: Clean up the packed type detection so that we have the right type straight away
-// and do not need to go through this normalization.
-static inline void NormalizePackedType(/* inout */ DataType::Type* type,
- /* inout */ bool* is_unsigned) {
- switch (*type) {
- case DataType::Type::kBool:
- DCHECK(!*is_unsigned);
- break;
- case DataType::Type::kUint8:
- case DataType::Type::kInt8:
- if (*is_unsigned) {
- *is_unsigned = false;
- *type = DataType::Type::kUint8;
- } else {
- *type = DataType::Type::kInt8;
- }
- break;
- case DataType::Type::kUint16:
- case DataType::Type::kInt16:
- if (*is_unsigned) {
- *is_unsigned = false;
- *type = DataType::Type::kUint16;
- } else {
- *type = DataType::Type::kInt16;
- }
- break;
- case DataType::Type::kInt32:
- case DataType::Type::kInt64:
- // We do not have kUint32 and kUint64 at the moment.
- break;
- case DataType::Type::kFloat32:
- case DataType::Type::kFloat64:
- DCHECK(!*is_unsigned);
- break;
- default:
- LOG(FATAL) << "Unexpected type " << *type;
- UNREACHABLE();
- }
-}
-
// Enables vectorization (SIMDization) in the loop optimizer.
static constexpr bool kEnableVectorization = true;
@@ -1362,8 +1322,10 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
}
if (VectorizeUse(node, r, generate_code, type, restrictions)) {
if (generate_code) {
- NormalizePackedType(&type, &is_unsigned);
- GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type);
+ GenerateVecOp(instruction,
+ vector_map_->Get(r),
+ nullptr,
+ HVecOperation::ToProperType(type, is_unsigned));
}
return true;
}
@@ -1865,18 +1827,26 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
case Intrinsics::kMathMinLongLong:
case Intrinsics::kMathMinFloatFloat:
case Intrinsics::kMathMinDoubleDouble: {
- NormalizePackedType(&type, &is_unsigned);
vector = new (global_allocator_)
- HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned, dex_pc);
+ HVecMin(global_allocator_,
+ opa,
+ opb,
+ HVecOperation::ToProperType(type, is_unsigned),
+ vector_length_,
+ dex_pc);
break;
}
case Intrinsics::kMathMaxIntInt:
case Intrinsics::kMathMaxLongLong:
case Intrinsics::kMathMaxFloatFloat:
case Intrinsics::kMathMaxDoubleDouble: {
- NormalizePackedType(&type, &is_unsigned);
vector = new (global_allocator_)
- HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned, dex_pc);
+ HVecMax(global_allocator_,
+ opa,
+ opb,
+ HVecOperation::ToProperType(type, is_unsigned),
+ vector_length_,
+ dex_pc);
break;
}
default:
@@ -1987,15 +1957,13 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
VectorizeUse(node, s, generate_code, type, restrictions)) {
if (generate_code) {
if (vector_mode_ == kVector) {
- NormalizePackedType(&type, &is_unsigned);
vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd(
global_allocator_,
vector_map_->Get(r),
vector_map_->Get(s),
- type,
+ HVecOperation::ToProperType(type, is_unsigned),
vector_length_,
is_rounded,
- is_unsigned,
kNoDexPc));
MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
} else {
@@ -2086,7 +2054,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
VectorizeUse(node, r, generate_code, sub_type, restrictions) &&
VectorizeUse(node, s, generate_code, sub_type, restrictions)) {
if (generate_code) {
- NormalizePackedType(&reduction_type, &is_unsigned);
+ reduction_type = HVecOperation::ToProperType(reduction_type, is_unsigned);
if (vector_mode_ == kVector) {
vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate(
global_allocator_,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 727431a493..91e475d737 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -865,6 +865,15 @@ void HLoopInformation::Populate() {
graph->SetHasLoops(true);
}
+void HLoopInformation::PopulateInnerLoopUpwards(HLoopInformation* inner_loop) {
+ DCHECK(inner_loop->GetPreHeader()->GetLoopInformation() == this);
+ blocks_.Union(&inner_loop->blocks_);
+ HLoopInformation* outer_loop = GetPreHeader()->GetLoopInformation();
+ if (outer_loop != nullptr) {
+ outer_loop->PopulateInnerLoopUpwards(this);
+ }
+}
+
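A minimal, self-contained sketch of the recursion this introduces; `Loop` and `std::set` below are invented stand-ins for ART's `HLoopInformation` and its block bit vector, used only to show the shape of the upward propagation:

#include <set>

struct Loop {
  std::set<int> blocks;   // ids of blocks known to be in this loop
  Loop* outer = nullptr;  // enclosing loop, if any

  // Absorb the inner loop's blocks, then propagate one level outwards,
  // mirroring HLoopInformation::PopulateInnerLoopUpwards.
  void PopulateInnerLoopUpwards(const Loop* inner) {
    blocks.insert(inner->blocks.begin(), inner->blocks.end());
    if (outer != nullptr) {
      outer->PopulateInnerLoopUpwards(this);
    }
  }
};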
HBasicBlock* HLoopInformation::GetPreHeader() const {
HBasicBlock* block = header_->GetPredecessors()[0];
DCHECK(irreducible_ || (block == header_->GetDominator()));
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index d4382c6b4c..43ca2cf874 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -32,11 +32,11 @@
#include "deoptimization_kind.h"
#include "dex/dex_file.h"
#include "dex/dex_file_types.h"
+#include "dex/invoke_type.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "handle.h"
#include "handle_scope.h"
#include "intrinsics_enum.h"
-#include "invoke_type.h"
#include "locations.h"
#include "method_reference.h"
#include "mirror/class.h"
@@ -826,6 +826,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
// Finds blocks that are part of this loop.
void Populate();
+ // Updates the block population of this loop and, recursively, of all its
+ // outer loops after the population of the inner loop has been updated.
+ void PopulateInnerLoopUpwards(HLoopInformation* inner_loop);
+
// Returns whether this loop information contains `block`.
// Note that this loop information *must* be populated before entering this function.
bool Contains(const HBasicBlock& block) const;
@@ -856,6 +860,12 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
bool HasExitEdge() const;
+ // Resets back edge and blocks-in-loop data.
+ void ResetBasicBlockData() {
+ back_edges_.clear();
+ ClearAllBlocks();
+ }
+
private:
// Internal recursive implementation of `Populate`.
void PopulateRecursive(HBasicBlock* block);
@@ -998,6 +1008,18 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
loop_information_->AddBackEdge(back_edge);
}
+ // Registers a back edge; if the block was not a loop header before the call,
+ // associates a newly created loop info with it.
+ //
+ // Used in SuperblockCloner to preserve the LoopInformation object instead of
+ // resetting loop info for all blocks during back edge recalculation.
+ void AddBackEdgeWhileUpdating(HBasicBlock* back_edge) {
+ if (loop_information_ == nullptr || loop_information_->GetHeader() != this) {
+ loop_information_ = new (graph_->GetAllocator()) HLoopInformation(this, graph_);
+ }
+ loop_information_->AddBackEdge(back_edge);
+ }
+
HGraph* GetGraph() const { return graph_; }
void SetGraph(HGraph* graph) { graph_ = graph; }
@@ -2018,6 +2040,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
// TODO: We should rename to CanVisiblyThrow, as some instructions (like HNewInstance),
// could throw OOME, but it is still OK to remove them if they are unused.
virtual bool CanThrow() const { return false; }
+
+ // Does the instruction unconditionally throw an exception?
+ virtual bool AlwaysThrows() const { return false; }
+
bool CanThrowIntoCatchBlock() const { return CanThrow() && block_->IsTryBlock(); }
bool HasSideEffects() const { return side_effects_.HasSideEffects(); }
@@ -4169,6 +4195,10 @@ class HInvoke : public HVariableInputSizeInstruction {
bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); }
+ void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); }
+
+ bool AlwaysThrows() const OVERRIDE { return GetPackedFlag<kFlagAlwaysThrows>(); }
+
bool CanBeMoved() const OVERRIDE { return IsIntrinsic() && !DoesAnyWrite(); }
bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
@@ -4199,7 +4229,8 @@ class HInvoke : public HVariableInputSizeInstruction {
static constexpr size_t kFieldReturnTypeSize =
MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast));
static constexpr size_t kFlagCanThrow = kFieldReturnType + kFieldReturnTypeSize;
- static constexpr size_t kNumberOfInvokePackedBits = kFlagCanThrow + 1;
+ static constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1;
+ static constexpr size_t kNumberOfInvokePackedBits = kFlagAlwaysThrows + 1;
static_assert(kNumberOfInvokePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>;
using ReturnTypeField = BitField<DataType::Type, kFieldReturnType, kFieldReturnTypeSize>;
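The new flag slot can be sanity-checked with a hedged, standalone sketch; the offset value and the SetFlag/GetFlag helpers below are invented stand-ins for ART's packed-field machinery, not its real BitField API:

#include <cstddef>
#include <cstdint>

// Invented base offset standing in for kFieldReturnType + kFieldReturnTypeSize.
constexpr size_t kFlagCanThrow = 10;
constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1;  // the new flag slot
constexpr size_t kNumberOfInvokePackedBits = kFlagAlwaysThrows + 1;
static_assert(kNumberOfInvokePackedBits <= 32, "Too many packed fields.");

// Minimal get/set over a packed 32-bit field word.
constexpr uint32_t SetFlag(uint32_t bits, size_t flag, bool value) {
  return value ? (bits | (1u << flag)) : (bits & ~(1u << flag));
}
constexpr bool GetFlag(uint32_t bits, size_t flag) {
  return (bits & (1u << flag)) != 0;
}
static_assert(GetFlag(SetFlag(0u, kFlagAlwaysThrows, true), kFlagAlwaysThrows),
              "flag round-trips");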
@@ -6575,6 +6606,8 @@ class HThrow FINAL : public HTemplateInstruction<1> {
bool CanThrow() const OVERRIDE { return true; }
+ bool AlwaysThrows() const OVERRIDE { return true; }
+
DECLARE_INSTRUCTION(Throw);
protected:
@@ -7298,19 +7331,19 @@ HInstruction* ReplaceInstrOrPhiByClone(HInstruction* instr);
class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor {
public:
explicit CloneAndReplaceInstructionVisitor(HGraph* graph)
- : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count(0) {}
+ : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {}
void VisitInstruction(HInstruction* instruction) OVERRIDE {
if (instruction->IsClonable()) {
ReplaceInstrOrPhiByClone(instruction);
- instr_replaced_by_clones_count++;
+ instr_replaced_by_clones_count_++;
}
}
- size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count; }
+ size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count_; }
private:
- size_t instr_replaced_by_clones_count;
+ size_t instr_replaced_by_clones_count_;
DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor);
};
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 87dff8403b..ecabdf3b76 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -131,8 +131,6 @@ class HVecOperation : public HVariableInputSizeInstruction {
}
// Maps an integral type to the same-size signed type and leaves other types alone.
- // Can be used to test relaxed type consistency in which packed same-size integral
- // types can co-exist, but other type mixes are an error.
static DataType::Type ToSignedType(DataType::Type type) {
switch (type) {
case DataType::Type::kBool: // 1-byte storage unit
@@ -160,6 +158,11 @@ class HVecOperation : public HVariableInputSizeInstruction {
}
}
+ // Maps an integral type to the same-size (un)signed type. Leaves other types alone.
+ static DataType::Type ToProperType(DataType::Type type, bool is_unsigned) {
+ return is_unsigned ? ToUnsignedType(type) : ToSignedType(type);
+ }
+
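As a hedged illustration of the mapping, with the trimmed `Type` enum below as an invented stand-in for `DataType::Type`: `ToProperType` simply dispatches to the signed or unsigned same-size normalization.

enum class Type { kUint8, kInt8, kUint16, kInt16, kInt32, kFloat32 };

// Same-size unsigned counterpart for integral types; others unchanged.
Type ToUnsignedType(Type t) {
  switch (t) {
    case Type::kInt8:  return Type::kUint8;
    case Type::kInt16: return Type::kUint16;
    default:           return t;
  }
}
// Same-size signed counterpart for integral types; others unchanged.
Type ToSignedType(Type t) {
  switch (t) {
    case Type::kUint8:  return Type::kInt8;
    case Type::kUint16: return Type::kInt16;
    default:            return t;
  }
}
Type ToProperType(Type t, bool is_unsigned) {
  return is_unsigned ? ToUnsignedType(t) : ToSignedType(t);
}
// E.g. ToProperType(Type::kInt16, /*is_unsigned=*/ true) == Type::kUint16.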
// Helper method to determine if an instruction returns a SIMD value.
// TODO: This method is needed until we introduce SIMD as proper type.
static bool ReturnsSIMDValue(HInstruction* instruction) {
@@ -286,6 +289,8 @@ class HVecMemoryOperation : public HVecOperation {
};
// Packed type consistency checker ("same vector length" integral types may mix freely).
+// Tests relaxed type consistency in which packed same-size integral types can co-exist,
+// but other type mixes are an error.
inline static bool HasConsistentPackedTypes(HInstruction* input, DataType::Type type) {
if (input->IsPhi()) {
return input->GetType() == HVecOperation::kSIMDType; // carries SIMD
@@ -518,7 +523,7 @@ class HVecAdd FINAL : public HVecBinaryOperation {
// Performs halving add on every component in the two vectors, viz.
// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
// truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ]
-// for either both signed or both unsigned operands x, y.
+// for either both signed or both unsigned operands x, y (reflected in packed_type).
class HVecHalvingAdd FINAL : public HVecBinaryOperation {
public:
HVecHalvingAdd(ArenaAllocator* allocator,
@@ -527,21 +532,13 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation {
DataType::Type packed_type,
size_t vector_length,
bool is_rounded,
- bool is_unsigned,
uint32_t dex_pc)
: HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
- // The `is_unsigned` flag should be used exclusively with the Int32 or Int64.
- // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types.
- DCHECK(!is_unsigned ||
- packed_type == DataType::Type::kInt32 ||
- packed_type == DataType::Type::kInt64) << packed_type;
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
- SetPackedFlag<kFieldHAddIsUnsigned>(is_unsigned);
SetPackedFlag<kFieldHAddIsRounded>(is_rounded);
}
- bool IsUnsigned() const { return GetPackedFlag<kFieldHAddIsUnsigned>(); }
bool IsRounded() const { return GetPackedFlag<kFieldHAddIsRounded>(); }
bool CanBeMoved() const OVERRIDE { return true; }
@@ -549,9 +546,7 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation {
bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
DCHECK(other->IsVecHalvingAdd());
const HVecHalvingAdd* o = other->AsVecHalvingAdd();
- return HVecOperation::InstructionDataEquals(o) &&
- IsUnsigned() == o->IsUnsigned() &&
- IsRounded() == o->IsRounded();
+ return HVecOperation::InstructionDataEquals(o) && IsRounded() == o->IsRounded();
}
DECLARE_INSTRUCTION(VecHalvingAdd);
@@ -561,8 +556,7 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation {
private:
// Additional packed bits.
- static constexpr size_t kFieldHAddIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
- static constexpr size_t kFieldHAddIsRounded = kFieldHAddIsUnsigned + 1;
+ static constexpr size_t kFieldHAddIsRounded = HVecOperation::kNumberOfVectorOpPackedBits;
static constexpr size_t kNumberOfHAddPackedBits = kFieldHAddIsRounded + 1;
static_assert(kNumberOfHAddPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
};
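The rounded/truncated distinction in the class comment is easy to check component-wise; a hedged scalar sketch follows (plain functions, not HIR nodes), widening to a larger type so the intermediate sum cannot overflow:

#include <cstdint>

// Truncated halving add: (x + y) >> 1.
uint8_t HAdd(uint8_t x, uint8_t y) {
  return static_cast<uint8_t>((static_cast<uint32_t>(x) + y) >> 1);
}
// Rounded halving add: (x + y + 1) >> 1.
uint8_t HRAdd(uint8_t x, uint8_t y) {
  return static_cast<uint8_t>((static_cast<uint32_t>(x) + y + 1) >> 1);
}
// HAdd(1, 2) == 1 (truncates), HRAdd(1, 2) == 2 (rounds up).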
@@ -638,7 +632,7 @@ class HVecDiv FINAL : public HVecBinaryOperation {
// Takes minimum of every component in the two vectors,
// viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ]
-// for either both signed or both unsigned operands x, y.
+// for either both signed or both unsigned operands x, y (reflected in packed_type).
class HVecMin FINAL : public HVecBinaryOperation {
public:
HVecMin(ArenaAllocator* allocator,
@@ -646,44 +640,23 @@ class HVecMin FINAL : public HVecBinaryOperation {
HInstruction* right,
DataType::Type packed_type,
size_t vector_length,
- bool is_unsigned,
uint32_t dex_pc)
: HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
- // The `is_unsigned` flag should be used exclusively with the Int32 or Int64.
- // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types.
- DCHECK(!is_unsigned ||
- packed_type == DataType::Type::kInt32 ||
- packed_type == DataType::Type::kInt64) << packed_type;
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
- SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned);
}
- bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); }
-
bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
- DCHECK(other->IsVecMin());
- const HVecMin* o = other->AsVecMin();
- return HVecOperation::InstructionDataEquals(o) && IsUnsigned() == o->IsUnsigned();
- }
-
DECLARE_INSTRUCTION(VecMin);
protected:
DEFAULT_COPY_CONSTRUCTOR(VecMin);
-
- private:
- // Additional packed bits.
- static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
- static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1;
- static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
};
// Takes maximum of every component in the two vectors,
// viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ]
-// for either both signed or both unsigned operands x, y.
+// for either both signed or both unsigned operands x, y (reflected in packed_type).
class HVecMax FINAL : public HVecBinaryOperation {
public:
HVecMax(ArenaAllocator* allocator,
@@ -691,39 +664,18 @@ class HVecMax FINAL : public HVecBinaryOperation {
HInstruction* right,
DataType::Type packed_type,
size_t vector_length,
- bool is_unsigned,
uint32_t dex_pc)
: HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
- // The `is_unsigned` flag should be used exclusively with the Int32 or Int64.
- // This flag is a temporary measure while we do not have the Uint32 and Uint64 data types.
- DCHECK(!is_unsigned ||
- packed_type == DataType::Type::kInt32 ||
- packed_type == DataType::Type::kInt64) << packed_type;
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
- SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned);
}
- bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); }
-
bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
- DCHECK(other->IsVecMax());
- const HVecMax* o = other->AsVecMax();
- return HVecOperation::InstructionDataEquals(o) && IsUnsigned() == o->IsUnsigned();
- }
-
DECLARE_INSTRUCTION(VecMax);
protected:
DEFAULT_COPY_CONSTRUCTOR(VecMax);
-
- private:
- // Additional packed bits.
- static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
- static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1;
- static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
};
// Bitwise-ands every component in the two vectors,
diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc
index ab9d7594d9..af13449646 100644
--- a/compiler/optimizing/nodes_vector_test.cc
+++ b/compiler/optimizing/nodes_vector_test.cc
@@ -282,143 +282,53 @@ TEST_F(NodesVectorTest, VectorAlignmentMattersOnStore) {
EXPECT_FALSE(v0->Equals(v1)); // no longer equal
}
-TEST_F(NodesVectorTest, VectorSignMattersOnMin) {
- HVecOperation* p0 = new (GetAllocator())
- HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc);
- HVecOperation* p1 = new (GetAllocator())
- HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 4, kNoDexPc);
- HVecOperation* p2 = new (GetAllocator())
- HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 4, kNoDexPc);
-
- HVecMin* v0 = new (GetAllocator()) HVecMin(
- GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ true, kNoDexPc);
- HVecMin* v1 = new (GetAllocator()) HVecMin(
- GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ false, kNoDexPc);
- HVecMin* v2 = new (GetAllocator()) HVecMin(
- GetAllocator(), p0, p0, DataType::Type::kInt32, 2, /*is_unsigned*/ true, kNoDexPc);
- HVecMin* v3 = new (GetAllocator()) HVecMin(
- GetAllocator(), p1, p1, DataType::Type::kUint8, 16, /*is_unsigned*/ false, kNoDexPc);
- HVecMin* v4 = new (GetAllocator()) HVecMin(
- GetAllocator(), p1, p1, DataType::Type::kInt8, 16, /*is_unsigned*/ false, kNoDexPc);
- HVecMin* v5 = new (GetAllocator()) HVecMin(
- GetAllocator(), p2, p2, DataType::Type::kUint16, 8, /*is_unsigned*/ false, kNoDexPc);
- HVecMin* v6 = new (GetAllocator()) HVecMin(
- GetAllocator(), p2, p2, DataType::Type::kInt16, 8, /*is_unsigned*/ false, kNoDexPc);
- HVecMin* min_insns[] = { v0, v1, v2, v3, v4, v5, v6 };
-
- EXPECT_FALSE(p0->CanBeMoved());
- EXPECT_FALSE(p1->CanBeMoved());
- EXPECT_FALSE(p2->CanBeMoved());
-
- for (HVecMin* min_insn : min_insns) {
- EXPECT_TRUE(min_insn->CanBeMoved());
- }
-
- // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64.
- EXPECT_TRUE(v0->IsUnsigned());
- EXPECT_FALSE(v1->IsUnsigned());
- EXPECT_TRUE(v2->IsUnsigned());
-
- for (HVecMin* min_insn1 : min_insns) {
- for (HVecMin* min_insn2 : min_insns) {
- EXPECT_EQ(min_insn1 == min_insn2, min_insn1->Equals(min_insn2));
- }
- }
-}
-
-TEST_F(NodesVectorTest, VectorSignMattersOnMax) {
- HVecOperation* p0 = new (GetAllocator())
- HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc);
- HVecOperation* p1 = new (GetAllocator())
- HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 4, kNoDexPc);
- HVecOperation* p2 = new (GetAllocator())
- HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 4, kNoDexPc);
-
- HVecMax* v0 = new (GetAllocator()) HVecMax(
- GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ true, kNoDexPc);
- HVecMax* v1 = new (GetAllocator()) HVecMax(
- GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_unsigned*/ false, kNoDexPc);
- HVecMax* v2 = new (GetAllocator()) HVecMax(
- GetAllocator(), p0, p0, DataType::Type::kInt32, 2, /*is_unsigned*/ true, kNoDexPc);
- HVecMax* v3 = new (GetAllocator()) HVecMax(
- GetAllocator(), p1, p1, DataType::Type::kUint8, 16, /*is_unsigned*/ false, kNoDexPc);
- HVecMax* v4 = new (GetAllocator()) HVecMax(
- GetAllocator(), p1, p1, DataType::Type::kInt8, 16, /*is_unsigned*/ false, kNoDexPc);
- HVecMax* v5 = new (GetAllocator()) HVecMax(
- GetAllocator(), p2, p2, DataType::Type::kUint16, 8, /*is_unsigned*/ false, kNoDexPc);
- HVecMax* v6 = new (GetAllocator()) HVecMax(
- GetAllocator(), p2, p2, DataType::Type::kInt16, 8, /*is_unsigned*/ false, kNoDexPc);
- HVecMax* max_insns[] = { v0, v1, v2, v3, v4, v5, v6 };
-
- EXPECT_FALSE(p0->CanBeMoved());
- EXPECT_FALSE(p1->CanBeMoved());
- EXPECT_FALSE(p2->CanBeMoved());
-
- for (HVecMax* max_insn : max_insns) {
- EXPECT_TRUE(max_insn->CanBeMoved());
- }
-
- // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64.
- EXPECT_TRUE(v0->IsUnsigned());
- EXPECT_FALSE(v1->IsUnsigned());
- EXPECT_TRUE(v2->IsUnsigned());
-
- for (HVecMax* max_insn1 : max_insns) {
- for (HVecMax* max_insn2 : max_insns) {
- EXPECT_EQ(max_insn1 == max_insn2, max_insn1->Equals(max_insn2));
- }
- }
-}
-
TEST_F(NodesVectorTest, VectorAttributesMatterOnHalvingAdd) {
+ HVecOperation* u0 = new (GetAllocator())
+ HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kUint32, 4, kNoDexPc);
+ HVecOperation* u1 = new (GetAllocator())
+ HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kUint16, 8, kNoDexPc);
+ HVecOperation* u2 = new (GetAllocator())
+ HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kUint8, 16, kNoDexPc);
+
HVecOperation* p0 = new (GetAllocator())
HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc);
HVecOperation* p1 = new (GetAllocator())
- HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 4, kNoDexPc);
+ HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 8, kNoDexPc);
HVecOperation* p2 = new (GetAllocator())
- HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 4, kNoDexPc);
+ HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 16, kNoDexPc);
HVecHalvingAdd* v0 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p0, p0, DataType::Type::kInt32, 4,
- /*is_rounded*/ true, /*is_unsigned*/ true, kNoDexPc);
+ GetAllocator(), u0, u0, DataType::Type::kUint32, 4, /*is_rounded*/ true, kNoDexPc);
HVecHalvingAdd* v1 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p0, p0, DataType::Type::kInt32, 4,
- /*is_rounded*/ false, /*is_unsigned*/ true, kNoDexPc);
+ GetAllocator(), u0, u0, DataType::Type::kUint32, 4, /*is_rounded*/ false, kNoDexPc);
HVecHalvingAdd* v2 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p0, p0, DataType::Type::kInt32, 4,
- /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc);
+ GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ true, kNoDexPc);
HVecHalvingAdd* v3 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p0, p0, DataType::Type::kInt32, 4,
- /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc);
+ GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ false, kNoDexPc);
+
HVecHalvingAdd* v4 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p0, p0, DataType::Type::kInt32, 2,
- /*is_rounded*/ true, /*is_unsigned*/ true, kNoDexPc);
+ GetAllocator(), u1, u1, DataType::Type::kUint16, 8, /*is_rounded*/ true, kNoDexPc);
HVecHalvingAdd* v5 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p1, p1, DataType::Type::kUint8, 16,
- /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc);
+ GetAllocator(), u1, u1, DataType::Type::kUint16, 8, /*is_rounded*/ false, kNoDexPc);
HVecHalvingAdd* v6 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p1, p1, DataType::Type::kUint8, 16,
- /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc);
+ GetAllocator(), p1, p1, DataType::Type::kInt16, 8, /*is_rounded*/ true, kNoDexPc);
HVecHalvingAdd* v7 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p1, p1, DataType::Type::kInt8, 16,
- /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc);
+ GetAllocator(), p1, p1, DataType::Type::kInt16, 8, /*is_rounded*/ false, kNoDexPc);
+
HVecHalvingAdd* v8 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p1, p1, DataType::Type::kInt8, 16,
- /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc);
+ GetAllocator(), u2, u2, DataType::Type::kUint8, 16, /*is_rounded*/ true, kNoDexPc);
HVecHalvingAdd* v9 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p2, p2, DataType::Type::kUint16, 8,
- /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc);
+ GetAllocator(), u2, u2, DataType::Type::kUint8, 16, /*is_rounded*/ false, kNoDexPc);
HVecHalvingAdd* v10 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p2, p2, DataType::Type::kUint16, 8,
- /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc);
+ GetAllocator(), p2, p2, DataType::Type::kInt8, 16, /*is_rounded*/ true, kNoDexPc);
HVecHalvingAdd* v11 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p2, p2, DataType::Type::kInt16, 2,
- /*is_rounded*/ true, /*is_unsigned*/ false, kNoDexPc);
- HVecHalvingAdd* v12 = new (GetAllocator()) HVecHalvingAdd(
- GetAllocator(), p2, p2, DataType::Type::kInt16, 2,
- /*is_rounded*/ false, /*is_unsigned*/ false, kNoDexPc);
- HVecHalvingAdd* hadd_insns[] = { v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12 };
+ GetAllocator(), p2, p2, DataType::Type::kInt8, 16, /*is_rounded*/ false, kNoDexPc);
+ HVecHalvingAdd* hadd_insns[] = { v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 };
+
+ EXPECT_FALSE(u0->CanBeMoved());
+ EXPECT_FALSE(u1->CanBeMoved());
+ EXPECT_FALSE(u2->CanBeMoved());
EXPECT_FALSE(p0->CanBeMoved());
EXPECT_FALSE(p1->CanBeMoved());
EXPECT_FALSE(p2->CanBeMoved());
@@ -427,26 +337,18 @@ TEST_F(NodesVectorTest, VectorAttributesMatterOnHalvingAdd) {
EXPECT_TRUE(hadd_insn->CanBeMoved());
}
- // Deprecated; IsUnsigned() should be removed with the introduction of Uint32 and Uint64.
- EXPECT_TRUE(v0->IsUnsigned());
- EXPECT_TRUE(v1->IsUnsigned());
- EXPECT_TRUE(!v2->IsUnsigned());
- EXPECT_TRUE(!v3->IsUnsigned());
- EXPECT_TRUE(v4->IsUnsigned());
-
EXPECT_TRUE(v0->IsRounded());
EXPECT_TRUE(!v1->IsRounded());
EXPECT_TRUE(v2->IsRounded());
EXPECT_TRUE(!v3->IsRounded());
EXPECT_TRUE(v4->IsRounded());
- EXPECT_TRUE(v5->IsRounded());
- EXPECT_TRUE(!v6->IsRounded());
- EXPECT_TRUE(v7->IsRounded());
- EXPECT_TRUE(!v8->IsRounded());
- EXPECT_TRUE(v9->IsRounded());
- EXPECT_TRUE(!v10->IsRounded());
- EXPECT_TRUE(v11->IsRounded());
- EXPECT_TRUE(!v12->IsRounded());
+ EXPECT_TRUE(!v5->IsRounded());
+ EXPECT_TRUE(v6->IsRounded());
+ EXPECT_TRUE(!v7->IsRounded());
+ EXPECT_TRUE(v8->IsRounded());
+ EXPECT_TRUE(!v9->IsRounded());
+ EXPECT_TRUE(v10->IsRounded());
+ EXPECT_TRUE(!v11->IsRounded());
for (HVecHalvingAdd* hadd_insn1 : hadd_insns) {
for (HVecHalvingAdd* hadd_insn2 : hadd_insns) {
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index e2b2106f65..d20b681b49 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -41,7 +41,7 @@ namespace art {
// Run the tests only on host.
#ifndef ART_TARGET_ANDROID
-class OptimizingCFITest : public CFITest {
+class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
public:
// Enable this flag to generate the expected outputs.
static constexpr bool kGenerateExpected = false;
@@ -63,7 +63,7 @@ class OptimizingCFITest : public CFITest {
// Setup simple context.
std::string error;
isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error);
- graph_ = CreateGraph(&pool_and_allocator_);
+ graph_ = CreateGraph();
// Generate simple frame with some spills.
code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_);
code_gen_->GetAssembler()->cfi().SetEnabled(true);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f4115f7e7b..b3f23a0dcd 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -382,6 +382,9 @@ class OptimizingCompiler FINAL : public Compiler {
PassObserver* pass_observer,
VariableSizedHandleScope* handles) const;
+ void GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo method_debug_info)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
std::unique_ptr<std::ostream> visualizer_output_;
@@ -772,7 +775,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
return nullptr;
}
- CodeItemDebugInfoAccessor code_item_accessor(dex_file, code_item);
+ CodeItemDebugInfoAccessor code_item_accessor(dex_file, code_item, method_idx);
HGraph* graph = new (allocator) HGraph(
allocator,
arena_stack,
@@ -783,7 +786,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
compiler_driver->GetCompilerOptions().GetDebuggable(),
osr);
- const uint8_t* interpreter_metadata = nullptr;
+ ArrayRef<const uint8_t> interpreter_metadata;
// For AOT compilation, we may not get a method, for example if its class is erroneous.
// JIT should always have a method.
DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr);
@@ -940,7 +943,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
compiler_driver,
codegen.get(),
compilation_stats_.get(),
- /* interpreter_metadata */ nullptr,
+ /* interpreter_metadata */ ArrayRef<const uint8_t>(),
handles);
builder.BuildIntrinsicGraph(method);
}
@@ -1230,7 +1233,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
debug::MethodDebugInfo info = {};
- DCHECK(info.trampoline_name.empty());
+ DCHECK(info.custom_name.empty());
info.dex_file = dex_file;
info.class_def_index = class_def_idx;
info.dex_method_index = method_idx;
@@ -1246,14 +1249,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
info.code_info = nullptr;
info.cfi = jni_compiled_method.GetCfi();
- // If both flags are passed, generate full debug info.
- const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
- std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
- GetCompilerDriver()->GetInstructionSet(),
- GetCompilerDriver()->GetInstructionSetFeatures(),
- mini_debug_info,
- info);
- CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
+ GenerateJitDebugInfo(method, info);
}
Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
@@ -1361,7 +1357,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
debug::MethodDebugInfo info = {};
- DCHECK(info.trampoline_name.empty());
+ DCHECK(info.custom_name.empty());
info.dex_file = dex_file;
info.class_def_index = class_def_idx;
info.dex_method_index = method_idx;
@@ -1377,14 +1373,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
- // If both flags are passed, generate full debug info.
- const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
- std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
- GetCompilerDriver()->GetInstructionSet(),
- GetCompilerDriver()->GetInstructionSetFeatures(),
- mini_debug_info,
- info);
- CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
+ GenerateJitDebugInfo(method, info);
}
Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
@@ -1408,4 +1397,27 @@ bool OptimizingCompiler::JitCompile(Thread* self,
return true;
}
+void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo info) {
+ const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
+ DCHECK(compiler_options.GenerateAnyDebugInfo());
+
+ // If both flags are passed, generate full debug info.
+ const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
+
+ // Create entry for the single method that we just compiled.
+ std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
+ GetCompilerDriver()->GetInstructionSet(),
+ GetCompilerDriver()->GetInstructionSetFeatures(),
+ mini_debug_info,
+ ArrayRef<const debug::MethodDebugInfo>(&info, 1));
+ MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_);
+ JITCodeEntry* entry = CreateJITCodeEntry(elf_file);
+ IncrementJITCodeEntryRefcount(entry, info.code_address);
+
+ VLOG(jit)
+ << "JIT mini-debug-info added for " << ArtMethod::PrettyMethod(method)
+ << " size=" << PrettySize(elf_file.size())
+ << " total_size=" << PrettySize(GetJITCodeEntryMemUsage());
+}
+
} // namespace art
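The factored-out helper keeps the previous flag semantics. Restated as a tiny standalone predicate for illustration only (ART keeps this logic inline in GenerateJitDebugInfo; the helper name is hypothetical):

    // Callers must already satisfy GenerateAnyDebugInfo(), as DCHECKed above.
    // If both --generate-debug-info and --generate-mini-debug-info are set,
    // full debug info wins.
    static bool UseMiniDebugInfo(const CompilerOptions& options) {
      DCHECK(options.GenerateAnyDebugInfo());
      return !options.GetGenerateDebugInfo();
    }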
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 32a94ab5e4..0023265e50 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -75,6 +75,7 @@ enum class MethodCompilationStat {
kImplicitNullCheckGenerated,
kExplicitNullCheckGenerated,
kSimplifyIf,
+ kSimplifyThrowingInvoke,
kInstructionSunk,
kNotInlinedUnresolvedEntrypoint,
kNotInlinedDexCache,
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 8c97d57f4a..6dcbadba6e 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -17,12 +17,16 @@
#ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_
#define ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_
+#include <memory>
+#include <vector>
+
#include "base/scoped_arena_allocator.h"
#include "builder.h"
#include "common_compiler_test.h"
#include "dex/code_item_accessors-inl.h"
#include "dex/dex_file.h"
#include "dex/dex_instruction.h"
+#include "dex/standard_dex_file.h"
#include "driver/dex_compilation_unit.h"
#include "handle_scope-inl.h"
#include "mirror/class_loader.h"
@@ -99,18 +103,11 @@ class ArenaPoolAndAllocator {
ScopedArenaAllocator scoped_allocator_;
};
-inline HGraph* CreateGraph(ArenaPoolAndAllocator* pool_and_allocator) {
- return new (pool_and_allocator->GetAllocator()) HGraph(
- pool_and_allocator->GetAllocator(),
- pool_and_allocator->GetArenaStack(),
- *reinterpret_cast<DexFile*>(pool_and_allocator->GetAllocator()->Alloc(sizeof(DexFile))),
- /*method_idx*/-1,
- kRuntimeISA);
-}
-
-class OptimizingUnitTest : public CommonCompilerTest {
- protected:
- OptimizingUnitTest() : pool_and_allocator_(new ArenaPoolAndAllocator()) { }
+// Have a separate helper so that OptimizingCFITest can inherit it without causing
+// multiple-inheritance errors from having gtest as a parent twice.
+class OptimizingUnitTestHelper {
+ public:
+ OptimizingUnitTestHelper() : pool_and_allocator_(new ArenaPoolAndAllocator()) { }
ArenaAllocator* GetAllocator() { return pool_and_allocator_->GetAllocator(); }
ArenaStack* GetArenaStack() { return pool_and_allocator_->GetArenaStack(); }
@@ -122,14 +119,42 @@ class OptimizingUnitTest : public CommonCompilerTest {
}
HGraph* CreateGraph() {
- return art::CreateGraph(pool_and_allocator_.get());
+ ArenaAllocator* const allocator = pool_and_allocator_->GetAllocator();
+
+ // Reserve a big array of 0s so the dex file constructor can read offsets from the header.
+ static constexpr size_t kDexDataSize = 4 * KB;
+ const uint8_t* dex_data = reinterpret_cast<uint8_t*>(allocator->Alloc(kDexDataSize));
+
+ // Create the dex file based on the fake data. Call the constructor so that we can use virtual
+ // functions. Don't use the arena for the StandardDexFile, otherwise the dex location leaks.
+ dex_files_.emplace_back(new StandardDexFile(
+ dex_data,
+ sizeof(StandardDexFile::Header),
+ "no_location",
+ /*location_checksum*/ 0,
+ /*oat_dex_file*/ nullptr,
+ /*container*/ nullptr));
+
+ return new (allocator) HGraph(
+ allocator,
+ pool_and_allocator_->GetArenaStack(),
+ *dex_files_.back(),
+ /*method_idx*/-1,
+ kRuntimeISA);
}
// Create a control-flow graph from Dex instructions.
- HGraph* CreateCFG(const uint16_t* data, DataType::Type return_type = DataType::Type::kInt32) {
- const DexFile::CodeItem* code_item = reinterpret_cast<const DexFile::CodeItem*>(data);
+ HGraph* CreateCFG(const std::vector<uint16_t>& data,
+ DataType::Type return_type = DataType::Type::kInt32) {
HGraph* graph = CreateGraph();
+ // The code item data might not be aligned to 4 bytes; copy it to ensure that it is.
+ const size_t code_item_size = data.size() * sizeof(data.front());
+ void* aligned_data = GetAllocator()->Alloc(code_item_size);
+ memcpy(aligned_data, &data[0], code_item_size);
+ CHECK_ALIGNED(aligned_data, StandardDexFile::CodeItem::kAlignment);
+ const DexFile::CodeItem* code_item = reinterpret_cast<const DexFile::CodeItem*>(aligned_data);
+
{
ScopedObjectAccess soa(Thread::Current());
if (handles_ == nullptr) {
@@ -146,7 +171,7 @@ class OptimizingUnitTest : public CommonCompilerTest {
/* access_flags */ 0u,
/* verified_method */ nullptr,
handles_->NewHandle<mirror::DexCache>(nullptr));
- CodeItemDebugInfoAccessor accessor(graph->GetDexFile(), code_item);
+ CodeItemDebugInfoAccessor accessor(graph->GetDexFile(), code_item, /*dex_method_idx*/ 0u);
HGraphBuilder builder(graph, dex_compilation_unit, accessor, handles_.get(), return_type);
bool graph_built = (builder.BuildGraph() == kAnalysisSuccess);
return graph_built ? graph : nullptr;
@@ -154,10 +179,13 @@ class OptimizingUnitTest : public CommonCompilerTest {
}
private:
+ std::vector<std::unique_ptr<const StandardDexFile>> dex_files_;
std::unique_ptr<ArenaPoolAndAllocator> pool_and_allocator_;
std::unique_ptr<VariableSizedHandleScope> handles_;
};
+class OptimizingUnitTest : public CommonCompilerTest, public OptimizingUnitTestHelper {};
+
// Naive string diff data type.
typedef std::list<std::pair<std::string, std::string>> diff_t;
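With the new signatures, tests build code items as std::vector<uint16_t> and pass them by reference; CreateCFG then copies the data into properly aligned arena storage. A hedged sketch of the resulting call pattern (the test name and body are illustrative; ZERO_REGISTER_CODE_ITEM and CreateCFG come from this change):

    TEST_F(OptimizingUnitTest, BuildsTrivialCfg) {  // Hypothetical test.
      const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
          Instruction::RETURN_VOID);
      HGraph* graph = CreateCFG(data);  // Copies into 4-byte-aligned storage.
      ASSERT_NE(graph, nullptr);
    }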
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index 4fc7fe9427..6ef386b4a5 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -29,10 +29,10 @@ namespace art {
class PrettyPrinterTest : public OptimizingUnitTest {
protected:
- void TestCode(const uint16_t* data, const char* expected);
+ void TestCode(const std::vector<uint16_t>& data, const char* expected);
};
-void PrettyPrinterTest::TestCode(const uint16_t* data, const char* expected) {
+void PrettyPrinterTest::TestCode(const std::vector<uint16_t>& data, const char* expected) {
HGraph* graph = CreateCFG(data);
StringPrettyPrinter printer(graph);
printer.VisitInsertionOrder();
@@ -40,7 +40,7 @@ void PrettyPrinterTest::TestCode(const uint16_t* data, const char* expected) {
}
TEST_F(PrettyPrinterTest, ReturnVoid) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::RETURN_VOID);
const char* expected =
@@ -67,7 +67,7 @@ TEST_F(PrettyPrinterTest, CFG1) {
"BasicBlock 3, pred: 2\n"
" 4: Exit\n";
- const uint16_t data[] =
+ const std::vector<uint16_t> data =
ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x100,
Instruction::RETURN_VOID);
@@ -89,7 +89,7 @@ TEST_F(PrettyPrinterTest, CFG2) {
"BasicBlock 4, pred: 3\n"
" 5: Exit\n";
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x100,
Instruction::GOTO | 0x100,
Instruction::RETURN_VOID);
@@ -111,21 +111,21 @@ TEST_F(PrettyPrinterTest, CFG3) {
"BasicBlock 4, pred: 2\n"
" 5: Exit\n";
- const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x200,
Instruction::RETURN_VOID,
Instruction::GOTO | 0xFF00);
TestCode(data1, expected);
- const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_16, 3,
Instruction::RETURN_VOID,
Instruction::GOTO_16, 0xFFFF);
TestCode(data2, expected);
- const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_32, 4, 0,
Instruction::RETURN_VOID,
Instruction::GOTO_32, 0xFFFF, 0xFFFF);
@@ -144,13 +144,13 @@ TEST_F(PrettyPrinterTest, CFG4) {
"BasicBlock 3, pred: 0, succ: 1\n"
" 0: Goto 1\n";
- const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM(
Instruction::NOP,
Instruction::GOTO | 0xFF00);
TestCode(data1, expected);
- const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_32, 0, 0);
TestCode(data2, expected);
@@ -166,7 +166,7 @@ TEST_F(PrettyPrinterTest, CFG5) {
"BasicBlock 3, pred: 1\n"
" 3: Exit\n";
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::RETURN_VOID,
Instruction::GOTO | 0x100,
Instruction::GOTO | 0xFE00);
@@ -192,7 +192,7 @@ TEST_F(PrettyPrinterTest, CFG6) {
"BasicBlock 5, pred: 1, succ: 3\n"
" 0: Goto 3\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x100,
@@ -220,7 +220,7 @@ TEST_F(PrettyPrinterTest, CFG7) {
"BasicBlock 6, pred: 1, succ: 2\n"
" 1: Goto 2\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x100,
@@ -240,7 +240,7 @@ TEST_F(PrettyPrinterTest, IntConstant) {
"BasicBlock 2, pred: 1\n"
" 4: Exit\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN_VOID);
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 1d3fe0334d..27f9ac3990 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -103,6 +103,7 @@ void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoint
case DataType::Type::kFloat64:
slot += long_spill_slots;
FALLTHROUGH_INTENDED;
+ case DataType::Type::kUint64:
case DataType::Type::kInt64:
slot += float_spill_slots;
FALLTHROUGH_INTENDED;
@@ -110,6 +111,7 @@ void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoint
slot += int_spill_slots;
FALLTHROUGH_INTENDED;
case DataType::Type::kReference:
+ case DataType::Type::kUint32:
case DataType::Type::kInt32:
case DataType::Type::kUint16:
case DataType::Type::kUint8:
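The fall-through chain above lays the spill areas out back to back: a slot index in one category is offset by the sizes of all the categories placed before it. A toy standalone restatement of the accumulation (illustration only; the kFloat32 case label is an assumption, since the hunk truncates it):

    size_t SlotOffset(DataType::Type type,
                      size_t long_slots, size_t float_slots, size_t int_slots) {
      size_t slot = 0;
      switch (type) {
        case DataType::Type::kFloat64:
          slot += long_slots;
          FALLTHROUGH_INTENDED;
        case DataType::Type::kUint64:
        case DataType::Type::kInt64:
          slot += float_slots;
          FALLTHROUGH_INTENDED;
        case DataType::Type::kFloat32:  // Assumed from the surrounding code.
          slot += int_slots;
          FALLTHROUGH_INTENDED;
        default:
          break;  // Reference/integral slots start at offset 0.
      }
      return slot;
    }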
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index ad5248e982..fa7ad82316 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1972,6 +1972,8 @@ void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode*
case DataType::Type::kInt16:
int_intervals.push_back(parent);
break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
UNREACHABLE();
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index cfe63bd758..216fb57a96 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -1131,6 +1131,8 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
case DataType::Type::kInt16:
spill_slots = &int_spill_slots_;
break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
}
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 3748d599a3..a70b0664dc 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -46,7 +46,7 @@ class RegisterAllocatorTest : public OptimizingUnitTest {
void ExpectedInRegisterHint(Strategy strategy);
// Helper functions that make use of the OptimizingUnitTest's members.
- bool Check(const uint16_t* data, Strategy strategy);
+ bool Check(const std::vector<uint16_t>& data, Strategy strategy);
void CFG1(Strategy strategy);
void Loop1(Strategy strategy);
void Loop2(Strategy strategy);
@@ -79,7 +79,7 @@ TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\
test_name(Strategy::kRegisterAllocatorGraphColor);\
}
-bool RegisterAllocatorTest::Check(const uint16_t* data, Strategy strategy) {
+bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy strategy) {
HGraph* graph = CreateCFG(data);
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
@@ -185,7 +185,7 @@ void RegisterAllocatorTest::CFG1(Strategy strategy) {
* |
* exit
*/
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN);
@@ -222,7 +222,7 @@ void RegisterAllocatorTest::Loop1(Strategy strategy) {
* exit
*/
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -268,7 +268,7 @@ void RegisterAllocatorTest::Loop2(Strategy strategy) {
* exit
*/
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 8 << 12 | 1 << 8,
Instruction::IF_EQ | 1 << 8, 7,
@@ -314,7 +314,7 @@ void RegisterAllocatorTest::Loop3(Strategy strategy) {
* exit
*/
- const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8,
Instruction::CONST_4 | 5 << 12 | 2 << 8,
@@ -351,7 +351,7 @@ void RegisterAllocatorTest::Loop3(Strategy strategy) {
TEST_ALL_STRATEGIES(Loop3);
TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
- const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8,
Instruction::XOR_INT_LIT8 | 0 << 8, 1 << 8,
@@ -402,7 +402,7 @@ void RegisterAllocatorTest::DeadPhi(Strategy strategy) {
* } while (true);
*/
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 1 << 8 | 0,
Instruction::IF_NE | 1 << 8 | 1 << 12, 3,
@@ -432,7 +432,7 @@ TEST_ALL_STRATEGIES(DeadPhi);
* This test only applies to the linear scan allocator.
*/
TEST_F(RegisterAllocatorTest, FreeUntil) {
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN);
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index 104ebc79c2..fb15fc8975 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -182,7 +182,9 @@ class SchedulerTest : public OptimizingUnitTest {
scheduler->Schedule(graph_);
}
- void CompileWithRandomSchedulerAndRun(const uint16_t* data, bool has_result, int expected) {
+ void CompileWithRandomSchedulerAndRun(const std::vector<uint16_t>& data,
+ bool has_result,
+ int expected) {
for (CodegenTargetConfig target_config : GetTargetConfigs()) {
HGraph* graph = CreateCFG(data);
@@ -393,7 +395,7 @@ TEST_F(SchedulerTest, RandomScheduling) {
// }
// return result;
//
- const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 << 12 | 2 << 8, // const/4 v2, #int 0
Instruction::CONST_HIGH16 | 0 << 8, 0x4120, // const/high16 v0, #float 10.0 // #41200000
Instruction::CONST_4 | 1 << 12 | 1 << 8, // const/4 v1, #int 1
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 77e70d733e..85ed06eb9b 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -31,7 +31,7 @@ namespace art {
class SsaTest : public OptimizingUnitTest {
protected:
- void TestCode(const uint16_t* data, const char* expected);
+ void TestCode(const std::vector<uint16_t>& data, const char* expected);
};
class SsaPrettyPrinter : public HPrettyPrinter {
@@ -80,7 +80,7 @@ static void ReNumberInstructions(HGraph* graph) {
}
}
-void SsaTest::TestCode(const uint16_t* data, const char* expected) {
+void SsaTest::TestCode(const std::vector<uint16_t>& data, const char* expected) {
HGraph* graph = CreateCFG(data);
// Suspend checks implementation may change in the future, and this test relies
// on how instructions are ordered.
@@ -119,7 +119,7 @@ TEST_F(SsaTest, CFG1) {
"BasicBlock 5, pred: 1, succ: 3\n"
" 7: Goto\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x100,
@@ -150,7 +150,7 @@ TEST_F(SsaTest, CFG2) {
"BasicBlock 5, pred: 1, succ: 3\n"
" 9: Goto\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -181,7 +181,7 @@ TEST_F(SsaTest, CFG3) {
"BasicBlock 5, pred: 4\n"
" 10: Exit\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -214,7 +214,7 @@ TEST_F(SsaTest, Loop1) {
"BasicBlock 6, pred: 5\n"
" 10: Exit\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -245,7 +245,7 @@ TEST_F(SsaTest, Loop2) {
"BasicBlock 5, pred: 4\n"
" 9: Exit\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -276,7 +276,7 @@ TEST_F(SsaTest, Loop3) {
"BasicBlock 5, pred: 4\n"
" 10: Exit\n";
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -310,7 +310,7 @@ TEST_F(SsaTest, Loop4) {
"BasicBlock 6, pred: 5\n"
" 10: Exit\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::GOTO | 0x500,
Instruction::IF_EQ, 5,
@@ -351,7 +351,7 @@ TEST_F(SsaTest, Loop5) {
" 13: Phi(2, 1) [11, 8, 8]\n"
" 14: Goto\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 4,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -390,7 +390,7 @@ TEST_F(SsaTest, Loop6) {
"BasicBlock 7, pred: 6\n"
" 13: Exit\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 8,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -432,7 +432,7 @@ TEST_F(SsaTest, Loop7) {
"BasicBlock 8, pred: 2, succ: 6\n"
" 15: Goto\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 8,
Instruction::CONST_4 | 4 << 12 | 0,
@@ -456,7 +456,7 @@ TEST_F(SsaTest, DeadLocal) {
"BasicBlock 2, pred: 1\n"
" 3: Exit\n";
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN_VOID);
@@ -484,7 +484,7 @@ TEST_F(SsaTest, LocalInIf) {
"BasicBlock 5, pred: 1, succ: 3\n"
" 8: Goto\n";
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
@@ -520,7 +520,7 @@ TEST_F(SsaTest, MultiplePredecessors) {
"BasicBlock 7, pred: 3, succ: 5\n"
" 12: Goto\n";
- const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+ const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 5,
Instruction::ADD_INT_LIT8 | 1 << 8, 0 << 8,
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
new file mode 100644
index 0000000000..a7c23bef7e
--- /dev/null
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -0,0 +1,704 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "superblock_cloner.h"
+
+#include "common_dominator.h"
+#include "graph_checker.h"
+
+#include <iostream>
+
+namespace art {
+
+using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
+using HInstructionMap = SuperblockCloner::HInstructionMap;
+using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
+using HEdgeSet = SuperblockCloner::HEdgeSet;
+
+void HEdge::Dump(std::ostream& stream) const {
+ stream << "(" << from_ << "->" << to_ << ")";
+}
+
+//
+// Static helper methods.
+//
+
+// Returns whether the instruction has any uses (regular or environmental) outside the region
+// defined by the basic block set.
+static bool IsUsedOutsideRegion(const HInstruction* instr, const HBasicBlockSet& bb_set) {
+ auto& uses = instr->GetUses();
+ for (auto use_node = uses.begin(), e = uses.end(); use_node != e; ++use_node) {
+ HInstruction* user = use_node->GetUser();
+ if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
+ return true;
+ }
+ }
+
+ auto& env_uses = instr->GetEnvUses();
+ for (auto use_node = env_uses.begin(), e = env_uses.end(); use_node != e; ++use_node) {
+ HInstruction* user = use_node->GetUser()->GetHolder();
+ if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Returns whether the phi's inputs are the same HInstruction.
+static bool ArePhiInputsTheSame(const HPhi* phi) {
+ HInstruction* first_input = phi->InputAt(0);
+ for (size_t i = 1, e = phi->InputCount(); i < e; i++) {
+ if (phi->InputAt(i) != first_input) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Returns a common predecessor of loop1 and loop2 in the loop tree, or nullptr if it is the
+// whole graph. A nullptr argument stands for the whole graph, which absorbs any loop.
+static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
+ if (loop1 == nullptr || loop2 == nullptr) {
+ return nullptr;
+ }
+
+ if (loop1->IsIn(*loop2)) {
+ return loop2;
+ } else if (loop2->IsIn(*loop1)) {
+ return loop1;
+ }
+ HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader());
+ return block->GetLoopInformation();
+}
+
+// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph.
+static void OrderLoopsHeadersPredecessors(HGraph* graph) {
+ for (HBasicBlock* block : graph->GetPostOrder()) {
+ if (block->IsLoopHeader()) {
+ graph->OrderLoopHeaderPredecessors(block);
+ }
+ }
+}
+
+//
+// Helpers for CloneBasicBlock.
+//
+
+void SuperblockCloner::ReplaceInputsWithCopies(HInstruction* copy_instr) {
+ DCHECK(!copy_instr->IsPhi());
+ for (size_t i = 0, e = copy_instr->InputCount(); i < e; i++) {
+ // Copy instruction holds the same input as the original instruction holds.
+ HInstruction* orig_input = copy_instr->InputAt(i);
+ if (!IsInOrigBBSet(orig_input->GetBlock())) {
+ // Defined outside the subgraph.
+ continue;
+ }
+ HInstruction* copy_input = GetInstrCopy(orig_input);
+ // copy_instr will be registered as a user of copy_input after returning from this function:
+ // 'copy_block->AddInstruction(copy_instr)'.
+ copy_instr->SetRawInputAt(i, copy_input);
+ }
+}
+
+void SuperblockCloner::DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr,
+ const HEnvironment* orig_env) {
+ if (orig_env->GetParent() != nullptr) {
+ DeepCloneEnvironmentWithRemapping(copy_instr, orig_env->GetParent());
+ }
+ HEnvironment* copy_env = new (arena_) HEnvironment(arena_, *orig_env, copy_instr);
+
+ for (size_t i = 0; i < orig_env->Size(); i++) {
+ HInstruction* env_input = orig_env->GetInstructionAt(i);
+ if (env_input != nullptr && IsInOrigBBSet(env_input->GetBlock())) {
+ env_input = GetInstrCopy(env_input);
+ DCHECK(env_input != nullptr && env_input->GetBlock() != nullptr);
+ }
+ copy_env->SetRawEnvAt(i, env_input);
+ if (env_input != nullptr) {
+ env_input->AddEnvUseAt(copy_env, i);
+ }
+ }
+ // InsertRawEnvironment assumes that the instruction already has an environment, which is why
+ // we use SetRawEnvironment in the 'else' case.
+ // As this function calls itself recursively with the same copy_instr, that copy_instr may
+ // already have a partially copied chain of HEnvironments.
+ if (copy_instr->HasEnvironment()) {
+ copy_instr->InsertRawEnvironment(copy_env);
+ } else {
+ copy_instr->SetRawEnvironment(copy_env);
+ }
+}
+
+//
+// Helpers for RemapEdgesSuccessors.
+//
+
+void SuperblockCloner::RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block,
+ HBasicBlock* orig_succ) {
+ DCHECK(IsInOrigBBSet(orig_succ));
+ HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
+
+ size_t this_index = orig_succ->GetPredecessorIndexOf(orig_block);
+ size_t phi_input_count = 0;
+ // This flag reflects whether the original successor has at least one phi and whether this phi
+ // has already been processed in the loop. It is used for validation in a DCHECK, to check that
+ // in the end all of the phis in the copy successor have the same number of inputs - the number
+ // of the copy successor's predecessors.
+ bool first_phi_met = false;
+ for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* orig_phi = it.Current()->AsPhi();
+ HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+ HInstruction* orig_phi_input = orig_phi->InputAt(this_index);
+ // Remove corresponding input for original phi.
+ orig_phi->RemoveInputAt(this_index);
+ // Copy phi doesn't yet have either orig_block as predecessor or the input that corresponds
+ // to orig_block, so add the input at the end of the list.
+ copy_phi->AddInput(orig_phi_input);
+ if (!first_phi_met) {
+ phi_input_count = copy_phi->InputCount();
+ first_phi_met = true;
+ } else {
+ DCHECK_EQ(phi_input_count, copy_phi->InputCount());
+ }
+ }
+ // orig_block will be put at the end of the copy_succ's predecessors list, which corresponds
+ // to the position of the previously added phi inputs.
+ orig_block->ReplaceSuccessor(orig_succ, copy_succ);
+ DCHECK(!first_phi_met || copy_succ->GetPredecessors().size() == phi_input_count);
+}
+
+void SuperblockCloner::AddCopyInternalEdge(HBasicBlock* orig_block,
+ HBasicBlock* orig_succ) {
+ DCHECK(IsInOrigBBSet(orig_succ));
+ HBasicBlock* copy_block = GetBlockCopy(orig_block);
+ HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
+ copy_block->AddSuccessor(copy_succ);
+
+ size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
+ for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* orig_phi = it.Current()->AsPhi();
+ HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+ HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
+ copy_phi->AddInput(orig_phi_input);
+ }
+}
+
+void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block,
+ HBasicBlock* orig_succ) {
+ DCHECK(IsInOrigBBSet(orig_succ));
+ HBasicBlock* copy_block = GetBlockCopy(orig_block);
+ copy_block->AddSuccessor(orig_succ);
+ DCHECK(copy_block->HasSuccessor(orig_succ));
+
+ size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
+ for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* orig_phi = it.Current()->AsPhi();
+ HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
+ orig_phi->AddInput(orig_phi_input);
+ }
+}
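+
+// Taken together (informal summary): RemapOrigInternalOrIncomingEdge redirects an edge that ends
+// in the original region so that it ends in the corresponding copy block, AddCopyInternalEdge
+// mirrors an original internal edge between the corresponding copy blocks, and
+// RemapCopyInternalEdge makes the copy of a block branch back to the original successor. Each
+// helper keeps the phi inputs of the affected successor consistent with its predecessor list.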
+
+//
+// Local versions of CF calculation/adjustment routines.
+//
+
+// TODO: merge with the original version in nodes.cc. The concern is that we don't want to affect
+// the performance of the base version by checking the local set.
+// TODO: this version works when updating the back-edge info for a natural-loop-based local_set.
+// Check exactly which types of subgraphs can be analysed, or rename the method to
+// FindBackEdgesInTheNaturalLoop.
+void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set) {
+ ArenaBitVector visited(arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks.
+ DCHECK_EQ(visited.GetHighestBitSet(), -1);
+
+ // Nodes that we're currently visiting, indexed by block id.
+ ArenaBitVector visiting(arena_, graph_->GetBlocks().size(), false, kArenaAllocGraphBuilder);
+ // Number of successors visited from a given node, indexed by block id.
+ ArenaVector<size_t> successors_visited(graph_->GetBlocks().size(),
+ 0u,
+ arena_->Adapter(kArenaAllocGraphBuilder));
+ // Stack of nodes that we're currently visiting (same as marked in "visiting" above).
+ ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder));
+ constexpr size_t kDefaultWorklistSize = 8;
+ worklist.reserve(kDefaultWorklistSize);
+
+ visited.SetBit(entry_block->GetBlockId());
+ visiting.SetBit(entry_block->GetBlockId());
+ worklist.push_back(entry_block);
+
+ while (!worklist.empty()) {
+ HBasicBlock* current = worklist.back();
+ uint32_t current_id = current->GetBlockId();
+ if (successors_visited[current_id] == current->GetSuccessors().size()) {
+ visiting.ClearBit(current_id);
+ worklist.pop_back();
+ } else {
+ HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
+ uint32_t successor_id = successor->GetBlockId();
+ if (!local_set->IsBitSet(successor_id)) {
+ continue;
+ }
+
+ if (visiting.IsBitSet(successor_id)) {
+ DCHECK(ContainsElement(worklist, successor));
+ successor->AddBackEdgeWhileUpdating(current);
+ } else if (!visited.IsBitSet(successor_id)) {
+ visited.SetBit(successor_id);
+ visiting.SetBit(successor_id);
+ worklist.push_back(successor);
+ }
+ }
+ }
+}
+
+void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) {
+ // TODO: DCHECK that after the transformation the graph is connected.
+ HBasicBlock* block_entry = nullptr;
+
+ if (outer_loop_ == nullptr) {
+ for (auto block : graph_->GetBlocks()) {
+ if (block != nullptr) {
+ outer_loop_bb_set->SetBit(block->GetBlockId());
+ HLoopInformation* info = block->GetLoopInformation();
+ if (info != nullptr) {
+ info->ResetBasicBlockData();
+ }
+ }
+ }
+ block_entry = graph_->GetEntryBlock();
+ } else {
+ outer_loop_bb_set->Copy(&outer_loop_bb_set_);
+ block_entry = outer_loop_->GetHeader();
+
+ // Add newly created copy blocks.
+ for (auto entry : *bb_map_) {
+ outer_loop_bb_set->SetBit(entry.second->GetBlockId());
+ }
+
+ // Clear loop_info for the whole outer loop.
+ for (uint32_t idx : outer_loop_bb_set->Indexes()) {
+ HBasicBlock* block = GetBlockById(idx);
+ HLoopInformation* info = block->GetLoopInformation();
+ if (info != nullptr) {
+ info->ResetBasicBlockData();
+ }
+ }
+ }
+
+ FindBackEdgesLocal(block_entry, outer_loop_bb_set);
+
+ for (uint32_t idx : outer_loop_bb_set->Indexes()) {
+ HBasicBlock* block = GetBlockById(idx);
+ HLoopInformation* info = block->GetLoopInformation();
+ // Reset LoopInformation for regular blocks and old headers which are no longer loop headers.
+ if (info != nullptr &&
+ (info->GetHeader() != block || info->NumberOfBackEdges() == 0)) {
+ block->SetLoopInformation(nullptr);
+ }
+ }
+}
+
+// This is a modified version of HGraph::AnalyzeLoops.
+GraphAnalysisResult SuperblockCloner::AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set) {
+ // We iterate post order to ensure we visit inner loops before outer loops.
+ // `PopulateRecursive` needs this guarantee to know whether a natural loop
+ // contains an irreducible loop.
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
+ continue;
+ }
+ if (block->IsLoopHeader()) {
+ if (block->IsCatchBlock()) {
+ // TODO: Dealing with exceptional back edges could be tricky because
+ // they only approximate the real control flow. Bail out for now.
+ return kAnalysisFailThrowCatchLoop;
+ }
+ block->GetLoopInformation()->Populate();
+ }
+ }
+
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
+ continue;
+ }
+ if (block->IsLoopHeader()) {
+ HLoopInformation* cur_loop = block->GetLoopInformation();
+ HLoopInformation* outer_loop = cur_loop->GetPreHeader()->GetLoopInformation();
+ if (outer_loop != nullptr) {
+ outer_loop->PopulateInnerLoopUpwards(cur_loop);
+ }
+ }
+ }
+
+ return kAnalysisSuccess;
+}
+
+void SuperblockCloner::CleanUpControlFlow() {
+ // TODO: full control flow clean up for now, optimize it.
+ graph_->ClearDominanceInformation();
+
+ ArenaBitVector outer_loop_bb_set(
+ arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ RecalculateBackEdgesInfo(&outer_loop_bb_set);
+
+ // TODO: do it locally.
+ graph_->SimplifyCFG();
+ graph_->ComputeDominanceInformation();
+
+ // AnalyzeLoopsLocally requires a correct post-ordering information which was calculated just
+ // before in ComputeDominanceInformation.
+ GraphAnalysisResult result = AnalyzeLoopsLocally(&outer_loop_bb_set);
+ DCHECK_EQ(result, kAnalysisSuccess);
+
+ // TODO: do it locally
+ OrderLoopsHeadersPredecessors(graph_);
+
+ graph_->ComputeTryBlockInformation();
+}
+
+//
+// Helpers for ResolveDataFlow
+//
+
+void SuperblockCloner::ResolvePhi(HPhi* phi) {
+ HBasicBlock* phi_block = phi->GetBlock();
+ for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
+ HInstruction* input = phi->InputAt(i);
+ HBasicBlock* input_block = input->GetBlock();
+
+ // Originally defined outside the region.
+ if (!IsInOrigBBSet(input_block)) {
+ continue;
+ }
+ HBasicBlock* corresponding_block = phi_block->GetPredecessors()[i];
+ if (!IsInOrigBBSet(corresponding_block)) {
+ phi->ReplaceInput(GetInstrCopy(input), i);
+ }
+ }
+}
+
+//
+// Main algorithm methods.
+//
+
+void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) {
+ DCHECK(exits->empty());
+ for (uint32_t block_id : orig_bb_set_.Indexes()) {
+ HBasicBlock* block = GetBlockById(block_id);
+ for (HBasicBlock* succ : block->GetSuccessors()) {
+ if (!IsInOrigBBSet(succ)) {
+ exits->push_back(succ);
+ }
+ }
+ }
+}
+
+void SuperblockCloner::FindAndSetLocalAreaForAdjustments() {
+ DCHECK(outer_loop_ == nullptr);
+ ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+ SearchForSubgraphExits(&exits);
+
+ // For a reducible graph we need to update back-edge and dominance information only for
+ // the outermost loop which is affected by the transformation - it can be found by picking
+ // the outermost common loop of the loops to which the subgraph's exit blocks belong.
+ // Note: it can be a loop or the whole graph (outer_loop_ will be nullptr in that case).
+ for (HBasicBlock* exit : exits) {
+ HLoopInformation* loop_exit_loop_info = exit->GetLoopInformation();
+ if (loop_exit_loop_info == nullptr) {
+ outer_loop_ = nullptr;
+ break;
+ }
+ outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info);
+ }
+
+ if (outer_loop_ != nullptr) {
+ // Save the loop population info as it will be changed later.
+ outer_loop_bb_set_.Copy(&outer_loop_->GetBlocks());
+ }
+}
+
+void SuperblockCloner::RemapEdgesSuccessors() {
+ // Redirect incoming edges.
+ for (HEdge e : *remap_incoming_) {
+ HBasicBlock* orig_block = GetBlockById(e.GetFrom());
+ HBasicBlock* orig_succ = GetBlockById(e.GetTo());
+ RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
+ }
+
+ // Redirect internal edges.
+ for (uint32_t orig_block_id : orig_bb_set_.Indexes()) {
+ HBasicBlock* orig_block = GetBlockById(orig_block_id);
+
+ for (HBasicBlock* orig_succ : orig_block->GetSuccessors()) {
+ uint32_t orig_succ_id = orig_succ->GetBlockId();
+
+ // Check for outgoing edge.
+ if (!IsInOrigBBSet(orig_succ)) {
+ HBasicBlock* copy_block = GetBlockCopy(orig_block);
+ copy_block->AddSuccessor(orig_succ);
+ continue;
+ }
+
+ auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+ auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+
+ // By construction, all successors of a copied block were initially set to the original blocks.
+ if (copy_redir != remap_copy_internal_->end()) {
+ RemapCopyInternalEdge(orig_block, orig_succ);
+ } else {
+ AddCopyInternalEdge(orig_block, orig_succ);
+ }
+
+ if (orig_redir != remap_orig_internal_->end()) {
+ RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
+ }
+ }
+ }
+}
+
+void SuperblockCloner::AdjustControlFlowInfo() {
+ ArenaBitVector outer_loop_bb_set(
+ arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ RecalculateBackEdgesInfo(&outer_loop_bb_set);
+
+ graph_->ClearDominanceInformation();
+ // TODO: Do it locally.
+ graph_->ComputeDominanceInformation();
+}
+
+// TODO: Current FastCase restriction guarantees that instructions' inputs are already mapped to
+// the valid values; only phis' inputs must be adjusted.
+void SuperblockCloner::ResolveDataFlow() {
+ for (auto entry : *bb_map_) {
+ HBasicBlock* orig_block = entry.first;
+
+ for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* orig_phi = it.Current()->AsPhi();
+ HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+ ResolvePhi(orig_phi);
+ ResolvePhi(copy_phi);
+ }
+ if (kIsDebugBuild) {
+ // Inputs of instruction copies must already be mapped to the corresponding input copies.
+ for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
+ CheckInstructionInputsRemapping(it.Current());
+ }
+ }
+ }
+}
+
+//
+// Debug and logging methods.
+//
+
+void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) {
+ DCHECK(!orig_instr->IsPhi());
+ HInstruction* copy_instr = GetInstrCopy(orig_instr);
+ for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
+ HInstruction* orig_input = orig_instr->InputAt(i);
+ DCHECK(orig_input->GetBlock()->Dominates(orig_instr->GetBlock()));
+
+ // If original input is defined outside the region then it will remain for both original
+ // instruction and the copy after the transformation.
+ if (!IsInOrigBBSet(orig_input->GetBlock())) {
+ continue;
+ }
+ HInstruction* copy_input = GetInstrCopy(orig_input);
+ DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
+ }
+
+ // Resolve environment.
+ if (orig_instr->HasEnvironment()) {
+ HEnvironment* orig_env = orig_instr->GetEnvironment();
+
+ for (size_t i = 0, e = orig_env->Size(); i < e; ++i) {
+ HInstruction* orig_input = orig_env->GetInstructionAt(i);
+
+ // If original input is defined outside the region then it will remain for both original
+ // instruction and the copy after the transformation.
+ if (orig_input == nullptr || !IsInOrigBBSet(orig_input->GetBlock())) {
+ continue;
+ }
+
+ HInstruction* copy_input = GetInstrCopy(orig_input);
+ DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
+ }
+ }
+}
+
+//
+// Public methods.
+//
+
+SuperblockCloner::SuperblockCloner(HGraph* graph,
+ const HBasicBlockSet* orig_bb_set,
+ HBasicBlockMap* bb_map,
+ HInstructionMap* hir_map)
+ : graph_(graph),
+ arena_(graph->GetAllocator()),
+ orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
+ remap_orig_internal_(nullptr),
+ remap_copy_internal_(nullptr),
+ remap_incoming_(nullptr),
+ bb_map_(bb_map),
+ hir_map_(hir_map),
+ outer_loop_(nullptr),
+ outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
+ orig_bb_set_.Copy(orig_bb_set);
+}
+
+void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
+ const HEdgeSet* remap_copy_internal,
+ const HEdgeSet* remap_incoming) {
+ remap_orig_internal_ = remap_orig_internal;
+ remap_copy_internal_ = remap_copy_internal;
+ remap_incoming_ = remap_incoming;
+}
+
+bool SuperblockCloner::IsSubgraphClonable() const {
+ // TODO: Support irreducible graphs and graphs with try-catch.
+ if (graph_->HasIrreducibleLoops() || graph_->HasTryCatch()) {
+ return false;
+ }
+
+ // Check that there are no instructions defined in the subgraph and used outside.
+ // TODO: Improve this by accepting graph with such uses but only one exit.
+ for (uint32_t idx : orig_bb_set_.Indexes()) {
+ HBasicBlock* block = GetBlockById(idx);
+
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ if (!instr->IsClonable() ||
+ IsUsedOutsideRegion(instr, orig_bb_set_)) {
+ return false;
+ }
+ }
+
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ if (!instr->IsClonable() ||
+ IsUsedOutsideRegion(instr, orig_bb_set_)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+void SuperblockCloner::Run() {
+ DCHECK(bb_map_ != nullptr);
+ DCHECK(hir_map_ != nullptr);
+ DCHECK(remap_orig_internal_ != nullptr &&
+ remap_copy_internal_ != nullptr &&
+ remap_incoming_ != nullptr);
+ DCHECK(IsSubgraphClonable());
+
+ // Find an area in the graph for which control flow information should be adjusted.
+ FindAndSetLocalAreaForAdjustments();
+ // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
+ // adjusted.
+ CloneBasicBlocks();
+ // Connect the blocks together, remap successors and fix the phis which are directly affected
+ // by the remapping.
+ RemapEdgesSuccessors();
+ // Recalculate dominance and backedge information which is required by the next stage.
+ AdjustControlFlowInfo();
+ // Fix data flow of the graph.
+ ResolveDataFlow();
+}
+
+void SuperblockCloner::CleanUp() {
+ CleanUpControlFlow();
+
+ // Remove phis whose inputs are all the same.
+ // When a block has a single predecessor it must not have any phis. However, after the
+ // transformation such a block can end up with a phi that has a single input.
+ // While handling that case we also simplify phis with multiple identical inputs here.
+ for (auto entry : *bb_map_) {
+ HBasicBlock* orig_block = entry.first;
+ for (HInstructionIterator inst_it(orig_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+ HPhi* phi = inst_it.Current()->AsPhi();
+ if (ArePhiInputsTheSame(phi)) {
+ phi->ReplaceWith(phi->InputAt(0));
+ orig_block->RemovePhi(phi);
+ }
+ }
+
+ HBasicBlock* copy_block = GetBlockCopy(orig_block);
+ for (HInstructionIterator inst_it(copy_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+ HPhi* phi = inst_it.Current()->AsPhi();
+ if (ArePhiInputsTheSame(phi)) {
+ phi->ReplaceWith(phi->InputAt(0));
+ copy_block->RemovePhi(phi);
+ }
+ }
+ }
+}
+
+HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) {
+ HGraph* graph = orig_block->GetGraph();
+ HBasicBlock* copy_block = new (arena_) HBasicBlock(graph, orig_block->GetDexPc());
+ graph->AddBlock(copy_block);
+
+ // Clone all the phis and add them to the map.
+ for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+ HInstruction* orig_instr = it.Current();
+ HInstruction* copy_instr = orig_instr->Clone(arena_);
+ copy_block->AddPhi(copy_instr->AsPhi());
+ copy_instr->AsPhi()->RemoveAllInputs();
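+    // The inputs are dropped deliberately: the copy block has no predecessors yet, so the phi
+    // inputs will be set up later, when the edges are remapped and the data flow is resolved.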
+ DCHECK(!orig_instr->HasEnvironment());
+ hir_map_->Put(orig_instr, copy_instr);
+ }
+
+ // Clone all the instructions and add them to the map.
+ for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* orig_instr = it.Current();
+ HInstruction* copy_instr = orig_instr->Clone(arena_);
+ ReplaceInputsWithCopies(copy_instr);
+ copy_block->AddInstruction(copy_instr);
+ if (orig_instr->HasEnvironment()) {
+ DeepCloneEnvironmentWithRemapping(copy_instr, orig_instr->GetEnvironment());
+ }
+ hir_map_->Put(orig_instr, copy_instr);
+ }
+
+ return copy_block;
+}
+
+void SuperblockCloner::CloneBasicBlocks() {
+  // By this time ReversePostOrder must be valid: in 'CloneBasicBlock' inputs of the copied
+  // instructions might be replaced by copies of the original inputs (depending on where those
+  // inputs are defined). So the definitions of the original inputs must be visited before their
+  // original uses. The property of reducible graphs "if 'A' dom 'B' then rpo_num('A') <=
+  // rpo_num('B')" guarantees that.
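+  // For example, if an instruction in block B uses a value defined in block A of the subgraph,
+  // then A dominates B (the graph is in SSA form), so A is cloned first and the copy of the
+  // input is already in hir_map_ when ReplaceInputsWithCopies processes the user in B.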
+ for (HBasicBlock* orig_block : graph_->GetReversePostOrder()) {
+ if (!IsInOrigBBSet(orig_block)) {
+ continue;
+ }
+ HBasicBlock* copy_block = CloneBasicBlock(orig_block);
+ bb_map_->Put(orig_block, copy_block);
+ if (kSuperblockClonerLogging) {
+      std::cout << "new block: " << copy_block->GetBlockId() << " (copy of "
+                << orig_block->GetBlockId() << ")" << std::endl;
+ }
+ }
+}
+
+} // namespace art
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
new file mode 100644
index 0000000000..23de692673
--- /dev/null
+++ b/compiler/optimizing/superblock_cloner.h
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
+#define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
+
+#include "base/arena_bit_vector.h"
+#include "base/arena_containers.h"
+#include "base/bit_vector-inl.h"
+#include "nodes.h"
+
+namespace art {
+
+static const bool kSuperblockClonerLogging = false;
+static const bool kSuperblockClonerVerify = false;
+
+// Represents an edge between two HBasicBlocks.
+//
+// Note: objects of this class are small - pass them by value.
+class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
+ public:
+ HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
+ DCHECK_NE(to_, kInvalidBlockId);
+ DCHECK_NE(from_, kInvalidBlockId);
+ }
+ HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
+ DCHECK_NE(to_, kInvalidBlockId);
+ DCHECK_NE(from_, kInvalidBlockId);
+ }
+ HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
+
+ uint32_t GetFrom() const { return from_; }
+ uint32_t GetTo() const { return to_; }
+
+ bool operator==(const HEdge& other) const {
+ return this->from_ == other.from_ && this->to_ == other.to_;
+ }
+
+ bool operator!=(const HEdge& other) const { return !operator==(other); }
+ void Dump(std::ostream& stream) const;
+
+  // Returns whether the edge is set up correctly, i.e. both block ids are valid; this does not
+  // check that the edge exists in the CF graph (see IsEdgeValid for that).
+ bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
+
+ private:
+ // Predecessor block id.
+ uint32_t from_;
+ // Successor block id.
+ uint32_t to_;
+};
+
+// Returns whether an HEdge corresponds to an existing edge in the CF graph: whether the from_
+// block has the to_ block as a successor.
+inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
+ if (!edge.IsValid()) {
+ return false;
+ }
+ uint32_t from = edge.GetFrom();
+ uint32_t to = edge.GetTo();
+ if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
+ return false;
+ }
+
+ HBasicBlock* block_from = graph->GetBlocks()[from];
+ HBasicBlock* block_to = graph->GetBlocks()[to];
+ if (block_from == nullptr || block_to == nullptr) {
+ return false;
+ }
+
+ return block_from->HasSuccessor(block_to, 0);
+}
+
+// SuperblockCloner provides a way to clone subgraphs at a high level, without fine-grained
+// manipulation of the IR; data flow and graph properties are resolved/adjusted automatically.
+// The clone transformation is defined by a set of basic blocks to copy and a set of rules for
+// how to treat edges and remap their successors. This approach makes it possible to implement
+// such optimizations as Branch Target Expansion, Loop Peeling and Loop Unrolling.
+//
+// The idea of the transformation is based on the "superblock cloning" technique described in
+// "Engineering a Compiler", Keith D. Cooper, Linda Torczon, 2nd edition, Morgan Kaufmann. The
+// original paper is "The Superblock: An Effective Technique for VLIW and Superscalar Compilation"
+// by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
+// J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
+//
+// There are two states of the IR graph: original graph (before the transformation) and
+// copy graph (after).
+//
+// Before the transformation:
+// Defining a set of basic blocks to copy (orig_bb_set) partitions all of the edges in the
+// original graph into 4 categories/sets (use the following notation for edges: "(pred, succ)",
+// where pred, succ - basic blocks):
+//  - internal - pred, succ are members of 'orig_bb_set'.
+//  - outside  - pred, succ are not members of 'orig_bb_set'.
+//  - incoming - pred is not a member of 'orig_bb_set', succ is.
+//  - outgoing - pred is a member of 'orig_bb_set', succ is not.
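+//
+// For example, for a natural loop with preheader P, header H, back edge block B and exit block E,
+// choosing orig_bb_set = {H, B} makes (H, B) and (B, H) internal edges, (P, H) an incoming edge
+// and (H, E) an outgoing edge; all edges between blocks outside {H, B} are outside edges.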
+//
+// Transformation:
+//
+// 1. Initial cloning:
+// 1.1. For each 'orig_block' in orig_bb_set create a copy 'copy_block'; these new blocks
+//      form 'copy_bb_set'.
+// 1.2. For each edge (X, Y) from the internal set create an edge (X_1, Y_1) where X_1, Y_1 are
+//      the copies of the X, Y basic blocks correspondingly; these new edges form the
+//      'copy_internal' edge set.
+// 1.3. For each edge (X, Y) from the outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are
+//      the copies of the X, Y basic blocks correspondingly; these new edges form the
+//      'copy_outgoing' edge set.
+// 2. Successors remapping.
+// 2.1. 'remap_orig_internal' - set of edges (X, Y) from 'orig_bb_set' whose successors should
+//      be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
+// 2.2. 'remap_copy_internal' - set of edges (X_1, Y_1) from 'copy_bb_set' whose successors
+//      should be remapped to original nodes: ((X_1, Y_1) will be transformed into (X_1, Y)).
+// 2.3. 'remap_incoming' - set of edges (X, Y) from the 'incoming' edge set in the original graph
+//      whose successors should be remapped to copy nodes: ((X, Y) will be transformed into
+//      (X, Y_1)).
+// 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
+// 4. Fix/resolve data flow.
+// 5. Do cleanups (DCE, critical edges splitting, etc).
+//
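+// A typical use looks like this (a sketch, assuming the caller has already prepared the blocks
+// to copy and the edge sets; error handling is omitted):
+//
+//   SuperblockCloner cloner(graph, &orig_bb_set, &bb_map, &hir_map);
+//   cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
+//   if (cloner.IsSubgraphClonable()) {
+//     cloner.Run();
+//     cloner.CleanUp();
+//   }
+//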
+class SuperblockCloner : public ValueObject {
+ public:
+ // TODO: Investigate optimal types for the containers.
+ using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
+ using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
+ using HBasicBlockSet = ArenaBitVector;
+ using HEdgeSet = ArenaHashSet<HEdge>;
+
+ SuperblockCloner(HGraph* graph,
+ const HBasicBlockSet* orig_bb_set,
+ HBasicBlockMap* bb_map,
+ HInstructionMap* hir_map);
+
+ // Sets edge successor remapping info specified by corresponding edge sets.
+ void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
+ const HEdgeSet* remap_copy_internal,
+ const HEdgeSet* remap_incoming);
+
+ // Returns whether the specified subgraph is copyable.
+ // TODO: Start from small range of graph patterns then extend it.
+ bool IsSubgraphClonable() const;
+
+ // Runs the copy algorithm according to the description.
+ void Run();
+
+ // Cleans up the graph after transformation: splits critical edges, recalculates control flow
+ // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
+ void CleanUp();
+
+ // Returns a clone of a basic block (orig_block).
+ //
+ // - The copy block will have no successors/predecessors; they should be set up manually.
+ // - For each instruction in the orig_block a copy is created and inserted into the copy block;
+ // this correspondence is recorded in the map (old instruction, new instruction).
+  // - Graph HIR is not valid after this transformation: the copied instructions keep the same
+  //   inputs as in the original block, phis do not reflect a correct correspondence between
+  //   values and predecessors (as the copy block has no predecessors by now), etc.
+ HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
+
+  // Creates a clone for each basic block in orig_bb_set, adding corresponding entries to bb_map_
+  // and hir_map_.
+ void CloneBasicBlocks();
+
+ HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
+ auto copy_input_iter = hir_map_->find(orig_instr);
+ DCHECK(copy_input_iter != hir_map_->end());
+ return copy_input_iter->second;
+ }
+
+ HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
+ HBasicBlock* block = bb_map_->Get(orig_block);
+ DCHECK(block != nullptr);
+ return block;
+ }
+
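+  // Reverse lookup in hir_map_: returns the original instruction for the given copy, or nullptr
+  // if 'copy_instr' is not a copy. Note: this is linear in the size of the map.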
+ HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
+ for (auto it : *hir_map_) {
+ if (it.second == copy_instr) {
+ return it.first;
+ }
+ }
+ return nullptr;
+ }
+
+ bool IsInOrigBBSet(uint32_t block_id) const {
+ return orig_bb_set_.IsBitSet(block_id);
+ }
+
+ bool IsInOrigBBSet(const HBasicBlock* block) const {
+ return IsInOrigBBSet(block->GetBlockId());
+ }
+
+ private:
+ // Fills the 'exits' vector with the subgraph exits.
+ void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
+
+ // Finds and records information about the area in the graph for which control-flow (back edges,
+ // loops, dominators) needs to be adjusted.
+ void FindAndSetLocalAreaForAdjustments();
+
+ // Remaps edges' successors according to the info specified in the edges sets.
+ //
+ // Only edge successors/predecessors and phis' input records (to have a correspondence between
+ // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
+  // phis' nor instructions' input values are resolved.
+ void RemapEdgesSuccessors();
+
+ // Adjusts control-flow (back edges, loops, dominators) for the local area defined by
+ // FindAndSetLocalAreaForAdjustments.
+ void AdjustControlFlowInfo();
+
+  // Resolves data flow - adjusts the phis' and instructions' inputs so that the graph is in a
+  // valid SSA form.
+ void ResolveDataFlow();
+
+ //
+ // Helpers for CloneBasicBlock.
+ //
+
+  // Adjusts the copy instruction's inputs: if an input of the original instruction is defined in
+  // the orig_bb_set, replaces it with the corresponding copy; otherwise leaves it as in the
+  // original.
+ void ReplaceInputsWithCopies(HInstruction* copy_instr);
+
+  // Recursively clones the environment for the copy instruction. If an input of the original
+  // environment is defined in the orig_bb_set, replaces it with the corresponding copy; otherwise
+  // leaves it as in the original.
+ void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
+
+ //
+ // Helpers for RemapEdgesSuccessors.
+ //
+
+ // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
+ // copy_succ.
+ void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+ // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
+ void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+ // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
+ void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+ //
+ // Local versions of control flow calculation/adjustment routines.
+ //
+
+ void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
+ void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
+ GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
+ void CleanUpControlFlow();
+
+ //
+  // Helpers for ResolveDataFlow.
+ //
+
+ // Resolves the inputs of the phi.
+ void ResolvePhi(HPhi* phi);
+
+ //
+ // Debug and logging methods.
+ //
+ void CheckInstructionInputsRemapping(HInstruction* orig_instr);
+
+ HBasicBlock* GetBlockById(uint32_t block_id) const {
+    DCHECK_LT(block_id, graph_->GetBlocks().size());
+ HBasicBlock* block = graph_->GetBlocks()[block_id];
+ DCHECK(block != nullptr);
+ return block;
+ }
+
+ HGraph* const graph_;
+ ArenaAllocator* const arena_;
+
+  // Set of basic blocks in the original graph to be copied.
+ HBasicBlockSet orig_bb_set_;
+
+ // Sets of edges which require successors remapping.
+ const HEdgeSet* remap_orig_internal_;
+ const HEdgeSet* remap_copy_internal_;
+ const HEdgeSet* remap_incoming_;
+
+ // Correspondence map for blocks: (original block, copy block).
+ HBasicBlockMap* bb_map_;
+ // Correspondence map for instructions: (original HInstruction, copy HInstruction).
+ HInstructionMap* hir_map_;
+ // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted.
+ HLoopInformation* outer_loop_;
+ HBasicBlockSet outer_loop_bb_set_;
+
+ ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
+
+ DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
+};
+
+} // namespace art
+
+namespace std {
+
+template <>
+struct hash<art::HEdge> {
+ size_t operator()(art::HEdge const& x) const noexcept {
+ // Use Cantor pairing function as the hash function.
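+    // The Cantor pairing function is a bijection from pairs of naturals onto the naturals, so
+    // distinct edges map to distinct values, e.g. (2, 3) -> (2 + 3) * (2 + 3 + 1) / 2 + 3 = 18
+    // (up to uint32_t overflow for very large block ids).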
+ uint32_t a = x.GetFrom();
+ uint32_t b = x.GetTo();
+ return (a + b) * (a + b + 1) / 2 + b;
+ }
+};
+
+} // namespace std
+
+#endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index fd77eb81fc..f1b7bffdf5 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -17,11 +17,15 @@
#include "graph_checker.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
+#include "superblock_cloner.h"
#include "gtest/gtest.h"
namespace art {
+using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
+using HInstructionMap = SuperblockCloner::HInstructionMap;
+
// This class provides methods and helpers for testing various cloning and copying routines:
// individual instruction cloning and cloning of the more coarse-grain structures.
class SuperblockClonerTest : public OptimizingUnitTest {
@@ -182,4 +186,121 @@ TEST_F(SuperblockClonerTest, IndividualInstrCloner) {
EXPECT_NE(new_suspend_check, nullptr);
}
+// Tests SuperblockCloner::CloneBasicBlocks - checks instruction cloning and initial remapping of
+// the instructions' inputs.
+TEST_F(SuperblockClonerTest, CloneBasicBlocks) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+ ArenaAllocator* arena = graph_->GetAllocator();
+
+ CreateBasicLoopControlFlow(&header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+ ASSERT_TRUE(CheckGraph());
+
+ ArenaBitVector orig_bb_set(
+ arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+ HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ orig_bb_set.Union(&loop_info->GetBlocks());
+
+ SuperblockCloner cloner(graph_,
+ &orig_bb_set,
+ &bb_map,
+ &hir_map);
+ EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+ cloner.CloneBasicBlocks();
+
+ EXPECT_EQ(bb_map.size(), 2u);
+ EXPECT_EQ(hir_map.size(), 12u);
+
+ for (auto it : hir_map) {
+ HInstruction* orig_instr = it.first;
+ HInstruction* copy_instr = it.second;
+
+ EXPECT_EQ(cloner.GetBlockCopy(orig_instr->GetBlock()), copy_instr->GetBlock());
+ EXPECT_EQ(orig_instr->GetKind(), copy_instr->GetKind());
+ EXPECT_EQ(orig_instr->GetType(), copy_instr->GetType());
+
+ if (orig_instr->IsPhi()) {
+ continue;
+ }
+
+ EXPECT_EQ(orig_instr->InputCount(), copy_instr->InputCount());
+
+ // Check that inputs match.
+ for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
+ HInstruction* orig_input = orig_instr->InputAt(i);
+ HInstruction* copy_input = copy_instr->InputAt(i);
+ if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+ EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+ } else {
+ EXPECT_EQ(orig_input, copy_input);
+ }
+ }
+
+ EXPECT_EQ(orig_instr->HasEnvironment(), copy_instr->HasEnvironment());
+
+ // Check that environments match.
+ if (orig_instr->HasEnvironment()) {
+ HEnvironment* orig_env = orig_instr->GetEnvironment();
+ HEnvironment* copy_env = copy_instr->GetEnvironment();
+
+ EXPECT_EQ(copy_env->GetParent(), nullptr);
+ EXPECT_EQ(orig_env->Size(), copy_env->Size());
+
+ for (size_t i = 0, e = orig_env->Size(); i < e; i++) {
+ HInstruction* orig_input = orig_env->GetInstructionAt(i);
+ HInstruction* copy_input = copy_env->GetInstructionAt(i);
+ if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+ EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+ } else {
+ EXPECT_EQ(orig_input, copy_input);
+ }
+ }
+ }
+ }
+}
+
+// Tests SuperblockCloner::CleanUpControlFlow - checks the algorithms for local adjustment of the
+// control flow.
+TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+ ArenaAllocator* arena = graph_->GetAllocator();
+
+ CreateBasicLoopControlFlow(&header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+ ASSERT_TRUE(CheckGraph());
+
+ ArenaBitVector orig_bb_set(
+ arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ orig_bb_set.Union(&loop_info->GetBlocks());
+
+ SuperblockCloner cloner(graph_,
+ &orig_bb_set,
+ nullptr,
+ nullptr);
+ EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+ cloner.FindAndSetLocalAreaForAdjustments();
+ cloner.CleanUpControlFlow();
+
+ EXPECT_TRUE(CheckGraph());
+
+ EXPECT_TRUE(entry_block_->Dominates(header));
+ EXPECT_TRUE(entry_block_->Dominates(exit_block_));
+
+ EXPECT_EQ(header->GetLoopInformation(), loop_info);
+ EXPECT_EQ(loop_info->GetHeader(), header);
+ EXPECT_TRUE(loop_info->Contains(*loop_body));
+ EXPECT_TRUE(loop_info->IsBackEdge(*loop_body));
+}
+
} // namespace art
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index 7e83f8ce5f..33823e2a11 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -30,10 +30,10 @@ namespace art {
class SuspendCheckTest : public OptimizingUnitTest {
protected:
- void TestCode(const uint16_t* data);
+ void TestCode(const std::vector<uint16_t>& data);
};
-void SuspendCheckTest::TestCode(const uint16_t* data) {
+void SuspendCheckTest::TestCode(const std::vector<uint16_t>& data) {
HGraph* graph = CreateCFG(data);
HBasicBlock* first_block = graph->GetEntryBlock()->GetSingleSuccessor();
HBasicBlock* loop_header = first_block->GetSingleSuccessor();
@@ -43,7 +43,7 @@ void SuspendCheckTest::TestCode(const uint16_t* data) {
}
TEST_F(SuspendCheckTest, CFG1) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::NOP,
Instruction::GOTO | 0xFF00);
@@ -51,14 +51,14 @@ TEST_F(SuspendCheckTest, CFG1) {
}
TEST_F(SuspendCheckTest, CFG2) {
- const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_32, 0, 0);
TestCode(data);
}
TEST_F(SuspendCheckTest, CFG3) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 0xFFFF,
Instruction::RETURN_VOID);
@@ -67,7 +67,7 @@ TEST_F(SuspendCheckTest, CFG3) {
}
TEST_F(SuspendCheckTest, CFG4) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_NE, 0xFFFF,
Instruction::RETURN_VOID);
@@ -76,7 +76,7 @@ TEST_F(SuspendCheckTest, CFG4) {
}
TEST_F(SuspendCheckTest, CFG5) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQZ, 0xFFFF,
Instruction::RETURN_VOID);
@@ -85,7 +85,7 @@ TEST_F(SuspendCheckTest, CFG5) {
}
TEST_F(SuspendCheckTest, CFG6) {
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+ const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_NEZ, 0xFFFF,
Instruction::RETURN_VOID);