Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/boolean_simplifier.cc         |  11
-rw-r--r--  compiler/optimizing/builder.cc                    |  16
-rw-r--r--  compiler/optimizing/code_generator.cc             |  24
-rw-r--r--  compiler/optimizing/code_generator_arm.cc         |  32
-rw-r--r--  compiler/optimizing/code_generator_arm.h          |   4
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc       | 412
-rw-r--r--  compiler/optimizing/code_generator_arm64.h        |  20
-rw-r--r--  compiler/optimizing/code_generator_x86.cc         | 196
-rw-r--r--  compiler/optimizing/code_generator_x86.h          |   8
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc      | 289
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h       |   4
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc      |  81
-rw-r--r--  compiler/optimizing/dead_code_elimination.h       |  11
-rw-r--r--  compiler/optimizing/graph_checker.cc              |  65
-rw-r--r--  compiler/optimizing/graph_visualizer.cc           |   4
-rw-r--r--  compiler/optimizing/gvn.cc                        |   2
-rw-r--r--  compiler/optimizing/inliner.cc                    |   2
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc     |  97
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc             |   2
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc           |   2
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc             |   4
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc          |  65
-rw-r--r--  compiler/optimizing/locations.h                   |  23
-rw-r--r--  compiler/optimizing/nodes.cc                      | 326
-rw-r--r--  compiler/optimizing/nodes.h                       | 133
-rw-r--r--  compiler/optimizing/optimization.cc               |   4
-rw-r--r--  compiler/optimizing/optimization.h                |   2
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc        |  62
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h   |  12
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc     | 309
-rw-r--r--  compiler/optimizing/parallel_move_resolver.h      | 120
-rw-r--r--  compiler/optimizing/parallel_move_test.cc         | 344
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 105
-rw-r--r--  compiler/optimizing/register_allocator.cc         |  49
-rw-r--r--  compiler/optimizing/register_allocator.h          |   8
-rw-r--r--  compiler/optimizing/register_allocator_test.cc    |   4
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h       |  38
-rw-r--r--  compiler/optimizing/stack_map_stream.cc           | 359
-rw-r--r--  compiler/optimizing/stack_map_stream.h            | 420
-rw-r--r--  compiler/optimizing/stack_map_test.cc             |  42
40 files changed, 2513 insertions(+), 1198 deletions(-)
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index 06328f2490..30c89f2d15 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -72,8 +72,8 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) {
return graph->GetIntConstant(0);
}
} else {
- // General case when 'cond' is another instruction of type boolean.
- DCHECK_EQ(cond->GetType(), Primitive::Type::kPrimBoolean);
+ // General case when 'cond' is another instruction of type boolean,
+ // as verified by SSAChecker.
return new (allocator) HBooleanNot(cond);
}
}
@@ -120,8 +120,11 @@ void HBooleanSimplifier::Run() {
phi->ReplaceWith(replacement);
merge_block->RemovePhi(phi);
- // Link the start/end blocks and remove empty branches.
- graph_->MergeEmptyBranches(block, merge_block);
+ // Delete the true branch and merge the resulting chain of blocks
+ // 'block->false_block->merge_block' into one.
+ true_block->DisconnectAndDelete();
+ block->MergeWith(false_block);
+ block->MergeWith(merge_block);
// Remove the original condition if it is now unused.
if (!if_condition->HasUses()) {
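This transformation collapses the if/else diamond that only materializes a boolean. A minimal plain-C++ illustration of the shape being removed (hypothetical functions, not ART IR):

// Before: a phi of {true, false} selected by a branch on `cond`.
static bool Before(bool cond) {
  bool result;        // becomes the phi in merge_block
  if (cond) {         // block: branch on if_condition
    result = true;    // true_block
  } else {
    result = false;   // false_block
  }
  return result;      // merge_block
}

// After HBooleanSimplifier: the phi is replaced by the condition itself,
// true_block is deleted, and block/false_block/merge_block are merged.
static bool After(bool cond) {
  return cond;
}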
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 8a64d81485..818d671b5b 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -520,8 +520,24 @@ void HGraphBuilder::Binop_22b(const Instruction& instruction, bool reverse) {
UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
}
+static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, const CompilerDriver& driver) {
+ // The dex compilation unit is null only when unit testing.
+ if (cu == nullptr) {
+ return false;
+ }
+
+ Thread* self = Thread::Current();
+ return cu->IsConstructor()
+ && driver.RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
+}
+
void HGraphBuilder::BuildReturn(const Instruction& instruction, Primitive::Type type) {
if (type == Primitive::kPrimVoid) {
+ // Note that we might insert redundant barriers when inlining `super` calls.
+ // TODO: add a data flow analysis to get rid of duplicate barriers.
+ if (RequiresConstructorBarrier(dex_compilation_unit_, *compiler_driver_)) {
+ current_block_->AddInstruction(new (arena_) HMemoryBarrier(kStoreStore));
+ }
current_block_->AddInstruction(new (arena_) HReturnVoid());
} else {
HInstruction* value = LoadLocal(instruction.VRegA(), type);
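The barrier inserted above orders a constructor's field stores before the object becomes visible to other threads. A rough C++ analogue of the kStoreStore guarantee (illustrative only; Point and g_published are hypothetical):

#include <atomic>

struct Point { int x; int y; };
std::atomic<Point*> g_published{nullptr};

void ConstructAndPublish() {
  Point* p = new Point();
  p->x = 1;  // field stores in the constructor body
  p->y = 2;
  // kStoreStore barrier before the constructor returns: the stores above
  // cannot be reordered past the publishing store below.
  std::atomic_thread_fence(std::memory_order_release);
  g_published.store(p, std::memory_order_relaxed);
}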
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 8ab759d393..5163395cac 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -612,7 +612,7 @@ void CodeGenerator::BuildVMapTable(std::vector<uint8_t>* data) const {
}
void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) {
- uint32_t size = stack_map_stream_.ComputeNeededSize();
+ uint32_t size = stack_map_stream_.PrepareForFillIn();
data->resize(size);
MemoryRegion region(data->data(), size);
stack_map_stream_.FillIn(region);
@@ -654,7 +654,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
if (instruction == nullptr) {
// For stack overflow checks.
- stack_map_stream_.AddStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth);
+ stack_map_stream_.BeginStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth);
+ stack_map_stream_.EndStackMapEntry();
return;
}
LocationSummary* locations = instruction->GetLocations();
@@ -672,12 +673,12 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
}
// The register mask must be a subset of callee-save registers.
DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
- stack_map_stream_.AddStackMapEntry(dex_pc,
- pc_info.native_pc,
- register_mask,
- locations->GetStackMask(),
- environment_size,
- inlining_depth);
+ stack_map_stream_.BeginStackMapEntry(dex_pc,
+ pc_info.native_pc,
+ register_mask,
+ locations->GetStackMask(),
+ environment_size,
+ inlining_depth);
// Walk over the environment, and record the location of dex registers.
for (size_t i = 0; i < environment_size; ++i) {
@@ -823,11 +824,14 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
LOG(FATAL) << "Unexpected kind " << location.GetKind();
}
}
+ stack_map_stream_.EndStackMapEntry();
}
bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) {
HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves();
- return (first_next_not_move != nullptr) && first_next_not_move->CanDoImplicitNullCheck();
+
+ return (first_next_not_move != nullptr)
+ && first_next_not_move->CanDoImplicitNullCheckOn(null_check->InputAt(0));
}
void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) {
@@ -842,7 +846,7 @@ void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) {
return;
}
- if (!instr->CanDoImplicitNullCheck()) {
+ if (!instr->CanDoImplicitNullCheckOn(instr->InputAt(0))) {
return;
}
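AddStackMapEntry is replaced by a Begin/End pair bracketing the per-dex-register recording, and the total size is only known once PrepareForFillIn() has run. A toy, self-contained sketch of that protocol (ToyStackMapStream and its encoding are hypothetical, not ART's):

#include <cstddef>
#include <cstdint>
#include <vector>

class ToyStackMapStream {
 public:
  void BeginStackMapEntry(uint32_t dex_pc, uint32_t native_pc) {
    current_ = Entry{dex_pc, native_pc, {}};
  }
  void AddDexRegisterLocation(int32_t location) {
    current_.regs.push_back(location);
  }
  void EndStackMapEntry() { entries_.push_back(current_); }
  // The size is only computable after all entries have been ended.
  std::size_t PrepareForFillIn() const {
    std::size_t size = 0;
    for (const Entry& e : entries_) size += 8 + 4 * e.regs.size();
    return size;
  }

 private:
  struct Entry { uint32_t dex_pc; uint32_t native_pc; std::vector<int32_t> regs; };
  Entry current_{};
  std::vector<Entry> entries_;
};

int main() {
  ToyStackMapStream stream;
  stream.BeginStackMapEntry(/*dex_pc=*/0, /*native_pc=*/0x20);
  stream.AddDexRegisterLocation(42);  // recorded between Begin and End
  stream.EndStackMapEntry();
  return stream.PrepareForFillIn() == 12 ? 0 : 1;
}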
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 7e03865b5f..ae1fb537bb 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1214,6 +1214,14 @@ void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant)
UNUSED(constant);
}
+void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+ memory_barrier->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+ GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+}
+
void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
@@ -3890,9 +3898,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
SlowPathCodeARM* slow_path = nullptr;
// Return 0 if `obj` is null.
- // TODO: avoid this check if we know obj is not null.
- __ cmp(obj, ShifterOperand(0));
- __ b(&zero, EQ);
+ // Avoid null check if we know obj is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ cmp(obj, ShifterOperand(0));
+ __ b(&zero, EQ);
+ }
// Compare the class of `obj` with `cls`.
__ LoadFromOffset(kLoadWord, out, obj, class_offset);
__ cmp(out, ShifterOperand(cls));
@@ -3911,8 +3921,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
__ LoadImmediate(out, 1);
__ b(&done);
}
- __ Bind(&zero);
- __ LoadImmediate(out, 0);
+
+ if (instruction->MustDoNullCheck() || instruction->IsClassFinal()) {
+ __ Bind(&zero);
+ __ LoadImmediate(out, 0);
+ }
+
if (slow_path != nullptr) {
__ Bind(slow_path->GetExitLabel());
}
@@ -3938,9 +3952,11 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
codegen_->AddSlowPath(slow_path);
- // TODO: avoid this check if we know obj is not null.
- __ cmp(obj, ShifterOperand(0));
- __ b(slow_path->GetExitLabel(), EQ);
+ // Avoid null check if we know obj is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ cmp(obj, ShifterOperand(0));
+ __ b(slow_path->GetExitLabel(), EQ);
+ }
// Compare the class of `obj` with `cls`.
__ LoadFromOffset(kLoadWord, temp, obj, class_offset);
__ cmp(temp, ShifterOperand(cls));
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 06f425ea21..600903621d 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -96,10 +96,10 @@ class InvokeDexCallingConventionVisitor {
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
-class ParallelMoveResolverARM : public ParallelMoveResolver {
+class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap {
public:
ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen)
- : ParallelMoveResolver(allocator), codegen_(codegen) {}
+ : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
void EmitMove(size_t index) OVERRIDE;
void EmitSwap(size_t index) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4be46126e9..1c6debdded 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -425,30 +425,67 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
CodeGenerator::Finalize(allocator);
}
-void ParallelMoveResolverARM64::EmitMove(size_t index) {
- MoveOperands* move = moves_.Get(index);
- codegen_->MoveLocation(move->GetDestination(), move->GetSource());
-}
-
-void ParallelMoveResolverARM64::EmitSwap(size_t index) {
- MoveOperands* move = moves_.Get(index);
- codegen_->SwapLocations(move->GetDestination(), move->GetSource());
+void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
+ // Note: There are 6 kinds of moves:
+ // 1. constant -> GPR/FPR (non-cycle)
+ // 2. constant -> stack (non-cycle)
+ // 3. GPR/FPR -> GPR/FPR
+ // 4. GPR/FPR -> stack
+ // 5. stack -> GPR/FPR
+ // 6. stack -> stack (non-cycle)
+ // Cases 1, 2 and 6 should never be included in a dependency cycle on ARM64. For cases 3, 4
+ // and 5, VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no
+ // intersecting cycles on ARM64, so there is always 1 GPR and 1 FPR VIXL temp available to
+ // resolve the dependency.
+ vixl_temps_.Open(GetVIXLAssembler());
+}
+
+void ParallelMoveResolverARM64::FinishEmitNativeCode() {
+ vixl_temps_.Close();
+}
+
+Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
+ DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister ||
+ kind == Location::kStackSlot || kind == Location::kDoubleStackSlot);
+ kind = (kind == Location::kFpuRegister) ? Location::kFpuRegister : Location::kRegister;
+ Location scratch = GetScratchLocation(kind);
+ if (!scratch.Equals(Location::NoLocation())) {
+ return scratch;
+ }
+ // Allocate from VIXL temp registers.
+ if (kind == Location::kRegister) {
+ scratch = LocationFrom(vixl_temps_.AcquireX());
+ } else {
+ DCHECK(kind == Location::kFpuRegister);
+ scratch = LocationFrom(vixl_temps_.AcquireD());
+ }
+ AddScratchLocation(scratch);
+ return scratch;
}
-void ParallelMoveResolverARM64::RestoreScratch(int reg) {
- __ Pop(Register(VIXLRegCodeFromART(reg), kXRegSize));
+void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
+ if (loc.IsRegister()) {
+ vixl_temps_.Release(XRegisterFrom(loc));
+ } else {
+ DCHECK(loc.IsFpuRegister());
+ vixl_temps_.Release(DRegisterFrom(loc));
+ }
+ RemoveScratchLocation(loc);
}
-void ParallelMoveResolverARM64::SpillScratch(int reg) {
- __ Push(Register(VIXLRegCodeFromART(reg), kXRegSize));
+void ParallelMoveResolverARM64::EmitMove(size_t index) {
+ MoveOperands* move = moves_.Get(index);
+ codegen_->MoveLocation(move->GetDestination(), move->GetSource());
}
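ParallelMoveResolverNoSwap resolves a dependency cycle by parking one value in a scratch location and then emitting plain moves, which is why one GPR plus one FPR VIXL temp suffices per cycle. A toy model of that strategy (hypothetical helper; registers modeled as array slots):

#include <array>

// Parallel move {r1 := r0, r2 := r1, r0 := r2} forms a cycle; break it by
// copying one source to scratch first, then emit ordinary moves.
void RotateWithScratch(std::array<int, 3>& regs) {
  int scratch = regs[2];  // AllocateScratchLocationFor(Location::kRegister)
  regs[2] = regs[1];
  regs[1] = regs[0];
  regs[0] = scratch;      // FreeScratchLocation(scratch)
}

int main() {
  std::array<int, 3> regs = {10, 11, 12};  // r0, r1, r2
  RotateWithScratch(regs);                 // now {12, 10, 11}
  return regs[0] == 12 ? 0 : 1;
}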
void CodeGeneratorARM64::GenerateFrameEntry() {
+ MacroAssembler* masm = GetVIXLAssembler();
+ BlockPoolsScope block_pools(masm);
__ Bind(&frame_entry_label_);
bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
if (do_overflow_check) {
- UseScratchRegisterScope temps(GetVIXLAssembler());
+ UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireX();
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
__ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
@@ -474,6 +511,7 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
}
void CodeGeneratorARM64::GenerateFrameExit() {
+ BlockPoolsScope block_pools(GetVIXLAssembler());
GetAssembler()->cfi().RememberState();
if (!HasEmptyFrame()) {
int frame_size = GetFrameSize();
@@ -726,10 +764,10 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri
if (destination.IsRegister()) {
__ Mov(Register(dst), RegisterFrom(source, type));
} else {
+ DCHECK(destination.IsFpuRegister());
__ Fmov(FPRegister(dst), FPRegisterFrom(source, type));
}
}
-
} else { // The destination is not a register. It must be a stack slot.
DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
if (source.IsRegister() || source.IsFpuRegister()) {
@@ -772,67 +810,6 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri
}
}
-void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) {
- DCHECK(!loc1.IsConstant());
- DCHECK(!loc2.IsConstant());
-
- if (loc1.Equals(loc2)) {
- return;
- }
-
- UseScratchRegisterScope temps(GetAssembler()->vixl_masm_);
-
- bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot();
- bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot();
- bool is_fp_reg1 = loc1.IsFpuRegister();
- bool is_fp_reg2 = loc2.IsFpuRegister();
-
- if (loc2.IsRegister() && loc1.IsRegister()) {
- Register r1 = XRegisterFrom(loc1);
- Register r2 = XRegisterFrom(loc2);
- Register tmp = temps.AcquireSameSizeAs(r1);
- __ Mov(tmp, r2);
- __ Mov(r2, r1);
- __ Mov(r1, tmp);
- } else if (is_fp_reg2 && is_fp_reg1) {
- FPRegister r1 = DRegisterFrom(loc1);
- FPRegister r2 = DRegisterFrom(loc2);
- FPRegister tmp = temps.AcquireSameSizeAs(r1);
- __ Fmov(tmp, r2);
- __ Fmov(r2, r1);
- __ Fmov(r1, tmp);
- } else if (is_slot1 != is_slot2) {
- MemOperand mem = StackOperandFrom(is_slot1 ? loc1 : loc2);
- Location reg_loc = is_slot1 ? loc2 : loc1;
- CPURegister reg, tmp;
- if (reg_loc.IsFpuRegister()) {
- reg = DRegisterFrom(reg_loc);
- tmp = temps.AcquireD();
- } else {
- reg = XRegisterFrom(reg_loc);
- tmp = temps.AcquireX();
- }
- __ Ldr(tmp, mem);
- __ Str(reg, mem);
- if (reg_loc.IsFpuRegister()) {
- __ Fmov(FPRegister(reg), FPRegister(tmp));
- } else {
- __ Mov(Register(reg), Register(tmp));
- }
- } else if (is_slot1 && is_slot2) {
- MemOperand mem1 = StackOperandFrom(loc1);
- MemOperand mem2 = StackOperandFrom(loc2);
- Register tmp1 = loc1.IsStackSlot() ? temps.AcquireW() : temps.AcquireX();
- Register tmp2 = temps.AcquireSameSizeAs(tmp1);
- __ Ldr(tmp1, mem1);
- __ Ldr(tmp2, mem2);
- __ Str(tmp1, mem2);
- __ Str(tmp2, mem1);
- } else {
- LOG(FATAL) << "Unimplemented";
- }
-}
-
void CodeGeneratorARM64::Load(Primitive::Type type,
CPURegister dst,
const MemOperand& src) {
@@ -865,7 +842,9 @@ void CodeGeneratorARM64::Load(Primitive::Type type,
void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
CPURegister dst,
const MemOperand& src) {
- UseScratchRegisterScope temps(GetVIXLAssembler());
+ MacroAssembler* masm = GetVIXLAssembler();
+ BlockPoolsScope block_pools(masm);
+ UseScratchRegisterScope temps(masm);
Register temp_base = temps.AcquireX();
Primitive::Type type = instruction->GetType();
@@ -995,6 +974,7 @@ void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path) {
+ BlockPoolsScope block_pools(GetVIXLAssembler());
__ Ldr(lr, MemOperand(tr, entry_point_offset));
__ Blr(lr);
if (instruction != nullptr) {
@@ -1130,6 +1110,83 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
}
}
+void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (Primitive::IsFloatingPointType(instruction->GetType())) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ }
+}
+
+void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+ BlockPoolsScope block_pools(GetVIXLAssembler());
+
+ MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
+ bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
+
+ if (field_info.IsVolatile()) {
+ if (use_acquire_release) {
+ // NB: LoadAcquire will record the pc info if needed.
+ codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
+ } else {
+ codegen_->Load(field_info.GetFieldType(), OutputCPURegister(instruction), field);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // For IRIW sequential consistency kLoadAny is not sufficient.
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ codegen_->Load(field_info.GetFieldType(), OutputCPURegister(instruction), field);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+}
+
+void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+ BlockPoolsScope block_pools(GetVIXLAssembler());
+
+ Register obj = InputRegisterAt(instruction, 0);
+ CPURegister value = InputCPURegisterAt(instruction, 1);
+ Offset offset = field_info.GetFieldOffset();
+ Primitive::Type field_type = field_info.GetFieldType();
+ bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
+
+ if (field_info.IsVolatile()) {
+ if (use_acquire_release) {
+ codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ } else {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ codegen_->Store(field_type, value, HeapOperand(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ codegen_->Store(field_type, value, HeapOperand(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ codegen_->MarkGCCard(obj, Register(value));
+ }
+}
+
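The two volatile lowerings above differ in where the ordering comes from: acquire/release instructions (ldar/stlr) when the core prefers them, or explicit barriers around plain accesses otherwise. A loose std::atomic analogue of the two strategies (illustrative only, not an exact memory-model mapping):

#include <atomic>

std::atomic<int> field{0};

int VolatileGetAcquireRelease() {
  return field.load(std::memory_order_acquire);  // LoadAcquire (ldar)
}

int VolatileGetWithBarriers() {
  int v = field.load(std::memory_order_relaxed);
  // kLoadAny alone would not make independent reads of independent writes
  // (IRIW) appear in a single order, hence the full kAnyAny barrier.
  std::atomic_thread_fence(std::memory_order_seq_cst);
  return v;
}

void VolatileSetWithBarriers(int v) {
  std::atomic_thread_fence(std::memory_order_release);  // kAnyStore
  field.store(v, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);  // kAnyAny
}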
void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
Primitive::Type type = instr->GetType();
@@ -1264,7 +1321,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
Location index = locations->InAt(1);
size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
MemOperand source = HeapOperand(obj);
- UseScratchRegisterScope temps(GetVIXLAssembler());
+ MacroAssembler* masm = GetVIXLAssembler();
+ UseScratchRegisterScope temps(masm);
+ BlockPoolsScope block_pools(masm);
if (index.IsConstant()) {
offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
@@ -1287,22 +1346,23 @@ void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
}
void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
+ BlockPoolsScope block_pools(GetVIXLAssembler());
__ Ldr(OutputRegister(instruction),
HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset()));
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
- Primitive::Type value_type = instruction->GetComponentType();
- bool is_object = value_type == Primitive::kPrimNot;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
- instruction, is_object ? LocationSummary::kCall : LocationSummary::kNoCall);
- if (is_object) {
+ if (instruction->NeedsTypeCheck()) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
} else {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
@@ -1315,31 +1375,42 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
Primitive::Type value_type = instruction->GetComponentType();
- if (value_type == Primitive::kPrimNot) {
+ LocationSummary* locations = instruction->GetLocations();
+ bool needs_runtime_call = locations->WillCall();
+
+ if (needs_runtime_call) {
codegen_->InvokeRuntime(
QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc(), nullptr);
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
} else {
- LocationSummary* locations = instruction->GetLocations();
Register obj = InputRegisterAt(instruction, 0);
CPURegister value = InputCPURegisterAt(instruction, 2);
Location index = locations->InAt(1);
size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
MemOperand destination = HeapOperand(obj);
- UseScratchRegisterScope temps(GetVIXLAssembler());
+ MacroAssembler* masm = GetVIXLAssembler();
+ BlockPoolsScope block_pools(masm);
+ {
+ // We use a block to end the scratch scope before the write barrier, thus
+ // freeing the temporary registers so they can be used in `MarkGCCard`.
+ UseScratchRegisterScope temps(masm);
+
+ if (index.IsConstant()) {
+ offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
+ destination = HeapOperand(obj, offset);
+ } else {
+ Register temp = temps.AcquireSameSizeAs(obj);
+ Register index_reg = InputRegisterAt(instruction, 1);
+ __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(value_type)));
+ destination = HeapOperand(temp, offset);
+ }
- if (index.IsConstant()) {
- offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
- destination = HeapOperand(obj, offset);
- } else {
- Register temp = temps.AcquireSameSizeAs(obj);
- Register index_reg = InputRegisterAt(instruction, 1);
- __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(value_type)));
- destination = HeapOperand(temp, offset);
+ codegen_->Store(value_type, value, destination);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+ if (CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue())) {
+ codegen_->MarkGCCard(obj, value.W());
}
-
- codegen_->Store(value_type, value, destination);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
}
}
@@ -1381,8 +1452,10 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
instruction, locations->InAt(1), LocationFrom(obj_cls), instruction->GetDexPc());
codegen_->AddSlowPath(slow_path);
- // TODO: avoid this check if we know obj is not null.
- __ Cbz(obj, slow_path->GetExitLabel());
+ // Avoid null check if we know obj is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Cbz(obj, slow_path->GetExitLabel());
+ }
// Compare the class of `obj` with `cls`.
__ Ldr(obj_cls, HeapOperand(obj, mirror::Object::ClassOffset()));
__ Cmp(obj_cls, cls);
@@ -1750,72 +1823,19 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::RequiresRegister());
- if (Primitive::IsFloatingPointType(instruction->GetType())) {
- locations->SetOut(Location::RequiresFpuRegister());
- } else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- }
+ HandleFieldGet(instruction);
}
void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
- MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
- bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
-
- if (instruction->IsVolatile()) {
- if (use_acquire_release) {
- // NB: LoadAcquire will record the pc info if needed.
- codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
- } else {
- codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- // For IRIW sequential consistency kLoadAny is not sufficient.
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
- }
- } else {
- codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
}
void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::RequiresRegister());
- if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
- locations->SetInAt(1, Location::RequiresFpuRegister());
- } else {
- locations->SetInAt(1, Location::RequiresRegister());
- }
+ HandleFieldSet(instruction);
}
void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- Register obj = InputRegisterAt(instruction, 0);
- CPURegister value = InputCPURegisterAt(instruction, 1);
- Offset offset = instruction->GetFieldOffset();
- Primitive::Type field_type = instruction->GetFieldType();
- bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
-
- if (instruction->IsVolatile()) {
- if (use_acquire_release) {
- codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- } else {
- GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
- codegen_->Store(field_type, value, HeapOperand(obj, offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
- }
- } else {
- codegen_->Store(field_type, value, HeapOperand(obj, offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
-
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
- codegen_->MarkGCCard(obj, Register(value));
- }
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
}
void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -1837,9 +1857,11 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
vixl::Label done;
// Return 0 if `obj` is null.
- // TODO: Avoid this check if we know `obj` is not null.
- __ Mov(out, 0);
- __ Cbz(obj, &done);
+ // Avoid null check if we know `obj` is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Mov(out, 0);
+ __ Cbz(obj, &done);
+ }
// Compare the class of `obj` with `cls`.
__ Ldr(out, HeapOperand(obj, mirror::Object::ClassOffset()));
@@ -1914,7 +1936,9 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
// The register ip1 is required to be used for the hidden argument in
// art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
- UseScratchRegisterScope scratch_scope(GetVIXLAssembler());
+ MacroAssembler* masm = GetVIXLAssembler();
+ UseScratchRegisterScope scratch_scope(masm);
+ BlockPoolsScope block_pools(masm);
scratch_scope.Exclude(ip1);
__ Mov(ip1, invoke->GetDexMethodIndex());
@@ -2000,6 +2024,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir
return;
}
+ BlockPoolsScope block_pools(GetVIXLAssembler());
Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0));
codegen_->GenerateStaticOrDirectCall(invoke, temp);
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -2018,6 +2043,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
Offset class_offset = mirror::Object::ClassOffset();
Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+ BlockPoolsScope block_pools(GetVIXLAssembler());
+
// temp = object->GetClass();
if (receiver.IsStackSlot()) {
__ Ldr(temp, MemOperand(sp, receiver.GetStackIndex()));
@@ -2318,8 +2345,9 @@ void InstructionCodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instru
if (codegen_->CanMoveNullCheckToUser(instruction)) {
return;
}
- Location obj = instruction->GetLocations()->InAt(0);
+ BlockPoolsScope block_pools(GetVIXLAssembler());
+ Location obj = instruction->GetLocations()->InAt(0);
__ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
}
@@ -2446,6 +2474,14 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
}
}
+void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+ memory_barrier->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+ GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+}
+
void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
Primitive::Type return_type = instruction->InputAt(0)->GetType();
@@ -2519,67 +2555,19 @@ void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
}
void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::RequiresRegister());
- if (Primitive::IsFloatingPointType(instruction->GetType())) {
- locations->SetOut(Location::RequiresFpuRegister());
- } else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- }
+ HandleFieldGet(instruction);
}
void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
- MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
- bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
-
- if (instruction->IsVolatile()) {
- if (use_acquire_release) {
- // NB: LoadAcquire will record the pc info if needed.
- codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
- } else {
- codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
- // For IRIW sequential consistency kLoadAny is not sufficient.
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
- }
- } else {
- codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
- }
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
}
void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::RequiresRegister());
- if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
- locations->SetInAt(1, Location::RequiresFpuRegister());
- } else {
- locations->SetInAt(1, Location::RequiresRegister());
- }
+ HandleFieldSet(instruction);
}
void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- Register cls = InputRegisterAt(instruction, 0);
- CPURegister value = InputCPURegisterAt(instruction, 1);
- Offset offset = instruction->GetFieldOffset();
- Primitive::Type field_type = instruction->GetFieldType();
- bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
-
- if (instruction->IsVolatile()) {
- if (use_acquire_release) {
- codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset));
- } else {
- GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
- codegen_->Store(field_type, value, HeapOperand(cls, offset));
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
- }
- } else {
- codegen_->Store(field_type, value, HeapOperand(cls, offset));
- }
-
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
- codegen_->MarkGCCard(cls, Register(value));
- }
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
}
void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 07c6dd059a..5a358671cc 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -159,6 +159,8 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor {
void GenerateMemoryBarrier(MemBarrierKind kind);
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* instr);
+ void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void HandleShift(HBinaryOperation* instr);
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
@@ -185,8 +187,10 @@ class LocationsBuilderARM64 : public HGraphVisitor {
private:
void HandleBinaryOp(HBinaryOperation* instr);
- void HandleShift(HBinaryOperation* instr);
+ void HandleFieldSet(HInstruction* instruction);
+ void HandleFieldGet(HInstruction* instruction);
void HandleInvoke(HInvoke* instr);
+ void HandleShift(HBinaryOperation* instr);
CodeGeneratorARM64* const codegen_;
InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -194,15 +198,17 @@ class LocationsBuilderARM64 : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
};
-class ParallelMoveResolverARM64 : public ParallelMoveResolver {
+class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
public:
ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen)
- : ParallelMoveResolver(allocator), codegen_(codegen) {}
+ : ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {}
+ protected:
+ void PrepareForEmitNativeCode() OVERRIDE;
+ void FinishEmitNativeCode() OVERRIDE;
+ Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE;
+ void FreeScratchLocation(Location loc) OVERRIDE;
void EmitMove(size_t index) OVERRIDE;
- void EmitSwap(size_t index) OVERRIDE;
- void RestoreScratch(int reg) OVERRIDE;
- void SpillScratch(int reg) OVERRIDE;
private:
Arm64Assembler* GetAssembler() const;
@@ -211,6 +217,7 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver {
}
CodeGeneratorARM64* const codegen_;
+ vixl::UseScratchRegisterScope vixl_temps_;
DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
};
@@ -318,7 +325,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
// locations, and is used for optimisation and debugging.
void MoveLocation(Location destination, Location source,
Primitive::Type type = Primitive::kPrimVoid);
- void SwapLocations(Location loc_1, Location loc_2);
void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
void LoadCurrentMethod(vixl::Register current_method);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0f1175563e..c604842d86 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -877,7 +877,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio
if (rhs.IsRegister()) {
__ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
} else if (rhs.IsConstant()) {
- int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue();
+ int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
if (constant == 0) {
__ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
} else {
@@ -1120,6 +1120,14 @@ void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant)
UNUSED(constant);
}
+void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+ memory_barrier->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+ GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+}
+
void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
@@ -1212,6 +1220,7 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec
codegen_->GenerateStaticOrDirectCall(
invoke, invoke->GetLocations()->GetTemp(0).AsRegister<Register>());
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -1547,10 +1556,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
case Primitive::kPrimLong:
// Processing a Dex `long-to-float' instruction.
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
+ locations->SetInAt(0, Location::Any());
+ locations->SetOut(Location::Any());
break;
case Primitive::kPrimDouble:
@@ -1580,10 +1587,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
case Primitive::kPrimLong:
// Processing a Dex `long-to-double' instruction.
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
+ locations->SetInAt(0, Location::Any());
+ locations->SetOut(Location::Any());
break;
case Primitive::kPrimFloat:
@@ -1804,37 +1809,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
case Primitive::kPrimLong: {
// Processing a Dex `long-to-float' instruction.
- Register low = in.AsRegisterPairLow<Register>();
- Register high = in.AsRegisterPairHigh<Register>();
- XmmRegister result = out.AsFpuRegister<XmmRegister>();
- XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-
- // Operations use doubles for precision reasons (each 32-bit
- // half of a long fits in the 53-bit mantissa of a double,
- // but not in the 24-bit mantissa of a float). This is
- // especially important for the low bits. The result is
- // eventually converted to float.
-
- // low = low - 2^31 (to prevent bit 31 of `low` to be
- // interpreted as a sign bit)
- __ subl(low, Immediate(0x80000000));
- // temp = int-to-double(high)
- __ cvtsi2sd(temp, high);
- // temp = temp * 2^32
- __ LoadLongConstant(constant, k2Pow32EncodingForDouble);
- __ mulsd(temp, constant);
- // result = int-to-double(low)
- __ cvtsi2sd(result, low);
- // result = result + 2^31 (restore the original value of `low`)
- __ LoadLongConstant(constant, k2Pow31EncodingForDouble);
- __ addsd(result, constant);
- // result = result + temp
- __ addsd(result, temp);
- // result = double-to-float(result)
- __ cvtsd2ss(result, result);
- // Restore low.
- __ addl(low, Immediate(0x80000000));
+ size_t adjustment = 0;
+
+ // Create stack space for the call to
+ // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
+ // TODO: enhance register allocator to ask for stack temporaries.
+ if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
+ adjustment = Primitive::ComponentSize(Primitive::kPrimLong);
+ __ subl(ESP, Immediate(adjustment));
+ }
+
+ // Load the value to the FP stack, using temporaries if needed.
+ PushOntoFPStack(in, 0, adjustment, false, true);
+
+ if (out.IsStackSlot()) {
+ __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
+ } else {
+ __ fstps(Address(ESP, 0));
+ Location stack_temp = Location::StackSlot(0);
+ codegen_->Move32(out, stack_temp);
+ }
+
+ // Remove the temporary stack space we allocated.
+ if (adjustment != 0) {
+ __ addl(ESP, Immediate(adjustment));
+ }
break;
}
@@ -1863,29 +1862,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
case Primitive::kPrimLong: {
// Processing a Dex `long-to-double' instruction.
- Register low = in.AsRegisterPairLow<Register>();
- Register high = in.AsRegisterPairHigh<Register>();
- XmmRegister result = out.AsFpuRegister<XmmRegister>();
- XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-
- // low = low - 2^31 (to prevent bit 31 of `low` to be
- // interpreted as a sign bit)
- __ subl(low, Immediate(0x80000000));
- // temp = int-to-double(high)
- __ cvtsi2sd(temp, high);
- // temp = temp * 2^32
- __ LoadLongConstant(constant, k2Pow32EncodingForDouble);
- __ mulsd(temp, constant);
- // result = int-to-double(low)
- __ cvtsi2sd(result, low);
- // result = result + 2^31 (restore the original value of `low`)
- __ LoadLongConstant(constant, k2Pow31EncodingForDouble);
- __ addsd(result, constant);
- // result = result + temp
- __ addsd(result, temp);
- // Restore low.
- __ addl(low, Immediate(0x80000000));
+ size_t adjustment = 0;
+
+ // Create stack space for the call to
+ // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
+ // TODO: enhance register allocator to ask for stack temporaries.
+ if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
+ adjustment = Primitive::ComponentSize(Primitive::kPrimLong);
+ __ subl(ESP, Immediate(adjustment));
+ }
+
+ // Load the value to the FP stack, using temporaries if needed.
+ PushOntoFPStack(in, 0, adjustment, false, true);
+
+ if (out.IsDoubleStackSlot()) {
+ __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
+ } else {
+ __ fstpl(Address(ESP, 0));
+ Location stack_temp = Location::DoubleStackSlot(0);
+ codegen_->Move64(out, stack_temp);
+ }
+
+ // Remove the temporary stack space we allocated.
+ if (adjustment != 0) {
+ __ addl(ESP, Immediate(adjustment));
+ }
break;
}
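The deleted SSE sequences computed the conversion as value = high * 2^32 + low, biasing the low half by 2^31 so its top bit is not read as a sign bit; each 32-bit half fits exactly in a double's 53-bit mantissa. The replacement instead loads the 64-bit integer directly onto the x87 stack (fildl) and stores at the target precision (fstps/fstpl), rounding once. A standalone check of the old arithmetic (hypothetical helper name):

#include <cstdint>
#include <cstdio>

double LongToDoubleViaHalves(int64_t value) {
  uint32_t low = static_cast<uint32_t>(value);
  int32_t high = static_cast<int32_t>(value >> 32);
  // subl low, 0x80000000: bias so the 32-bit pattern reads as a signed int.
  int32_t biased_low = static_cast<int32_t>(low - 0x80000000u);
  return static_cast<double>(high) * 4294967296.0           // high * 2^32
       + (static_cast<double>(biased_low) + 2147483648.0);  // un-bias low
}

int main() {
  int64_t v = INT64_C(-1234567890123);
  std::printf("%f vs %f\n", LongToDoubleViaHalves(v), static_cast<double>(v));
}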
@@ -2225,24 +2226,43 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
}
}
-void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset,
- uint32_t stack_adjustment, bool is_float) {
+void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
+ uint32_t temp_offset,
+ uint32_t stack_adjustment,
+ bool is_fp,
+ bool is_wide) {
if (source.IsStackSlot()) {
- DCHECK(is_float);
- __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ DCHECK(!is_wide);
+ if (is_fp) {
+ __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ } else {
+ __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ }
} else if (source.IsDoubleStackSlot()) {
- DCHECK(!is_float);
- __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ DCHECK(is_wide);
+ if (is_fp) {
+ __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ } else {
+ __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ }
} else {
// Write the value to the temporary location on the stack and load to FP stack.
- if (is_float) {
+ if (!is_wide) {
Location stack_temp = Location::StackSlot(temp_offset);
codegen_->Move32(stack_temp, source);
- __ flds(Address(ESP, temp_offset));
+ if (is_fp) {
+ __ flds(Address(ESP, temp_offset));
+ } else {
+ __ filds(Address(ESP, temp_offset));
+ }
} else {
Location stack_temp = Location::DoubleStackSlot(temp_offset);
codegen_->Move64(stack_temp, source);
- __ fldl(Address(ESP, temp_offset));
+ if (is_fp) {
+ __ fldl(Address(ESP, temp_offset));
+ } else {
+ __ fildl(Address(ESP, temp_offset));
+ }
}
}
}
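PushOntoFPStack now picks among four x87 load forms: fld* for floating-point sources, fild* for integer sources, with the s/l suffix selecting 32- or 64-bit width. A trivial lookup capturing that matrix (hypothetical helper):

const char* FPStackLoadMnemonic(bool is_fp, bool is_wide) {
  if (is_fp) {
    return is_wide ? "fldl" : "flds";  // load double/float
  }
  return is_wide ? "fildl" : "filds";  // load long/int, converting
}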
@@ -2261,8 +2281,9 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
__ subl(ESP, Immediate(2 * elem_size));
// Load the values to the FP stack in reverse order, using temporaries if needed.
- PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
- PushOntoFPStack(first, 0, 2 * elem_size, is_float);
+ const bool is_wide = !is_float;
+ PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide);
+ PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide);
// Loop doing FPREM until we stabilize.
Label retry;
@@ -3098,7 +3119,6 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
}
DCHECK(!IsLeafMethod());
- RecordPcInfo(invoke, invoke->GetDexPc());
}
void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) {
@@ -4230,9 +4250,11 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
SlowPathCodeX86* slow_path = nullptr;
// Return 0 if `obj` is null.
- // TODO: avoid this check if we know obj is not null.
- __ testl(obj, obj);
- __ j(kEqual, &zero);
+ // Avoid null check if we know obj is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ testl(obj, obj);
+ __ j(kEqual, &zero);
+ }
__ movl(out, Address(obj, class_offset));
// Compare the class of `obj` with `cls`.
if (cls.IsRegister()) {
@@ -4257,8 +4279,12 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
__ movl(out, Immediate(1));
__ jmp(&done);
}
- __ Bind(&zero);
- __ movl(out, Immediate(0));
+
+ if (instruction->MustDoNullCheck() || instruction->IsClassFinal()) {
+ __ Bind(&zero);
+ __ movl(out, Immediate(0));
+ }
+
if (slow_path != nullptr) {
__ Bind(slow_path->GetExitLabel());
}
@@ -4283,11 +4309,13 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
codegen_->AddSlowPath(slow_path);
- // TODO: avoid this check if we know obj is not null.
- __ testl(obj, obj);
- __ j(kEqual, slow_path->GetExitLabel());
- __ movl(temp, Address(obj, class_offset));
+ // Avoid null check if we know obj is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ testl(obj, obj);
+ __ j(kEqual, slow_path->GetExitLabel());
+ }
+ __ movl(temp, Address(obj, class_offset));
// Compare the class of `obj` with `cls`.
if (cls.IsRegister()) {
__ cmpl(temp, cls.AsRegister<Register>());
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 368ae0fb0e..8bd3cd3585 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -93,10 +93,10 @@ class InvokeDexCallingConventionVisitor {
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
-class ParallelMoveResolverX86 : public ParallelMoveResolver {
+class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
public:
ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen)
- : ParallelMoveResolver(allocator), codegen_(codegen) {}
+ : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
void EmitMove(size_t index) OVERRIDE;
void EmitSwap(size_t index) OVERRIDE;
@@ -174,8 +174,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateMemoryBarrier(MemBarrierKind kind);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
+ // `is_wide` specifies whether it is long/double or not.
void PushOntoFPStack(Location source, uint32_t temp_offset,
- uint32_t stack_adjustment, bool is_float);
+ uint32_t stack_adjustment, bool is_fp, bool is_wide);
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5b681fa62b..47425fb9ae 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1023,14 +1023,14 @@ void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
switch (compare->InputAt(0)->GetType()) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1)));
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::RequiresRegister());
break;
}
@@ -1052,24 +1052,46 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
CpuRegister left_reg = left.AsRegister<CpuRegister>();
if (right.IsConstant()) {
int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
- DCHECK(IsInt<32>(value));
- if (value == 0) {
- __ testq(left_reg, left_reg);
+ if (IsInt<32>(value)) {
+ if (value == 0) {
+ __ testq(left_reg, left_reg);
+ } else {
+ __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+ }
} else {
- __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+ // Value won't fit in an int.
+ __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
}
+ } else if (right.IsDoubleStackSlot()) {
+ __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
} else {
__ cmpq(left_reg, right.AsRegister<CpuRegister>());
}
break;
}
case Primitive::kPrimFloat: {
- __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+ XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
+ if (right.IsConstant()) {
+ float value = right.GetConstant()->AsFloatConstant()->GetValue();
+ __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
+ } else if (right.IsStackSlot()) {
+ __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+ } else {
+ __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
+ }
__ j(kUnordered, compare->IsGtBias() ? &greater : &less);
break;
}
case Primitive::kPrimDouble: {
- __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+ XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
+ if (right.IsConstant()) {
+ double value = right.GetConstant()->AsDoubleConstant()->GetValue();
+ __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
+ } else if (right.IsDoubleStackSlot()) {
+ __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+ } else {
+ __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
+ }
__ j(kUnordered, compare->IsGtBias() ? &greater : &less);
break;
}
@@ -1145,6 +1167,14 @@ void InstructionCodeGeneratorX86_64::VisitDoubleConstant(HDoubleConstant* consta
UNUSED(constant);
}
+void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+ memory_barrier->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+ GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+}
+
void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
@@ -1170,8 +1200,7 @@ void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- locations->SetInAt(0,
- Location::FpuRegisterLocation(XMM0));
+ locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
break;
default:
@@ -1411,7 +1440,6 @@ void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
case Primitive::kPrimDouble:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresFpuRegister());
break;
@@ -1439,26 +1467,22 @@ void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
case Primitive::kPrimFloat: {
DCHECK(in.Equals(out));
- CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>();
- XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
// Implement float negation with an exclusive or with value
// 0x80000000 (mask for bit 31, representing the sign of a
// single-precision floating-point number).
- __ movq(constant, Immediate(INT64_C(0x80000000)));
- __ movd(mask, constant);
+ __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
__ xorps(out.AsFpuRegister<XmmRegister>(), mask);
break;
}
case Primitive::kPrimDouble: {
DCHECK(in.Equals(out));
- CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>();
- XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
// Implement double negation with an exclusive or with value
// 0x8000000000000000 (mask for bit 63, representing the sign of
// a double-precision floating-point number).
- __ movq(constant, Immediate(INT64_C(0x8000000000000000)));
- __ movd(mask, constant);
+ __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
__ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
break;
}
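The literal-pool rewrite keeps the underlying trick: XOR with a mask selecting only the sign bit negates any IEEE-754 value, including zeros and NaNs, without touching the other bits. A standalone bit-level check (hypothetical helper name):

#include <cstdint>
#include <cstdio>
#include <cstring>

float NegateViaMask(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  bits ^= 0x80000000u;  // flip bit 31, the sign of a single-precision value
  std::memcpy(&f, &bits, sizeof(bits));
  return f;
}

int main() {
  std::printf("%f %f\n", NegateViaMask(1.5f), NegateViaMask(-0.0f));
}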
@@ -1605,19 +1629,19 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
case Primitive::kPrimInt:
case Primitive::kPrimChar:
// Processing a Dex `int-to-float' instruction.
- locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(0, Location::Any());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimLong:
// Processing a Dex `long-to-float' instruction.
- locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(0, Location::Any());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimDouble:
// Processing a Dex `double-to-float' instruction.
- locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(0, Location::Any());
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
@@ -1636,19 +1660,19 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
case Primitive::kPrimInt:
case Primitive::kPrimChar:
// Processing a Dex `int-to-double' instruction.
- locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(0, Location::Any());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimLong:
// Processing a Dex `long-to-double' instruction.
- locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(0, Location::Any());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimFloat:
// Processing a Dex `float-to-double' instruction.
- locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(0, Location::Any());
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
@@ -1902,17 +1926,56 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
case Primitive::kPrimInt:
case Primitive::kPrimChar:
// Processing a Dex `int-to-float' instruction.
- __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
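+ // The trailing bool selects the source width: true reads a 64-bit (REX.W) source, false a 32-bit one.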
+ if (in.IsRegister()) {
+ __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
+ } else if (in.IsConstant()) {
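+ // Fold the conversion at compile time and load the result from the constant
+ // area instead of emitting cvtsi2ss.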
+ int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+ if (v == 0) {
+ __ xorps(dest, dest);
+ } else {
+ __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
+ }
+ } else {
+ __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), in.GetStackIndex()), false);
+ }
break;
case Primitive::kPrimLong:
// Processing a Dex `long-to-float' instruction.
- __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+ if (in.IsRegister()) {
+ __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+ } else if (in.IsConstant()) {
+ int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
+ XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+ if (v == 0) {
+ __ xorps(dest, dest);
+ } else {
+ __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
+ }
+ } else {
+ __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), in.GetStackIndex()), true);
+ }
break;
case Primitive::kPrimDouble:
// Processing a Dex `double-to-float' instruction.
- __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+ if (in.IsFpuRegister()) {
+ __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+ } else if (in.IsConstant()) {
+ double v = in.GetConstant()->AsDoubleConstant()->GetValue();
+ XmmRegister dest = out.AsFpuRegister<XmmRegister>();
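+ // Compare bit patterns rather than `v == 0.0`: -0.0 must not take the xorps
+ // path, which would produce +0.0.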
+ if (bit_cast<int64_t, double>(v) == 0) {
+ __ xorps(dest, dest);
+ } else {
+ __ movss(dest, codegen_->LiteralFloatAddress(static_cast<float>(v)));
+ }
+ } else {
+ __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), in.GetStackIndex()));
+ }
break;
default:
@@ -1930,17 +1993,56 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
case Primitive::kPrimInt:
case Primitive::kPrimChar:
// Processing a Dex `int-to-double' instruction.
- __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
+ if (in.IsRegister()) {
+ __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
+ } else if (in.IsConstant()) {
+ int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+ if (v == 0) {
+ __ xorpd(dest, dest);
+ } else {
+ __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
+ }
+ } else {
+ __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), in.GetStackIndex()), false);
+ }
break;
case Primitive::kPrimLong:
// Processing a Dex `long-to-double' instruction.
- __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+ if (in.IsRegister()) {
+ __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+ } else if (in.IsConstant()) {
+ int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
+ XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+ if (v == 0) {
+ __ xorpd(dest, dest);
+ } else {
+ __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
+ }
+ } else {
+ __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), in.GetStackIndex()), true);
+ }
break;
case Primitive::kPrimFloat:
// Processing a Dex `float-to-double' instruction.
- __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+ if (in.IsFpuRegister()) {
+ __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
+ } else if (in.IsConstant()) {
+ float v = in.GetConstant()->AsFloatConstant()->GetValue();
+ XmmRegister dest = out.AsFpuRegister<XmmRegister>();
+ if (bit_cast<int32_t, float>(v) == 0) {
+ __ xorpd(dest, dest);
+ } else {
+ __ movsd(dest, codegen_->LiteralDoubleAddress(static_cast<double>(v)));
+ }
+ } else {
+ __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), in.GetStackIndex()));
+ }
break;
default:
@@ -3120,7 +3222,7 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
locations->SetInAt(1, Location::RequiresFpuRegister());
} else {
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
}
if (needs_write_barrier) {
// Temporary registers for the write barrier.
@@ -3147,24 +3249,46 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
switch (field_type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
- __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
+ if (value.IsConstant()) {
+ int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+ __ movb(Address(base, offset), Immediate(v));
+ } else {
+ __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
+ }
break;
}
case Primitive::kPrimShort:
case Primitive::kPrimChar: {
- __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
+ if (value.IsConstant()) {
+ int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+ __ movw(Address(base, offset), Immediate(v));
+ } else {
+ __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
+ }
break;
}
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
+ if (value.IsConstant()) {
+ int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+ __ movl(Address(base, offset), Immediate(v));
+ } else {
+ __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
+ }
break;
}
case Primitive::kPrimLong: {
- __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
+ if (value.IsConstant()) {
+ int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
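+ // `movq` accepts only a sign-extended 32-bit immediate; the locations builder
+ // guarantees the fit via RegisterOrInt32LongConstant.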
+ DCHECK(IsInt<32>(v));
+ int32_t v_32 = v;
+ __ movq(Address(base, offset), Immediate(v_32));
+ } else {
+ __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
+ }
break;
}
@@ -3283,8 +3407,7 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(
- 1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
@@ -3423,7 +3546,7 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
1, Location::RegisterOrConstant(instruction->InputAt(1)));
locations->SetInAt(2, Location::RequiresRegister());
if (value_type == Primitive::kPrimLong) {
- locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2)));
} else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
locations->SetInAt(2, Location::RequiresFpuRegister());
} else {
@@ -3511,8 +3634,8 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
__ movl(Address(obj, offset), value.AsRegister<CpuRegister>());
} else {
DCHECK(value.IsConstant()) << value;
- __ movl(Address(obj, offset),
- Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+ int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+ __ movl(Address(obj, offset), Immediate(v));
}
} else {
DCHECK(index.IsRegister()) << index;
@@ -3521,8 +3644,9 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
value.AsRegister<CpuRegister>());
} else {
DCHECK(value.IsConstant()) << value;
+ int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
__ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
- Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+ Immediate(v));
}
}
codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -3546,12 +3670,25 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
if (index.IsConstant()) {
size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- DCHECK(value.IsRegister());
- __ movq(Address(obj, offset), value.AsRegister<CpuRegister>());
+ if (value.IsRegister()) {
+ __ movq(Address(obj, offset), value.AsRegister<CpuRegister>());
+ } else {
+ int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+ DCHECK(IsInt<32>(v));
+ int32_t v_32 = v;
+ __ movq(Address(obj, offset), Immediate(v_32));
+ }
} else {
- DCHECK(value.IsRegister());
- __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
- value.AsRegister<CpuRegister>());
+ if (value.IsRegister()) {
+ __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
+ value.AsRegister<CpuRegister>());
+ } else {
+ int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+ DCHECK(IsInt<32>(v));
+ int32_t v_32 = v;
+ __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
+ Immediate(v_32));
+ }
}
codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
@@ -4044,9 +4181,11 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
SlowPathCodeX86_64* slow_path = nullptr;
// Return 0 if `obj` is null.
- // TODO: avoid this check if we know obj is not null.
- __ testl(obj, obj);
- __ j(kEqual, &zero);
+ // Avoid null check if we know obj is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ testl(obj, obj);
+ __ j(kEqual, &zero);
+ }
// Compare the class of `obj` with `cls`.
__ movl(out, Address(obj, class_offset));
if (cls.IsRegister()) {
@@ -4070,8 +4209,12 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
__ movl(out, Immediate(1));
__ jmp(&done);
}
- __ Bind(&zero);
- __ movl(out, Immediate(0));
+
+ if (instruction->MustDoNullCheck() || instruction->IsClassFinal()) {
+ __ Bind(&zero);
+ __ movl(out, Immediate(0));
+ }
+
if (slow_path != nullptr) {
__ Bind(slow_path->GetExitLabel());
}
@@ -4096,9 +4239,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
codegen_->AddSlowPath(slow_path);
- // TODO: avoid this check if we know obj is not null.
- __ testl(obj, obj);
- __ j(kEqual, slow_path->GetExitLabel());
+ // Avoid null check if we know obj is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ testl(obj, obj);
+ __ j(kEqual, slow_path->GetExitLabel());
+ }
// Compare the class of `obj` with `cls`.
__ movl(temp, Address(obj, class_offset));
if (cls.IsRegister()) {
@@ -4137,13 +4282,7 @@ void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instructio
DCHECK(instruction->GetResultType() == Primitive::kPrimInt
|| instruction->GetResultType() == Primitive::kPrimLong);
locations->SetInAt(0, Location::RequiresRegister());
- if (instruction->GetType() == Primitive::kPrimInt) {
- locations->SetInAt(1, Location::Any());
- } else {
- // We can handle 32 bit constants.
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
- }
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
}
@@ -4204,25 +4343,43 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in
if (second.IsConstant()) {
second_is_constant = true;
value = second.GetConstant()->AsLongConstant()->GetValue();
- DCHECK(IsInt<32>(value));
}
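+ // andq/orq/xorq only encode sign-extended 32-bit immediates; wider constants
+ // are applied from a 64-bit literal in memory instead.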
+ bool is_int32_value = IsInt<32>(value);
if (instruction->IsAnd()) {
if (second_is_constant) {
- __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+ if (is_int32_value) {
+ __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+ } else {
+ __ andq(first_reg, codegen_->LiteralInt64Address(value));
+ }
+ } else if (second.IsDoubleStackSlot()) {
+ __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
} else {
__ andq(first_reg, second.AsRegister<CpuRegister>());
}
} else if (instruction->IsOr()) {
if (second_is_constant) {
- __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+ if (is_int32_value) {
+ __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+ } else {
+ __ orq(first_reg, codegen_->LiteralInt64Address(value));
+ }
+ } else if (second.IsDoubleStackSlot()) {
+ __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
} else {
__ orq(first_reg, second.AsRegister<CpuRegister>());
}
} else {
DCHECK(instruction->IsXor());
if (second_is_constant) {
- __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+ if (is_int32_value) {
+ __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+ } else {
+ __ xorq(first_reg, codegen_->LiteralInt64Address(value));
+ }
+ } else if (second.IsDoubleStackSlot()) {
+ __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
} else {
__ xorq(first_reg, second.AsRegister<CpuRegister>());
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index b4876ef161..6cdc82262c 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -102,10 +102,10 @@ class SlowPathCodeX86_64 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
};
-class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
+class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
public:
ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
- : ParallelMoveResolver(allocator), codegen_(codegen) {}
+ : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
void EmitMove(size_t index) OVERRIDE;
void EmitSwap(size_t index) OVERRIDE;
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index fc3dd01ef5..91cd60acce 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -20,10 +20,78 @@
namespace art {
-void HDeadCodeElimination::Run() {
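+// Depth-first traversal that marks reachable blocks, skipping the statically
+// dead side of an HIf whose condition is a constant.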
+static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) {
+ int block_id = block->GetBlockId();
+ if (visited->IsBitSet(block_id)) {
+ return;
+ }
+ visited->SetBit(block_id);
+
+ HInstruction* last_instruction = block->GetLastInstruction();
+ if (last_instruction->IsIf()) {
+ HIf* if_instruction = last_instruction->AsIf();
+ HInstruction* condition = if_instruction->InputAt(0);
+ if (!condition->IsIntConstant()) {
+ MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited);
+ MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited);
+ } else if (condition->AsIntConstant()->IsOne()) {
+ MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited);
+ } else {
+ DCHECK(condition->AsIntConstant()->IsZero());
+ MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited);
+ }
+ } else {
+ for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
+ MarkReachableBlocks(block->GetSuccessors().Get(i), visited);
+ }
+ }
+}
+
+void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
+ if (stats_ != nullptr) {
+ stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction,
+ block->GetPhis().CountSize() + block->GetInstructions().CountSize());
+ }
+}
+
+void HDeadCodeElimination::RemoveDeadBlocks() {
+ // Classify blocks as reachable/unreachable.
+ ArenaAllocator* allocator = graph_->GetArena();
+ ArenaBitVector live_blocks(allocator, graph_->GetBlocks().Size(), false);
+ MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks);
+
+ // Remove all dead blocks. Process blocks in post-order, because removal needs
+ // the block's chain of dominators.
+ for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ if (live_blocks.IsBitSet(block->GetBlockId())) {
+ continue;
+ }
+ MaybeRecordDeadBlock(block);
+ block->DisconnectAndDelete();
+ }
+
+ // Connect successive blocks created by dead branches. Order does not matter.
+ for (HReversePostOrderIterator it(*graph_); !it.Done();) {
+ HBasicBlock* block = it.Current();
+ if (block->IsEntryBlock() || block->GetSuccessors().Size() != 1u) {
+ it.Advance();
+ continue;
+ }
+ HBasicBlock* successor = block->GetSuccessors().Get(0);
+ if (successor->IsExitBlock() || successor->GetPredecessors().Size() != 1u) {
+ it.Advance();
+ continue;
+ }
+ block->MergeWith(successor);
+
+ // Reiterate on this block in case it can be merged with its new successor.
+ }
+}
+
+void HDeadCodeElimination::RemoveDeadInstructions() {
// Process basic blocks in post-order in the dominator tree, so that
- // a dead instruction depending on another dead instruction is
- // removed.
+ // a dead instruction depending on another dead instruction is removed.
for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) {
HBasicBlock* block = b.Current();
// Traverse this block's instructions in backward order and remove
@@ -38,11 +106,18 @@ void HDeadCodeElimination::Run() {
if (!inst->HasSideEffects()
&& !inst->CanThrow()
&& !inst->IsSuspendCheck()
+ && !inst->IsMemoryBarrier() // If we added an explicit barrier then we should keep it.
&& !inst->HasUses()) {
block->RemoveInstruction(inst);
+ MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction);
}
}
}
}
+void HDeadCodeElimination::Run() {
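+ // Remove dead blocks first: replacing their successors' phis with a single
+ // remaining input can leave more instructions unused for the second pass.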
+ RemoveDeadBlocks();
+ RemoveDeadInstructions();
+}
+
} // namespace art
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 3db2c3ff3f..0bea0fc1c2 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -19,6 +19,7 @@
#include "nodes.h"
#include "optimization.h"
+#include "optimizing_compiler_stats.h"
namespace art {
@@ -28,8 +29,10 @@ namespace art {
*/
class HDeadCodeElimination : public HOptimization {
public:
- explicit HDeadCodeElimination(HGraph* graph)
- : HOptimization(graph, true, kDeadCodeEliminationPassName) {}
+ HDeadCodeElimination(HGraph* graph,
+ OptimizingCompilerStats* stats = nullptr,
+ const char* name = kDeadCodeEliminationPassName)
+ : HOptimization(graph, true, name, stats) {}
void Run() OVERRIDE;
@@ -37,6 +40,10 @@ class HDeadCodeElimination : public HOptimization {
"dead_code_elimination";
private:
+ void MaybeRecordDeadBlock(HBasicBlock* block);
+ void RemoveDeadBlocks();
+ void RemoveDeadInstructions();
+
DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination);
};
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 3a56c6c68f..e1649fd3cd 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -88,23 +88,36 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
// Visit this block's list of phis.
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ HInstruction* current = it.Current();
// Ensure this block's list of phis contains only phis.
- if (!it.Current()->IsPhi()) {
+ if (!current->IsPhi()) {
AddError(StringPrintf("Block %d has a non-phi in its phi list.",
current_block_->GetBlockId()));
}
- it.Current()->Accept(this);
+ if (current->GetNext() == nullptr && current != block->GetLastPhi()) {
+ AddError(StringPrintf("The recorded last phi of block %d does not match "
+ "the actual last phi %d.",
+ current_block_->GetBlockId(),
+ current->GetId()));
+ }
+ current->Accept(this);
}
// Visit this block's list of instructions.
- for (HInstructionIterator it(block->GetInstructions()); !it.Done();
- it.Advance()) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* current = it.Current();
// Ensure this block's list of instructions does not contain phis.
- if (it.Current()->IsPhi()) {
+ if (current->IsPhi()) {
AddError(StringPrintf("Block %d has a phi in its non-phi list.",
current_block_->GetBlockId()));
}
- it.Current()->Accept(this);
+ if (current->GetNext() == nullptr && current != block->GetLastInstruction()) {
+ AddError(StringPrintf("The recorded last instruction of block %d does not match "
+ "the actual last instruction %d.",
+ current_block_->GetBlockId(),
+ current->GetId()));
+ }
+ current->Accept(this);
}
}
@@ -251,6 +264,8 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
}
}
+ const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks();
+
// Ensure there is only one back edge per loop.
size_t num_back_edges =
loop_header->GetLoopInformation()->GetBackEdges().Size();
@@ -263,19 +278,41 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
"Loop defined by header %d has several back edges: %zu.",
id,
num_back_edges));
+ } else {
+ DCHECK_EQ(num_back_edges, 1u);
+ int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId();
+ if (!loop_blocks.IsBitSet(back_edge_id)) {
+ AddError(StringPrintf(
+ "Loop defined by header %d has an invalid back edge %d.",
+ id,
+ back_edge_id));
+ }
}
- // Ensure all blocks in the loop are dominated by the loop header.
- const ArenaBitVector& loop_blocks =
- loop_header->GetLoopInformation()->GetBlocks();
+ // Ensure all blocks in the loop are live and dominated by the loop header.
for (uint32_t i : loop_blocks.Indexes()) {
HBasicBlock* loop_block = GetGraph()->GetBlocks().Get(i);
- if (!loop_header->Dominates(loop_block)) {
+ if (loop_block == nullptr) {
+ AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.",
+ id,
+ i));
+ } else if (!loop_header->Dominates(loop_block)) {
AddError(StringPrintf("Loop block %d not dominated by loop header %d.",
- loop_block->GetBlockId(),
+ i,
id));
}
}
+
+ // If this is a nested loop, ensure the outer loops contain a superset of the blocks.
+ for (HLoopInformationOutwardIterator it(*loop_header); !it.Done(); it.Advance()) {
+ HLoopInformation* outer_info = it.Current();
+ if (!loop_blocks.IsSubsetOf(&outer_info->GetBlocks())) {
+ AddError(StringPrintf("Blocks of loop defined by header %d are not a subset of blocks of "
+ "an outer loop defined by header %d.",
+ id,
+ outer_info->GetHeader()->GetBlockId()));
+ }
+ }
}
void SSAChecker::VisitInstruction(HInstruction* instruction) {
@@ -393,8 +430,10 @@ void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_inde
static_cast<int>(input_index),
value));
}
- } else if (input->GetType() == Primitive::kPrimInt && input->IsPhi()) {
- // TODO: We need a data-flow analysis which determines if the Phi is boolean.
+ } else if (input->GetType() == Primitive::kPrimInt
+ && (input->IsPhi() || input->IsAnd() || input->IsOr() || input->IsXor())) {
+ // TODO: We need a data-flow analysis to determine if the Phi or
+ // binary operation is actually Boolean. Allow for now.
} else if (input->GetType() != Primitive::kPrimBoolean) {
AddError(StringPrintf(
"%s instruction %d has a non-Boolean input %d whose type is: %s.",
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 4c283788b5..ca9cbc3d01 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -192,6 +192,10 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
output_ << " " << phi->GetRegNumber();
}
+ void VisitMemoryBarrier(HMemoryBarrier* barrier) OVERRIDE {
+ output_ << " " << barrier->GetBarrierKind();
+ }
+
bool IsPass(const char* name) {
return strcmp(pass_name_, name) == 0;
}
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 74848d5d96..708733e28c 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -55,7 +55,7 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> {
buckets_owned_(allocator, num_buckets_, false),
num_entries_(to_copy.num_entries_) {
// ArenaAllocator returns zeroed memory, so entries of buckets_ and
- // buckets_owned_ are initialized to nullptr and false, respectively.
+ // buckets_owned_ are initialized to null and false, respectively.
DCHECK(IsPowerOfTwo(num_buckets_));
if (num_buckets_ == to_copy.num_buckets_) {
// Hash table remains the same size. We copy the bucket pointers and leave
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 6d2a8d77e2..bffd639e83 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -190,7 +190,7 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
}
// Run simple optimizations on the graph.
- HDeadCodeElimination dce(callee_graph);
+ HDeadCodeElimination dce(callee_graph, stats_);
HConstantFolding fold(callee_graph);
InstructionSimplifier simplify(callee_graph, stats_);
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index afbc490150..2df7c166d8 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -43,6 +43,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor {
void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
void VisitEqual(HEqual* equal) OVERRIDE;
+ void VisitNotEqual(HNotEqual* not_equal) OVERRIDE;
+ void VisitBooleanNot(HBooleanNot* bool_not) OVERRIDE;
void VisitArraySet(HArraySet* instruction) OVERRIDE;
void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
void VisitNullCheck(HNullCheck* instruction) OVERRIDE;
@@ -60,6 +62,7 @@ class InstructionSimplifierVisitor : public HGraphVisitor {
void VisitSub(HSub* instruction) OVERRIDE;
void VisitUShr(HUShr* instruction) OVERRIDE;
void VisitXor(HXor* instruction) OVERRIDE;
+ void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
bool simplification_occurred_ = false;
@@ -87,10 +90,6 @@ void InstructionSimplifierVisitor::Run() {
// current index, so don't advance the iterator.
continue;
}
- if (simplifications_at_current_position_ >= kMaxSamePositionSimplifications) {
- LOG(WARNING) << "Too many simplifications (" << simplifications_at_current_position_
- << ") occurred at the current position.";
- }
simplifications_at_current_position_ = 0;
it.Advance();
}
@@ -161,6 +160,10 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) {
void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
+ if (!check_cast->InputAt(0)->CanBeNull()) {
+ check_cast->ClearMustDoNullCheck();
+ }
+
if (!load_class->IsResolved()) {
// If the class couldn't be resolved it's not safe to compare against it. Its
// default type would be Top which might be wider than the actual class type
@@ -178,6 +181,12 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
}
}
+void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
+ if (!instruction->InputAt(0)->CanBeNull()) {
+ instruction->ClearMustDoNullCheck();
+ }
+}
+
void InstructionSimplifierVisitor::VisitSuspendCheck(HSuspendCheck* check) {
HBasicBlock* block = check->GetBlock();
// Currently always keep the suspend check at entry.
@@ -195,21 +204,62 @@ void InstructionSimplifierVisitor::VisitSuspendCheck(HSuspendCheck* check) {
}
void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
- HInstruction* input1 = equal->InputAt(0);
- HInstruction* input2 = equal->InputAt(1);
- if (input1->GetType() == Primitive::kPrimBoolean && input2->IsIntConstant()) {
- if (input2->AsIntConstant()->GetValue() == 1) {
- // Replace (bool_value == 1) with bool_value
- equal->ReplaceWith(equal->InputAt(0));
- equal->GetBlock()->RemoveInstruction(equal);
- } else {
- // We should replace (bool_value == 0) with !bool_value, but we unfortunately
- // do not have such instruction.
- DCHECK_EQ(input2->AsIntConstant()->GetValue(), 0);
+ HInstruction* input_const = equal->GetConstantRight();
+ if (input_const != nullptr) {
+ HInstruction* input_value = equal->GetLeastConstantLeft();
+ if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
+ HBasicBlock* block = equal->GetBlock();
+ if (input_const->AsIntConstant()->IsOne()) {
+ // Replace (bool_value == true) with bool_value
+ equal->ReplaceWith(input_value);
+ block->RemoveInstruction(equal);
+ RecordSimplification();
+ } else {
+ // Replace (bool_value == false) with !bool_value
+ DCHECK(input_const->AsIntConstant()->IsZero());
+ block->ReplaceAndRemoveInstructionWith(
+ equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+ RecordSimplification();
+ }
+ }
+ }
+}
+
+void InstructionSimplifierVisitor::VisitNotEqual(HNotEqual* not_equal) {
+ HInstruction* input_const = not_equal->GetConstantRight();
+ if (input_const != nullptr) {
+ HInstruction* input_value = not_equal->GetLeastConstantLeft();
+ if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
+ HBasicBlock* block = not_equal->GetBlock();
+ if (input_const->AsIntConstant()->IsOne()) {
+ // Replace (bool_value != true) with !bool_value
+ block->ReplaceAndRemoveInstructionWith(
+ not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+ RecordSimplification();
+ } else {
+ // Replace (bool_value != false) with bool_value
+ DCHECK(input_const->AsIntConstant()->IsZero());
+ not_equal->ReplaceWith(input_value);
+ block->RemoveInstruction(not_equal);
+ RecordSimplification();
+ }
}
}
}
+void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) {
+ HInstruction* parent = bool_not->InputAt(0);
+ if (parent->IsBooleanNot()) {
+ HInstruction* value = parent->InputAt(0);
+ // Replace (!(!bool_value)) with bool_value
+ bool_not->ReplaceWith(value);
+ bool_not->GetBlock()->RemoveInstruction(bool_not);
+ // It is possible that `parent` is dead at this point but we leave
+ // its removal to DCE for simplicity.
+ RecordSimplification();
+ }
+}
+
void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) {
HInstruction* input = instruction->InputAt(0);
// If the array is a NewArray with constant size, replace the array length
@@ -388,9 +438,16 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) {
if (Primitive::IsIntOrLongType(type)) {
int64_t factor = Int64FromConstant(input_cst);
- // We expect the `0` case to have been handled in the constant folding pass.
- DCHECK_NE(factor, 0);
- if (IsPowerOfTwo(factor)) {
+ // Even though constant folding also takes care of the zero case, other
+ // optimizations can still introduce a multiplication by zero.
+ if (factor == 0) {
+ // Replace code looking like
+ // MUL dst, src, 0
+ // with
+ // 0
+ instruction->ReplaceWith(input_cst);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if (IsPowerOfTwo(factor)) {
// Replace code looking like
// MUL dst, src, pow_of_2
// with
@@ -424,7 +481,8 @@ void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) {
return;
}
- if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse()) {
+ if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse() &&
+ !Primitive::IsFloatingPointType(input->GetType())) {
// Replace code looking like
// SUB tmp, a, b
// NEG dst, tmp
@@ -435,6 +493,7 @@ void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) {
// worse code. In particular, we do not want the live ranges of `a` and `b`
// to be extended if we are not sure the initial 'SUB' instruction can be
// removed.
+ // We do not perform this optimization for floating point because we could lose
+ // the sign of zero: 0.0 - 0.0 is +0.0, but -(0.0 - 0.0) is -0.0.
HSub* sub = input->AsSub();
HSub* new_sub =
new (GetGraph()->GetArena()) HSub(instruction->GetType(), sub->GetRight(), sub->GetLeft());
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 9a6062fedf..932192e4fd 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -863,7 +863,7 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations = invoke->GetLocations();
// Note that the null check must have been done earlier.
- DCHECK(!invoke->CanDoImplicitNullCheck());
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
Register argument = locations->InAt(1).AsRegister<Register>();
__ cmp(argument, ShifterOperand(0));
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d3a4e6ca15..117d6a4279 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1007,7 +1007,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations = invoke->GetLocations();
// Note that the null check must have been done earlier.
- DCHECK(!invoke->CanDoImplicitNullCheck());
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
Register argument = WRegisterFrom(locations->InAt(1));
__ Cmp(argument, 0);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 3c7a2660db..a8e2cdf1f6 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -828,7 +828,7 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
locations->AddTemp(Location::RequiresFpuRegister());
locations->AddTemp(Location::RequiresFpuRegister());
return;
@@ -962,7 +962,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations = invoke->GetLocations();
// Note that the null check must have been done earlier.
- DCHECK(!invoke->CanDoImplicitNullCheck());
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
Register argument = locations->InAt(1).AsRegister<Register>();
__ testl(argument, argument);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index d9a1c31c77..5d24d1fbfb 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -704,7 +704,6 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
locations->AddTemp(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
return;
}
@@ -732,14 +731,12 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
// Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
X86_64Assembler* assembler = GetAssembler();
- // Generate 0.5 into inPlusPointFive.
- __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
- __ movd(inPlusPointFive, out, false);
+ // Load 0.5 into inPlusPointFive.
+ __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));
// Add in the input.
__ addss(inPlusPointFive, in);
@@ -747,12 +744,8 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
// And floor it to an integral value.
__ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
- __ movl(out, Immediate(kPrimIntMax));
- // maxInt = int-to-float(out)
- __ cvtsi2ss(maxInt, out);
-
// if inPlusPointFive >= maxInt goto done
- __ comiss(inPlusPointFive, maxInt);
+ __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
__ j(kAboveEqual, &done);
// if input == NaN goto nan
@@ -782,14 +775,12 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
// Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
X86_64Assembler* assembler = GetAssembler();
- // Generate 0.5 into inPlusPointFive.
- __ movq(out, Immediate(bit_cast<int64_t, double>(0.5)));
- __ movd(inPlusPointFive, out, true);
+ // Load 0.5 into inPlusPointFive.
+ __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));
// Add in the input.
__ addsd(inPlusPointFive, in);
@@ -797,12 +788,8 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
// And floor it to an integral value.
__ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
- __ movq(out, Immediate(kPrimLongMax));
- // maxLong = long-to-double(out)
- __ cvtsi2sd(maxLong, out, true);
-
// if inPlusPointFive >= maxLong goto done
- __ comisd(inPlusPointFive, maxLong);
+ __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
__ j(kAboveEqual, &done);
// if input == NaN goto nan
@@ -886,7 +873,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations = invoke->GetLocations();
// Note that the null check must have been done earlier.
- DCHECK(!invoke->CanDoImplicitNullCheck());
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
__ testl(argument, argument);
@@ -960,26 +947,48 @@ static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke)
LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
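+ // Accept constants that fit a sign-extended imm32 so constant pokes need no extra register.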
+ locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
}
static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
- CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
+ Location value = locations->InAt(1);
// x86 allows unaligned access. We do not have to check the input or use specific instructions
// to avoid a SIGBUS.
switch (size) {
case Primitive::kPrimByte:
- __ movb(Address(address, 0), value);
+ if (value.IsConstant()) {
+ __ movb(Address(address, 0),
+ Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
+ } else {
+ __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
+ }
break;
case Primitive::kPrimShort:
- __ movw(Address(address, 0), value);
+ if (value.IsConstant()) {
+ __ movw(Address(address, 0),
+ Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
+ } else {
+ __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
+ }
break;
case Primitive::kPrimInt:
- __ movl(Address(address, 0), value);
+ if (value.IsConstant()) {
+ __ movl(Address(address, 0),
+ Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
+ } else {
+ __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
+ }
break;
case Primitive::kPrimLong:
- __ movq(Address(address, 0), value);
+ if (value.IsConstant()) {
+ int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+ DCHECK(IsInt<32>(v));
+ int32_t v_32 = v;
+ __ movq(Address(address, 0), Immediate(v_32));
+ } else {
+ __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
+ }
break;
default:
LOG(FATAL) << "Type not recognized for poke: " << size;
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index de876be9ab..c3a99150c4 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -285,17 +285,26 @@ class Location : public ValueObject {
bool Contains(Location other) const {
if (Equals(other)) {
return true;
- } else if (IsFpuRegisterPair() && other.IsFpuRegister()) {
- return other.reg() == low() || other.reg() == high();
- } else if (IsRegisterPair() && other.IsRegister()) {
- return other.reg() == low() || other.reg() == high();
- } else if (IsDoubleStackSlot() && other.IsStackSlot()) {
- return (GetStackIndex() == other.GetStackIndex())
- || (GetStackIndex() + 4 == other.GetStackIndex());
+ } else if (IsPair() || IsDoubleStackSlot()) {
+ return ToLow().Equals(other) || ToHigh().Equals(other);
}
return false;
}
+ bool OverlapsWith(Location other) const {
+ // Only check the overlapping case that can happen with our register allocation algorithm.
+ bool overlap = Contains(other) || other.Contains(*this);
+ if (kIsDebugBuild && !overlap) {
+ // Note: These are also overlapping cases. But we are not able to handle them in
+ // ParallelMoveResolverWithSwap. Make sure that we do not meet such case with our compiler.
+ if ((IsPair() && other.IsPair()) || (IsDoubleStackSlot() && other.IsDoubleStackSlot())) {
+ DCHECK(!Contains(other.ToLow()));
+ DCHECK(!Contains(other.ToHigh()));
+ }
+ }
+ return overlap;
+ }
+
const char* DebugString() const {
switch (GetKind()) {
case kInvalid: return "I";
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 5fca4fab22..3205b5e991 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -416,26 +416,6 @@ static void UpdateInputsUsers(HInstruction* instruction) {
DCHECK(!instruction->HasEnvironment());
}
-void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) {
- DCHECK(!cursor->IsPhi());
- DCHECK(!instruction->IsPhi());
- DCHECK_EQ(instruction->GetId(), -1);
- DCHECK_NE(cursor->GetId(), -1);
- DCHECK_EQ(cursor->GetBlock(), this);
- DCHECK(!instruction->IsControlFlow());
- instruction->next_ = cursor;
- instruction->previous_ = cursor->previous_;
- cursor->previous_ = instruction;
- if (GetFirstInstruction() == cursor) {
- instructions_.first_instruction_ = instruction;
- } else {
- instruction->previous_->next_ = instruction;
- }
- instruction->SetBlock(this);
- instruction->SetId(GetGraph()->GetNextInstructionId());
- UpdateInputsUsers(instruction);
-}
-
void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial,
HInstruction* replacement) {
DCHECK(initial->GetBlock() == this);
@@ -463,23 +443,27 @@ void HBasicBlock::AddPhi(HPhi* phi) {
Add(&phis_, this, phi);
}
+void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) {
+ DCHECK(!cursor->IsPhi());
+ DCHECK(!instruction->IsPhi());
+ DCHECK_EQ(instruction->GetId(), -1);
+ DCHECK_NE(cursor->GetId(), -1);
+ DCHECK_EQ(cursor->GetBlock(), this);
+ DCHECK(!instruction->IsControlFlow());
+ instruction->SetBlock(this);
+ instruction->SetId(GetGraph()->GetNextInstructionId());
+ UpdateInputsUsers(instruction);
+ instructions_.InsertInstructionBefore(instruction, cursor);
+}
+
void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) {
DCHECK_EQ(phi->GetId(), -1);
DCHECK_NE(cursor->GetId(), -1);
DCHECK_EQ(cursor->GetBlock(), this);
- if (cursor->next_ == nullptr) {
- cursor->next_ = phi;
- phi->previous_ = cursor;
- DCHECK(phi->next_ == nullptr);
- } else {
- phi->next_ = cursor->next_;
- phi->previous_ = cursor;
- cursor->next_ = phi;
- phi->next_->previous_ = phi;
- }
phi->SetBlock(this);
phi->SetId(GetGraph()->GetNextInstructionId());
UpdateInputsUsers(phi);
+ phis_.InsertInstructionAfter(phi, cursor);
}
static void Remove(HInstructionList* instruction_list,
@@ -546,6 +530,34 @@ void HInstructionList::AddInstruction(HInstruction* instruction) {
}
}
+void HInstructionList::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) {
+ DCHECK(Contains(cursor));
+ if (cursor == first_instruction_) {
+ cursor->previous_ = instruction;
+ instruction->next_ = cursor;
+ first_instruction_ = instruction;
+ } else {
+ instruction->previous_ = cursor->previous_;
+ instruction->next_ = cursor;
+ cursor->previous_ = instruction;
+ instruction->previous_->next_ = instruction;
+ }
+}
+
+void HInstructionList::InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor) {
+ DCHECK(Contains(cursor));
+ if (cursor == last_instruction_) {
+ cursor->next_ = instruction;
+ instruction->previous_ = cursor;
+ last_instruction_ = instruction;
+ } else {
+ instruction->next_ = cursor->next_;
+ instruction->previous_ = cursor;
+ cursor->next_ = instruction;
+ instruction->next_->previous_ = instruction;
+ }
+}
+
void HInstructionList::RemoveInstruction(HInstruction* instruction) {
if (instruction->previous_ != nullptr) {
instruction->previous_->next_ = instruction->next_;
@@ -660,6 +672,11 @@ void HPhi::AddInput(HInstruction* input) {
input->AddUseAt(this, inputs_.Size() - 1);
}
+void HPhi::RemoveInputAt(size_t index) {
+ RemoveAsUserOfInput(index);
+ inputs_.DeleteAt(index);
+}
+
#define DEFINE_ACCEPT(name, super) \
void H##name::Accept(HGraphVisitor* visitor) { \
visitor->Visit##name(this); \
@@ -702,7 +719,7 @@ HConstant* HUnaryOperation::TryStaticEvaluation() const {
// TODO: Implement static evaluation of long unary operations.
//
// Do not exit with a fatal condition here. Instead, simply
- // return `nullptr' to notify the caller that this instruction
+ // return `null' to notify the caller that this instruction
// cannot (yet) be statically evaluated.
return nullptr;
}
@@ -738,7 +755,7 @@ HConstant* HBinaryOperation::GetConstantRight() const {
}
// If `GetConstantRight()` returns one of the input, this returns the other
-// one. Otherwise it returns nullptr.
+// one. Otherwise it returns null.
HInstruction* HBinaryOperation::GetLeastConstantLeft() const {
HInstruction* most_constant_right = GetConstantRight();
if (most_constant_right == nullptr) {
@@ -855,6 +872,15 @@ bool HBasicBlock::HasSinglePhi() const {
return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr;
}
+size_t HInstructionList::CountSize() const {
+ size_t size = 0;
+ HInstruction* current = first_instruction_;
+ for (; current != nullptr; current = current->GetNext()) {
+ size++;
+ }
+ return size;
+}
+
void HInstructionList::SetBlockOfInstructions(HBasicBlock* block) const {
for (HInstruction* current = first_instruction_;
current != nullptr;
@@ -886,40 +912,167 @@ void HInstructionList::Add(const HInstructionList& instruction_list) {
}
}
-void HBasicBlock::DisconnectFromAll() {
- DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented scenario";
+void HBasicBlock::DisconnectAndDelete() {
+ // Dominators must be removed after all the blocks they dominate. This way
+ // a loop header is removed last, a requirement for correct loop information
+ // iteration.
+ DCHECK(dominated_blocks_.IsEmpty());
+
+ // Remove the block from all loops it is included in.
+ for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) {
+ HLoopInformation* loop_info = it.Current();
+ loop_info->Remove(this);
+ if (loop_info->IsBackEdge(*this)) {
+ // This deliberately leaves the loop in an inconsistent state and will
+ // fail SSAChecker unless the entire loop is removed during the pass.
+ loop_info->RemoveBackEdge(this);
+ }
+ }
+ // Disconnect the block from its predecessors and update their control-flow
+ // instructions.
for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
- predecessors_.Get(i)->successors_.Delete(this);
+ HBasicBlock* predecessor = predecessors_.Get(i);
+ HInstruction* last_instruction = predecessor->GetLastInstruction();
+ predecessor->RemoveInstruction(last_instruction);
+ predecessor->RemoveSuccessor(this);
+ if (predecessor->GetSuccessors().Size() == 1u) {
+ DCHECK(last_instruction->IsIf());
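+ // The HIf lost one of its two targets; a HGoto to the remaining successor suffices.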
+ predecessor->AddInstruction(new (graph_->GetArena()) HGoto());
+ } else {
+ // The predecessor has no remaining successors and therefore must be dead.
+ // We deliberately leave it without a control-flow instruction so that the
+ // SSAChecker fails unless it is removed during the pass too.
+ DCHECK_EQ(predecessor->GetSuccessors().Size(), 0u);
+ }
}
+ predecessors_.Reset();
+
+ // Disconnect the block from its successors and update their dominators
+ // and phis.
for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
- successors_.Get(i)->predecessors_.Delete(this);
- }
- dominator_->dominated_blocks_.Delete(this);
+ HBasicBlock* successor = successors_.Get(i);
+ // Delete this block from the list of predecessors.
+ size_t this_index = successor->GetPredecessorIndexOf(this);
+ successor->predecessors_.DeleteAt(this_index);
+
+ // Check that `successor` has other predecessors, otherwise `this` is the
+ // dominator of `successor` which violates the order DCHECKed at the top.
+ DCHECK(!successor->predecessors_.IsEmpty());
+
+ // Recompute the successor's dominator.
+ HBasicBlock* old_dominator = successor->GetDominator();
+ HBasicBlock* new_dominator = successor->predecessors_.Get(0);
+ for (size_t j = 1, f = successor->predecessors_.Size(); j < f; ++j) {
+ new_dominator = graph_->FindCommonDominator(
+ new_dominator, successor->predecessors_.Get(j));
+ }
+ if (old_dominator != new_dominator) {
+ successor->SetDominator(new_dominator);
+ old_dominator->RemoveDominatedBlock(successor);
+ new_dominator->AddDominatedBlock(successor);
+ }
- predecessors_.Reset();
+ // Remove this block's entries in the successor's phis.
+ if (successor->predecessors_.Size() == 1u) {
+ // The successor has just one predecessor left. Replace phis with the only
+ // remaining input.
+ for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ HPhi* phi = phi_it.Current()->AsPhi();
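+ // The phi necessarily had two inputs, so the remaining one is at index 1 - this_index.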
+ phi->ReplaceWith(phi->InputAt(1 - this_index));
+ successor->RemovePhi(phi);
+ }
+ } else {
+ for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+ }
+ }
+ }
successors_.Reset();
- dominator_ = nullptr;
- graph_ = nullptr;
+
+ // Disconnect from the dominator.
+ dominator_->RemoveDominatedBlock(this);
+ SetDominator(nullptr);
+
+ // Delete from the graph. The function safely deletes remaining instructions
+ // and updates the reverse post order.
+ graph_->DeleteDeadBlock(this);
+ SetGraph(nullptr);
}
void HBasicBlock::MergeWith(HBasicBlock* other) {
- DCHECK(successors_.IsEmpty()) << "Unimplemented block merge scenario";
- DCHECK(dominated_blocks_.IsEmpty()
- || (dominated_blocks_.Size() == 1 && dominated_blocks_.Get(0) == other))
- << "Unimplemented block merge scenario";
+ DCHECK_EQ(GetGraph(), other->GetGraph());
+ DCHECK(GetDominatedBlocks().Contains(other));
+ DCHECK_EQ(GetSuccessors().Size(), 1u);
+ DCHECK_EQ(GetSuccessors().Get(0), other);
+ DCHECK_EQ(other->GetPredecessors().Size(), 1u);
+ DCHECK_EQ(other->GetPredecessors().Get(0), this);
DCHECK(other->GetPhis().IsEmpty());
+ // Move instructions from `other` to `this`.
+ DCHECK(EndsWithControlFlowInstruction());
+ RemoveInstruction(GetLastInstruction());
+ instructions_.Add(other->GetInstructions());
+ other->instructions_.SetBlockOfInstructions(this);
+ other->instructions_.Clear();
+
+ // Remove `other` from the loops it is included in.
+ for (HLoopInformationOutwardIterator it(*other); !it.Done(); it.Advance()) {
+ HLoopInformation* loop_info = it.Current();
+ loop_info->Remove(other);
+ if (loop_info->IsBackEdge(*other)) {
+ loop_info->ClearBackEdges();
+ loop_info->AddBackEdge(this);
+ }
+ }
+
+ // Update links to the successors of `other`.
successors_.Reset();
- dominated_blocks_.Reset();
+ while (!other->successors_.IsEmpty()) {
+ HBasicBlock* successor = other->successors_.Get(0);
+ successor->ReplacePredecessor(other, this);
+ }
+
+ // Update the dominator tree.
+ dominated_blocks_.Delete(other);
+ for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) {
+ HBasicBlock* dominated = other->GetDominatedBlocks().Get(i);
+ dominated_blocks_.Add(dominated);
+ dominated->SetDominator(this);
+ }
+ other->dominated_blocks_.Reset();
+ other->dominator_ = nullptr;
+
+ // Clear the list of predecessors of `other` in preparation for deleting it.
+ other->predecessors_.Reset();
+
+ // Delete `other` from the graph. The function updates reverse post order.
+ graph_->DeleteDeadBlock(other);
+ other->SetGraph(nullptr);
+}
+
+void HBasicBlock::MergeWithInlined(HBasicBlock* other) {
+ DCHECK_NE(GetGraph(), other->GetGraph());
+ DCHECK(GetDominatedBlocks().IsEmpty());
+ DCHECK(GetSuccessors().IsEmpty());
+ DCHECK(!EndsWithControlFlowInstruction());
+ DCHECK_EQ(other->GetPredecessors().Size(), 1u);
+ DCHECK(other->GetPredecessors().Get(0)->IsEntryBlock());
+ DCHECK(other->GetPhis().IsEmpty());
+ DCHECK(!other->IsInLoop());
+
+ // Move instructions from `other` to `this`.
instructions_.Add(other->GetInstructions());
- other->GetInstructions().SetBlockOfInstructions(this);
+ other->instructions_.SetBlockOfInstructions(this);
- while (!other->GetSuccessors().IsEmpty()) {
- HBasicBlock* successor = other->GetSuccessors().Get(0);
+ // Update links to the successors of `other`.
+ successors_.Reset();
+ while (!other->successors_.IsEmpty()) {
+ HBasicBlock* successor = other->successors_.Get(0);
successor->ReplacePredecessor(other, this);
}
+ // Update the dominator tree.
for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) {
HBasicBlock* dominated = other->GetDominatedBlocks().Get(i);
dominated_blocks_.Add(dominated);
@@ -961,6 +1114,24 @@ static void MakeRoomFor(GrowableArray<HBasicBlock*>* blocks,
}
}
+void HGraph::DeleteDeadBlock(HBasicBlock* block) {
+ DCHECK_EQ(block->GetGraph(), this);
+ DCHECK(block->GetSuccessors().IsEmpty());
+ DCHECK(block->GetPredecessors().IsEmpty());
+ DCHECK(block->GetDominatedBlocks().IsEmpty());
+ DCHECK(block->GetDominator() == nullptr);
+
+ for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ block->RemoveInstruction(it.Current());
+ }
+ for (HBackwardInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ block->RemovePhi(it.Current()->AsPhi());
+ }
+
+ reverse_post_order_.Delete(block);
+ blocks_.Put(block->GetBlockId(), nullptr);
+}
+
void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
if (GetBlocks().Size() == 3) {
// Simple case of an entry block, a body block, and an exit block.
@@ -993,7 +1164,7 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
HBasicBlock* first = entry_block_->GetSuccessors().Get(0);
DCHECK(!first->IsInLoop());
- at->MergeWith(first);
+ at->MergeWithInlined(first);
exit_block_->ReplaceWith(to);
// Update all predecessors of the exit block (now the `to` block)
@@ -1064,8 +1235,10 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
outer_graph->AddBlock(current);
outer_graph->reverse_post_order_.Put(++index_of_at, current);
if (info != nullptr) {
- info->Add(current);
current->SetLoopInformation(info);
+ for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
+ loop_it.Current()->Add(current);
+ }
}
}
}
@@ -1075,8 +1248,10 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
outer_graph->AddBlock(to);
outer_graph->reverse_post_order_.Put(++index_of_at, to);
if (info != nullptr) {
- info->Add(to);
to->SetLoopInformation(info);
+ for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
+ loop_it.Current()->Add(to);
+ }
if (info->IsBackEdge(*at)) {
// Only `at` can become a back edge, as the inlined blocks
// are predecessors of `at`.
@@ -1121,53 +1296,6 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
invoke->GetBlock()->RemoveInstruction(invoke);
}
-void HGraph::MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block) {
- // Find the two branches of an If.
- DCHECK_EQ(start_block->GetSuccessors().Size(), 2u);
- HBasicBlock* left_branch = start_block->GetSuccessors().Get(0);
- HBasicBlock* right_branch = start_block->GetSuccessors().Get(1);
-
- // Make sure this is a diamond control-flow path.
- DCHECK_EQ(left_branch->GetSuccessors().Get(0), end_block);
- DCHECK_EQ(right_branch->GetSuccessors().Get(0), end_block);
- DCHECK_EQ(end_block->GetPredecessors().Size(), 2u);
- DCHECK_EQ(start_block, end_block->GetDominator());
-
- // Disconnect the branches and merge the two blocks. This will move
- // all instructions from 'end_block' to 'start_block'.
- DCHECK(left_branch->IsSingleGoto());
- DCHECK(right_branch->IsSingleGoto());
- left_branch->DisconnectFromAll();
- right_branch->DisconnectFromAll();
- start_block->RemoveInstruction(start_block->GetLastInstruction());
- start_block->MergeWith(end_block);
-
- // Delete the now redundant blocks from the graph.
- blocks_.Put(left_branch->GetBlockId(), nullptr);
- blocks_.Put(right_branch->GetBlockId(), nullptr);
- blocks_.Put(end_block->GetBlockId(), nullptr);
-
- // Update reverse post order.
- reverse_post_order_.Delete(left_branch);
- reverse_post_order_.Delete(right_branch);
- reverse_post_order_.Delete(end_block);
-
- // Update loops which contain the code.
- for (HLoopInformationOutwardIterator it(*start_block); !it.Done(); it.Advance()) {
- HLoopInformation* loop_info = it.Current();
- DCHECK(loop_info->Contains(*left_branch));
- DCHECK(loop_info->Contains(*right_branch));
- DCHECK(loop_info->Contains(*end_block));
- loop_info->Remove(left_branch);
- loop_info->Remove(right_branch);
- loop_info->Remove(end_block);
- if (loop_info->IsBackEdge(*end_block)) {
- loop_info->RemoveBackEdge(end_block);
- loop_info->AddBackEdge(start_block);
- }
- }
-}
-
std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) {
ScopedObjectAccess soa(Thread::Current());
os << "["
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 649038b532..18a8225f55 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -19,6 +19,7 @@
#include "base/arena_containers.h"
#include "base/arena_object.h"
+#include "dex/compiler_enums.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "handle.h"
#include "handle_scope.h"
@@ -74,6 +75,10 @@ class HInstructionList {
void AddInstruction(HInstruction* instruction);
void RemoveInstruction(HInstruction* instruction);
+ // Insert `instruction` before/after an existing instruction `cursor`.
+ void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
+ void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor);
+
// Return true if this list contains `instruction`.
bool Contains(HInstruction* instruction) const;
@@ -92,6 +97,9 @@ class HInstructionList {
void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list);
void Add(const HInstructionList& instruction_list);
+ // Return the number of instructions in the list. This is an expensive operation.
+ size_t CountSize() const;
+
private:
HInstruction* first_instruction_;
HInstruction* last_instruction_;
@@ -163,7 +171,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
// Inline this graph in `outer_graph`, replacing the given `invoke` instruction.
void InlineInto(HGraph* outer_graph, HInvoke* invoke);
- void MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block);
+ // Removes `block` from the graph. The block must already be fully
+ // disconnected, i.e. have no predecessors, successors or dominated blocks.
+ void DeleteDeadBlock(HBasicBlock* block);
void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
void SimplifyLoop(HBasicBlock* header);
@@ -243,8 +252,9 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
return CreateConstant(value, &cached_long_constants_);
}
- private:
HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
+
+ private:
void VisitBlockForDominatorTree(HBasicBlock* block,
HBasicBlock* predecessor,
GrowableArray<size_t>* visits);
@@ -446,6 +456,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
HBasicBlock* GetDominator() const { return dominator_; }
void SetDominator(HBasicBlock* dominator) { dominator_ = dominator; }
void AddDominatedBlock(HBasicBlock* block) { dominated_blocks_.Add(block); }
+ void RemoveDominatedBlock(HBasicBlock* block) { dominated_blocks_.Delete(block); }
void ReplaceDominatedBlock(HBasicBlock* existing, HBasicBlock* new_block) {
for (size_t i = 0, e = dominated_blocks_.Size(); i < e; ++i) {
if (dominated_blocks_.Get(i) == existing) {
@@ -466,8 +477,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
HInstruction* GetFirstInstruction() const { return instructions_.first_instruction_; }
HInstruction* GetLastInstruction() const { return instructions_.last_instruction_; }
const HInstructionList& GetInstructions() const { return instructions_; }
- const HInstructionList& GetPhis() const { return phis_; }
HInstruction* GetFirstPhi() const { return phis_.first_instruction_; }
+ HInstruction* GetLastPhi() const { return phis_.last_instruction_; }
+ const HInstructionList& GetPhis() const { return phis_; }
void AddSuccessor(HBasicBlock* block) {
successors_.Add(block);
@@ -544,7 +556,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
// that this method does not update the graph, reverse post order, loop
// information, nor make sure the blocks are consistent (for example ending
// with a control flow instruction).
- void MergeWith(HBasicBlock* other);
+ void MergeWithInlined(HBasicBlock* other);
// Replace `this` with `other`. Predecessors, successors, and dominated blocks
// of `this` are moved to `other`.
@@ -553,12 +565,17 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
// with a control flow instruction).
void ReplaceWith(HBasicBlock* other);
- // Disconnects `this` from all its predecessors, successors and the dominator.
- // It assumes that `this` does not dominate any blocks.
- // Note that this method does not update the graph, reverse post order, loop
- // information, nor make sure the blocks are consistent (for example ending
- // with a control flow instruction).
- void DisconnectFromAll();
+ // Merge `other` at the end of `this`. This method updates loops, reverse post
+ // order, links to predecessors, successors, dominators and deletes the block
+ // from the graph. The two blocks must be successive, i.e. `this` must be the
+ // only predecessor of `other` and `other` the only successor of `this`.
+ void MergeWith(HBasicBlock* other);
+
+ // Disconnects `this` from all its predecessors, successors and dominator,
+ // removes it from all loops it is included in, and finally removes it from
+ // the graph. The block must not dominate any other block. Predecessors and
+ // successors are safely updated.
+ void DisconnectAndDelete();
void AddInstruction(HInstruction* instruction);
void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
@@ -718,6 +735,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(LoadString, Instruction) \
M(Local, Instruction) \
M(LongConstant, Constant) \
+ M(MemoryBarrier, Instruction) \
M(MonitorOperation, Instruction) \
M(Mul, BinaryOperation) \
M(Neg, UnaryOperation) \
@@ -908,6 +926,12 @@ class HUserRecord : public ValueObject {
HUseListNode<T>* use_node_;
};
+// TODO: Add better documentation to this class and maybe refactor with more suggestive names.
+// - Has(All)SideEffects suggests that all the side effects are present but only the
+// ChangesSomething flag is considered.
+// - DependsOn suggests that there is a real dependency between side effects but it only
+// checks the DependsOnSomething flag.
+//
// Represents the side effects an instruction may have.
class SideEffects : public ValueObject {
public:
@@ -1141,8 +1165,6 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
virtual bool CanThrow() const { return false; }
bool HasSideEffects() const { return side_effects_.HasSideEffects(); }
- virtual bool ActAsNullConstant() const { return false; }
-
// Does not apply for all instructions, but having this at top level greatly
// simplifies the null check elimination.
virtual bool CanBeNull() const {
@@ -1150,7 +1172,10 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
return true;
}
- virtual bool CanDoImplicitNullCheck() const { return false; }
+ virtual bool CanDoImplicitNullCheckOn(HInstruction* obj) const {
+ UNUSED(obj);
+ return false;
+ }
void SetReferenceTypeInfo(ReferenceTypeInfo reference_type_info) {
DCHECK_EQ(GetType(), Primitive::kPrimNot);
@@ -1618,7 +1643,7 @@ class HUnaryOperation : public HExpression<1> {
// Try to statically evaluate `operation` and return a HConstant
// containing the result of this evaluation. If `operation` cannot
- // be evaluated as a constant, return nullptr.
+ // be evaluated as a constant, return null.
HConstant* TryStaticEvaluation() const;
// Apply this operation to `x`.
@@ -1686,7 +1711,7 @@ class HBinaryOperation : public HExpression<2> {
// Try to statically evaluate `operation` and return a HConstant
// containing the result of this evaluation. If `operation` cannot
- // be evaluated as a constant, return nullptr.
+ // be evaluated as a constant, return null.
HConstant* TryStaticEvaluation() const;
// Apply this operation to `x` and `y`.
@@ -1694,11 +1719,11 @@ class HBinaryOperation : public HExpression<2> {
virtual int64_t Evaluate(int64_t x, int64_t y) const = 0;
// Returns an input that can legally be used as the right input and is
- // constant, or nullptr.
+ // constant, or null.
HConstant* GetConstantRight() const;
// If `GetConstantRight()` returns one of the input, this returns the other
- // one. Otherwise it returns nullptr.
+ // one. Otherwise it returns null.
HInstruction* GetLeastConstantLeft() const;
DECLARE_INSTRUCTION(BinaryOperation);
@@ -2064,8 +2089,6 @@ class HNullConstant : public HConstant {
size_t ComputeHashCode() const OVERRIDE { return 0; }
- bool ActAsNullConstant() const OVERRIDE { return true; }
-
DECLARE_INSTRUCTION(NullConstant);
private:
@@ -2087,11 +2110,6 @@ class HIntConstant : public HConstant {
size_t ComputeHashCode() const OVERRIDE { return GetValue(); }
- // TODO: Null is represented by the `0` constant. In most cases we replace it
- // with a HNullConstant but we don't do it when comparing (a != null). This
- // method is an workaround until we fix the above.
- bool ActAsNullConstant() const OVERRIDE { return value_ == 0; }
-
bool IsMinusOne() const OVERRIDE { return GetValue() == -1; }
bool IsZero() const OVERRIDE { return GetValue() == 0; }
bool IsOne() const OVERRIDE { return GetValue() == 1; }
@@ -2105,7 +2123,7 @@ class HIntConstant : public HConstant {
friend class HGraph;
ART_FRIEND_TEST(GraphTest, InsertInstructionBefore);
- ART_FRIEND_TEST(ParallelMoveTest, ConstantLast);
+ ART_FRIEND_TYPED_TEST(ParallelMoveTest, ConstantLast);
DISALLOW_COPY_AND_ASSIGN(HIntConstant);
};
@@ -2162,7 +2180,7 @@ class HInvoke : public HInstruction {
uint32_t GetDexMethodIndex() const { return dex_method_index_; }
- Intrinsics GetIntrinsic() {
+ Intrinsics GetIntrinsic() const {
return intrinsic_;
}
@@ -2217,7 +2235,8 @@ class HInvokeStaticOrDirect : public HInvoke {
invoke_type_(invoke_type),
is_recursive_(is_recursive) {}
- bool CanDoImplicitNullCheck() const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
+ UNUSED(obj);
// We access the method via the dex cache so we can't do an implicit null check.
// TODO: for intrinsics we can generate implicit null checks.
return false;
@@ -2249,9 +2268,9 @@ class HInvokeVirtual : public HInvoke {
: HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
vtable_index_(vtable_index) {}
- bool CanDoImplicitNullCheck() const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
// TODO: Add implicit null checks in intrinsics.
- return !GetLocations()->Intrinsified();
+ return (obj == InputAt(0)) && !GetLocations()->Intrinsified();
}
uint32_t GetVTableIndex() const { return vtable_index_; }
@@ -2275,9 +2294,9 @@ class HInvokeInterface : public HInvoke {
: HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
imt_index_(imt_index) {}
- bool CanDoImplicitNullCheck() const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
// TODO: Add implicit null checks in intrinsics.
- return !GetLocations()->Intrinsified();
+ return (obj == InputAt(0)) && !GetLocations()->Intrinsified();
}
uint32_t GetImtIndex() const { return imt_index_; }
@@ -2738,6 +2757,7 @@ class HPhi : public HInstruction {
size_t InputCount() const OVERRIDE { return inputs_.Size(); }
void AddInput(HInstruction* input);
+ void RemoveInputAt(size_t index);
Primitive::Type GetType() const OVERRIDE { return type_; }
void SetType(Primitive::Type type) { type_ = type; }
@@ -2847,8 +2867,8 @@ class HInstanceFieldGet : public HExpression<1> {
return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue();
}
- bool CanDoImplicitNullCheck() const OVERRIDE {
- return GetFieldOffset().Uint32Value() < kPageSize;
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
+ return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize;
}
size_t ComputeHashCode() const OVERRIDE {
@@ -2881,8 +2901,8 @@ class HInstanceFieldSet : public HTemplateInstruction<2> {
SetRawInputAt(1, value);
}
- bool CanDoImplicitNullCheck() const OVERRIDE {
- return GetFieldOffset().Uint32Value() < kPageSize;
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
+ return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize;
}
const FieldInfo& GetFieldInfo() const { return field_info_; }
@@ -2912,7 +2932,8 @@ class HArrayGet : public HExpression<2> {
UNUSED(other);
return true;
}
- bool CanDoImplicitNullCheck() const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
+ UNUSED(obj);
// TODO: We can be smarter here.
// Currently, the array access is always preceded by an ArrayLength or a NullCheck
// which generates the implicit null check. There are cases when these can be removed
@@ -2954,7 +2975,8 @@ class HArraySet : public HTemplateInstruction<3> {
return needs_type_check_;
}
- bool CanDoImplicitNullCheck() const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
+ UNUSED(obj);
// TODO: Same as for ArrayGet.
return false;
}
@@ -3006,7 +3028,9 @@ class HArrayLength : public HExpression<1> {
UNUSED(other);
return true;
}
- bool CanDoImplicitNullCheck() const OVERRIDE { return true; }
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
+ return obj == InputAt(0);
+ }
DECLARE_INSTRUCTION(ArrayLength);
@@ -3343,6 +3367,7 @@ class HInstanceOf : public HExpression<2> {
uint32_t dex_pc)
: HExpression(Primitive::kPrimBoolean, SideEffects::None()),
class_is_final_(class_is_final),
+ must_do_null_check_(true),
dex_pc_(dex_pc) {
SetRawInputAt(0, object);
SetRawInputAt(1, constant);
@@ -3362,10 +3387,15 @@ class HInstanceOf : public HExpression<2> {
bool IsClassFinal() const { return class_is_final_; }
+ // Used only in code generation.
+ bool MustDoNullCheck() const { return must_do_null_check_; }
+ void ClearMustDoNullCheck() { must_do_null_check_ = false; }
+
DECLARE_INSTRUCTION(InstanceOf);
private:
const bool class_is_final_;
+ bool must_do_null_check_;
const uint32_t dex_pc_;
DISALLOW_COPY_AND_ASSIGN(HInstanceOf);
@@ -3406,6 +3436,7 @@ class HCheckCast : public HTemplateInstruction<2> {
uint32_t dex_pc)
: HTemplateInstruction(SideEffects::None()),
class_is_final_(class_is_final),
+ must_do_null_check_(true),
dex_pc_(dex_pc) {
SetRawInputAt(0, object);
SetRawInputAt(1, constant);
@@ -3424,6 +3455,9 @@ class HCheckCast : public HTemplateInstruction<2> {
bool CanThrow() const OVERRIDE { return true; }
+ bool MustDoNullCheck() const { return must_do_null_check_; }
+ void ClearMustDoNullCheck() { must_do_null_check_ = false; }
+
uint32_t GetDexPc() const { return dex_pc_; }
bool IsClassFinal() const { return class_is_final_; }
@@ -3432,11 +3466,28 @@ class HCheckCast : public HTemplateInstruction<2> {
private:
const bool class_is_final_;
+ bool must_do_null_check_;
const uint32_t dex_pc_;
DISALLOW_COPY_AND_ASSIGN(HCheckCast);
};
+class HMemoryBarrier : public HTemplateInstruction<0> {
+ public:
+ explicit HMemoryBarrier(MemBarrierKind barrier_kind)
+ : HTemplateInstruction(SideEffects::None()),
+ barrier_kind_(barrier_kind) {}
+
+ MemBarrierKind GetBarrierKind() { return barrier_kind_; }
+
+ DECLARE_INSTRUCTION(MemoryBarrier);
+
+ private:
+ const MemBarrierKind barrier_kind_;
+
+ DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier);
+};
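A hedged aside on the new node: MemBarrierKind comes from dex/compiler_enums.h and is lowered by each backend. The sketch below is not the ART enum or its codegen; it only illustrates, with a hypothetical ToyBarrierKind, how such barrier kinds typically map onto C++11 fences.

#include <atomic>

enum class ToyBarrierKind { kLoadAny, kAnyStore, kAnyAny };

void EmitBarrier(ToyBarrierKind kind) {
  switch (kind) {
    case ToyBarrierKind::kLoadAny:   // earlier loads may not pass the barrier
      std::atomic_thread_fence(std::memory_order_acquire);
      break;
    case ToyBarrierKind::kAnyStore:  // later stores may not pass the barrier
      std::atomic_thread_fence(std::memory_order_release);
      break;
    case ToyBarrierKind::kAnyAny:    // full two-way fence
      std::atomic_thread_fence(std::memory_order_seq_cst);
      break;
  }
}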
+
class HMonitorOperation : public HTemplateInstruction<1> {
public:
enum OperationKind {
@@ -3502,7 +3553,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> {
// True if this blocks a move from the given location.
bool Blocks(Location loc) const {
- return !IsEliminated() && (source_.Contains(loc) || loc.Contains(source_));
+ return !IsEliminated() && source_.OverlapsWith(loc);
}
// A move is redundant if it's been eliminated, if its source and
@@ -3571,8 +3622,8 @@ class HParallelMove : public HTemplateInstruction<0> {
}
}
for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
- DCHECK(!destination.Equals(moves_.Get(i).GetDestination()))
- << "Same destination for two moves in a parallel move.";
+ DCHECK(!destination.OverlapsWith(moves_.Get(i).GetDestination()))
+ << "Overlapped destination for two moves in a parallel move.";
}
}
moves_.Add(MoveOperands(source, destination, type, instruction));
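The two hunks above switch from equality checks to OverlapsWith because register pairs make equality too weak a predicate: R0 and the pair (R0,R1) are not equal, yet a move touching one must block a move touching the other. A minimal self-contained sketch of that predicate follows; ToyLocation is hypothetical, not the ART Location class.

#include <cassert>

// Hypothetical location: a single register (high == -1) or a register pair.
struct ToyLocation {
  int low;
  int high;

  // Two locations overlap if they share at least one register.
  bool OverlapsWith(const ToyLocation& other) const {
    return low == other.low || low == other.high ||
           high == other.low || (high >= 0 && high == other.high);
  }
};

int main() {
  ToyLocation r0{0, -1};
  ToyLocation pair01{0, 1};
  ToyLocation r2{2, -1};
  assert(r0.OverlapsWith(pair01));  // share R0, so one blocks the other
  assert(!r0.OverlapsWith(r2));     // disjoint registers never block
  return 0;
}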
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index b13e07eb22..c46a21955c 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -21,9 +21,9 @@
namespace art {
-void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const {
+void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const {
if (stats_ != nullptr) {
- stats_->RecordStat(compilation_stat);
+ stats_->RecordStat(compilation_stat, count);
}
}
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 8b2028177b..ccf8de9f6a 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -48,7 +48,7 @@ class HOptimization : public ValueObject {
void Check();
protected:
- void MaybeRecordStat(MethodCompilationStat compilation_stat) const;
+ void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
HGraph* const graph_;
// Used to record stats about the optimization.
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index a95696a468..05451bcaa6 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -208,6 +208,12 @@ class OptimizingCompiler FINAL : public Compiler {
void UnInit() const OVERRIDE;
+ void MaybeRecordStat(MethodCompilationStat compilation_stat) const {
+ if (compilation_stats_.get() != nullptr) {
+ compilation_stats_->RecordStat(compilation_stat);
+ }
+ }
+
private:
// Whether we should run any optimization or register allocation. If false, will
// just run the code generation after the graph was built.
@@ -226,7 +232,7 @@ class OptimizingCompiler FINAL : public Compiler {
CompilerDriver* driver,
const DexCompilationUnit& dex_compilation_unit) const;
- mutable OptimizingCompilerStats compilation_stats_;
+ std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
std::unique_ptr<std::ostream> visualizer_output_;
@@ -243,7 +249,6 @@ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
run_optimizations_(
(driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime)
&& !driver->GetCompilerOptions().GetDebuggable()),
- compilation_stats_(),
delegate_(Create(driver, Compiler::Kind::kQuick)) {}
void OptimizingCompiler::Init() {
@@ -258,6 +263,9 @@ void OptimizingCompiler::Init() {
<< "Invoke the compiler with '-j1'.";
visualizer_output_.reset(new std::ofstream(cfg_file_name));
}
+ if (driver->GetDumpStats()) {
+ compilation_stats_.reset(new OptimizingCompilerStats());
+ }
}
void OptimizingCompiler::UnInit() const {
@@ -265,7 +273,9 @@ void OptimizingCompiler::UnInit() const {
}
OptimizingCompiler::~OptimizingCompiler() {
- compilation_stats_.Log();
+ if (compilation_stats_.get() != nullptr) {
+ compilation_stats_->Log();
+ }
}
void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu) const {
@@ -310,10 +320,11 @@ static void RunOptimizations(HGraph* graph,
const DexCompilationUnit& dex_compilation_unit,
PassInfoPrinter* pass_info_printer,
StackHandleScopeCollection* handles) {
- HDeadCodeElimination dce(graph);
+ HDeadCodeElimination dce1(graph, stats);
+ HDeadCodeElimination dce2(graph, stats, "dead_code_elimination_final");
HConstantFolding fold1(graph);
InstructionSimplifier simplify1(graph, stats);
- HBooleanSimplifier boolean_not(graph);
+ HBooleanSimplifier boolean_simplify(graph);
HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, stats);
@@ -329,20 +340,21 @@ static void RunOptimizations(HGraph* graph,
HOptimization* optimizations[] = {
&intrinsics,
- &dce,
+ &dce1,
&fold1,
&simplify1,
+ &inliner,
// BooleanSimplifier depends on the InstructionSimplifier removing redundant
// suspend checks to recognize empty blocks.
- &boolean_not,
- &inliner,
+ &boolean_simplify,
&fold2,
&side_effects,
&gvn,
&licm,
&bce,
&type_propagation,
- &simplify2
+ &simplify2,
+ &dce2,
};
RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer);
@@ -381,7 +393,7 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
const DexCompilationUnit& dex_compilation_unit,
PassInfoPrinter* pass_info_printer) const {
StackHandleScopeCollection handles(Thread::Current());
- RunOptimizations(graph, compiler_driver, &compilation_stats_,
+ RunOptimizations(graph, compiler_driver, compilation_stats_.get(),
dex_file, dex_compilation_unit, pass_info_printer, &handles);
AllocateRegisters(graph, codegen, pass_info_printer);
@@ -397,7 +409,7 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
std::vector<uint8_t> stack_map;
codegen->BuildStackMaps(&stack_map);
- compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized);
+ MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);
return CompiledMethod::SwapAllocCompiledMethod(
compiler_driver,
@@ -435,7 +447,7 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
std::vector<uint8_t> gc_map;
codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
- compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline);
+ MaybeRecordStat(MethodCompilationStat::kCompiledBaseline);
return CompiledMethod::SwapAllocCompiledMethod(
compiler_driver,
codegen->GetInstructionSet(),
@@ -463,7 +475,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
const DexFile& dex_file) const {
UNUSED(invoke_type);
std::string method_name = PrettyMethod(method_idx, dex_file);
- compilation_stats_.RecordStat(MethodCompilationStat::kAttemptCompilation);
+ MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
CompilerDriver* compiler_driver = GetCompilerDriver();
InstructionSet instruction_set = compiler_driver->GetInstructionSet();
// Always use the thumb2 assembler: some runtime functionality (like implicit stack
@@ -474,12 +486,12 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
// Do not attempt to compile on architectures we do not support.
if (!IsInstructionSetSupported(instruction_set)) {
- compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa);
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa);
return nullptr;
}
if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
- compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledPathological);
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological);
return nullptr;
}
@@ -489,7 +501,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
if ((compiler_options.GetCompilerFilter() == CompilerOptions::kSpace)
&& (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) {
- compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledSpaceFilter);
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledSpaceFilter);
return nullptr;
}
@@ -514,7 +526,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
compiler_driver->GetCompilerOptions()));
if (codegen.get() == nullptr) {
CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler";
- compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
return nullptr;
}
codegen->GetAssembler()->cfi().SetEnabled(
@@ -531,7 +543,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
&dex_compilation_unit,
&dex_file,
compiler_driver,
- &compilation_stats_);
+ compilation_stats_.get());
VLOG(compiler) << "Building " << method_name;
@@ -558,7 +570,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
if (!graph->TryBuildingSsa()) {
// We could not transform the graph to SSA, bailout.
LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop";
- compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
return nullptr;
}
}
@@ -576,11 +588,11 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
VLOG(compiler) << "Compile baseline " << method_name;
if (!run_optimizations_) {
- compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedDisabled);
+ MaybeRecordStat(MethodCompilationStat::kNotOptimizedDisabled);
} else if (!can_optimize) {
- compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedTryCatch);
+ MaybeRecordStat(MethodCompilationStat::kNotOptimizedTryCatch);
} else if (!can_allocate_registers) {
- compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator);
+ MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator);
}
return CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit);
@@ -603,9 +615,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
method_idx, jclass_loader, dex_file);
} else {
if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
- compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
} else {
- compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
}
}
@@ -616,7 +628,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
jclass_loader, dex_file);
if (method != nullptr) {
- compilation_stats_.RecordStat(MethodCompilationStat::kCompiledQuick);
+ MaybeRecordStat(MethodCompilationStat::kCompiledQuick);
}
return method;
}
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index d4a936d1c3..65c84e6942 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -29,6 +29,7 @@ enum MethodCompilationStat {
kCompiledBaseline,
kCompiledOptimized,
kCompiledQuick,
+ kInstructionSimplifications,
kInlinedInvoke,
kNotCompiledUnsupportedIsa,
kNotCompiledPathological,
@@ -48,8 +49,8 @@ enum MethodCompilationStat {
kNotCompiledVerifyAtRuntime,
kNotCompiledClassNotVerified,
kRemovedCheckedCast,
+ kRemovedDeadInstruction,
kRemovedNullCheck,
- kInstructionSimplifications,
kLastStat
};
@@ -57,8 +58,8 @@ class OptimizingCompilerStats {
public:
OptimizingCompilerStats() {}
- void RecordStat(MethodCompilationStat stat) {
- compile_stats_[stat]++;
+ void RecordStat(MethodCompilationStat stat, size_t count = 1) {
+ compile_stats_[stat] += count;
}
void Log() const {
@@ -82,7 +83,7 @@ class OptimizingCompilerStats {
for (int i = 0; i < kLastStat; i++) {
if (compile_stats_[i] != 0) {
- VLOG(compiler) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i];
+ LOG(INFO) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i];
}
}
}
@@ -96,6 +97,7 @@ class OptimizingCompilerStats {
case kCompiledOptimized : return "kCompiledOptimized";
case kCompiledQuick : return "kCompiledQuick";
case kInlinedInvoke : return "kInlinedInvoke";
+ case kInstructionSimplifications: return "kInstructionSimplifications";
case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
case kNotCompiledPathological : return "kNotCompiledPathological";
case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
@@ -114,8 +116,8 @@ class OptimizingCompilerStats {
case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
case kRemovedCheckedCast: return "kRemovedCheckedCast";
+ case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
case kRemovedNullCheck: return "kRemovedNullCheck";
- case kInstructionSimplifications: return "kInstructionSimplifications";
default: LOG(FATAL) << "invalid stat";
}
return "";
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index ad92ca59a1..54ea6f19d4 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -17,11 +17,23 @@
#include "parallel_move_resolver.h"
#include "nodes.h"
-#include "locations.h"
namespace art {
-void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
+void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
+ // Perform a linear sweep of the moves to add them to the initial list of
+ // moves to perform, ignoring any move that is redundant (the source is
+ // the same as the destination, the destination is ignored and
+ // unallocated, or the move was already eliminated).
+ for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
+ MoveOperands* move = parallel_move->MoveOperandsAt(i);
+ if (!move->IsRedundant()) {
+ moves_.Add(move);
+ }
+ }
+}
+
+void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
DCHECK(moves_.IsEmpty());
// Build up a worklist of moves.
BuildInitialMoveList(parallel_move);
@@ -50,20 +62,6 @@ void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
moves_.Reset();
}
-
-void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
- // Perform a linear sweep of the moves to add them to the initial list of
- // moves to perform, ignoring any move that is redundant (the source is
- // the same as the destination, the destination is ignored and
- // unallocated, or the move was already eliminated).
- for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
- MoveOperands* move = parallel_move->MoveOperandsAt(i);
- if (!move->IsRedundant()) {
- moves_.Add(move);
- }
- }
-}
-
Location LowOf(Location location) {
if (location.IsRegisterPair()) {
return Location::RegisterLocation(location.low());
@@ -103,7 +101,7 @@ static void UpdateSourceOf(MoveOperands* move, Location updated_location, Locati
}
}
-MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
+MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
// Each call to this function performs a move and deletes it from the move
// graph. We first recursively perform any move blocking this one. We
// mark a move as "pending" on entry to PerformMove in order to detect
@@ -229,7 +227,7 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
}
}
-bool ParallelMoveResolver::IsScratchLocation(Location loc) {
+bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
for (size_t i = 0; i < moves_.Size(); ++i) {
if (moves_.Get(i)->Blocks(loc)) {
return false;
@@ -245,10 +243,10 @@ bool ParallelMoveResolver::IsScratchLocation(Location loc) {
return false;
}
-int ParallelMoveResolver::AllocateScratchRegister(int blocked,
- int register_count,
- int if_scratch,
- bool* spilled) {
+int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
+ int register_count,
+ int if_scratch,
+ bool* spilled) {
DCHECK_NE(blocked, if_scratch);
int scratch = -1;
for (int reg = 0; reg < register_count; ++reg) {
@@ -269,8 +267,8 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked,
}
-ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
- ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
+ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
+ ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
: resolver_(resolver),
reg_(kNoRegister),
spilled_(false) {
@@ -282,10 +280,271 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
}
-ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
+ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
if (spilled_) {
resolver_->RestoreScratch(reg_);
}
}
+void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
+ DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
+ DCHECK(moves_.IsEmpty());
+ DCHECK(scratches_.IsEmpty());
+
+ // Backend dependent initialization.
+ PrepareForEmitNativeCode();
+
+ // Build up a worklist of moves.
+ BuildInitialMoveList(parallel_move);
+
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ const MoveOperands& move = *moves_.Get(i);
+ // Skip constants to perform them last. They don't block other moves and
+ // skipping such moves with register destinations keeps those registers
+ // free for the whole algorithm.
+ if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
+ PerformMove(i);
+ }
+ }
+
+ // Perform the moves with constant sources and register destinations with UpdateMoveSource()
+ // to reduce the number of literal loads. Stack destinations are skipped since we won't
+ // benefit from changing the constant sources to stack locations.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ Location destination = move->GetDestination();
+ if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
+ Location source = move->GetSource();
+ EmitMove(i);
+ move->Eliminate();
+ // This may introduce an additional instruction dependency, but reduces the
+ // number of moves and possible literal loads. For example,
+ // Original moves:
+ // 1234.5678 -> D0
+ // 1234.5678 -> D1
+ // Updated moves:
+ // 1234.5678 -> D0
+ // D0 -> D1
+ UpdateMoveSource(source, destination);
+ }
+ }
+
+ // Perform the rest of the moves.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ if (!move->IsEliminated()) {
+ EmitMove(i);
+ move->Eliminate();
+ }
+ }
+
+ // All pending moves that we have added to resolve cycles should have been performed.
+ DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
+
+ // Backend dependent cleanup.
+ FinishEmitNativeCode();
+
+ moves_.Reset();
+ scratches_.Reset();
+}
+
+Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ Location loc = scratches_.Get(i);
+ if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+ return loc;
+ }
+ }
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ Location loc = moves_.Get(i)->GetDestination();
+ if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
+ return loc;
+ }
+ }
+ return Location::NoLocation();
+}
+
+void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
+ if (kIsDebugBuild) {
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ DCHECK(!loc.Equals(scratches_.Get(i)));
+ }
+ }
+ scratches_.Add(loc);
+}
+
+void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
+ DCHECK(!IsBlockedByMoves(loc));
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ if (loc.Equals(scratches_.Get(i))) {
+ scratches_.DeleteAt(i);
+ break;
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
+ // Each call to this function performs a move and deletes it from the move
+ // graph. We first recursively perform any move blocking this one. We mark
+ // a move as "pending" on entry to PerformMove in order to detect cycles
+ // in the move graph. We use a scratch location to resolve cycles, which may
+ // add further pending moves. After a move has been performed, we update the
+ // source operands of the remaining moves to reduce dependencies in the graph.
+
+ MoveOperands* move = moves_.Get(index);
+ DCHECK(!move->IsPending());
+ DCHECK(!move->IsEliminated());
+ if (move->IsRedundant()) {
+ // Previous operations on the list of moves have caused this particular move
+ // to become a no-op, so we can safely eliminate it. Consider for example
+ // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
+ // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
+ // used as the scratch location, the move (1 -> 2) will occur while resolving
+ // the cycle. When that move is emitted, the code will update moves with a '1'
+ // as their source to use '2' instead (see `UpdateMoveSource()`). In our example
+ // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
+ // eliminated here.
+ move->Eliminate();
+ return;
+ }
+
+ // Clear this move's destination to indicate a pending move. The actual
+ // destination is saved in a stack-allocated local. Recursion may allow
+ // multiple moves to be pending.
+ DCHECK(!move->GetSource().IsInvalid());
+ Location destination = move->MarkPending();
+
+ // Perform a depth-first traversal of the move graph to resolve
+ // dependencies. Any unperformed, unpending move with a source the same
+ // as this one's destination blocks this one so recursively perform all
+ // such moves.
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ const MoveOperands& other_move = *moves_.Get(i);
+ if (other_move.Blocks(destination) && !other_move.IsPending()) {
+ PerformMove(i);
+ }
+ }
+
+ // We are about to resolve this move and don't need it marked as
+ // pending, so restore its destination.
+ move->ClearPending(destination);
+
+ // No one else should write to the move destination while it is pending.
+ DCHECK(!move->IsRedundant());
+
+ Location source = move->GetSource();
+ // The move may be blocked on several pending moves, in case we have a cycle.
+ if (IsBlockedByMoves(destination)) {
+ // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to the following
+ // sequence:
+ // (C -> scratch) # Emit right now.
+ // (A -> B) (B -> C) # Unblocked.
+ // (scratch -> A) # Add to pending_moves_, blocked by (A -> B).
+ Location::Kind kind = source.GetKind();
+ DCHECK_NE(kind, Location::kConstant);
+ Location scratch = AllocateScratchLocationFor(kind);
+ // We only care about the move size.
+ Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt;
+ // Perform (C -> scratch)
+ move->SetDestination(scratch);
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(source, scratch);
+ // Add (scratch -> A).
+ AddPendingMove(scratch, destination, type);
+ } else {
+ // This move is not blocked.
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(source, destination);
+ }
+
+ // Moves in the pending list should not block any other moves. But performing
+ // unblocked moves in the pending list can free scratch registers, so we do this
+ // as early as possible.
+ MoveOperands* pending_move;
+ while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
+ Location pending_source = pending_move->GetSource();
+ Location pending_destination = pending_move->GetDestination();
+ // We do not depend on the pending move index, so just delete the move instead
+ // of eliminating it to keep the pending list clean.
+ DeletePendingMove(pending_move);
+ move->SetSource(pending_source);
+ move->SetDestination(pending_destination);
+ EmitMove(index);
+ move->Eliminate();
+ UpdateMoveSource(pending_source, pending_destination);
+ // Free any unblocked locations in the scratch location list.
+ for (size_t i = 0; i < scratches_.Size(); ++i) {
+ Location scratch = scratches_.Get(i);
+ // Only a scratch location overlapping with the performed move's source can be unblocked.
+ if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
+ FreeScratchLocation(pending_source);
+ }
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
+ // This function is used to reduce the dependencies in the graph after
+ // (from -> to) has been performed. Since we ensure there is no move with the same
+ // destination, (to -> X) cannot be blocked while (from -> X) might still be
+ // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
+ // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
+ // a dependency between the two. If we update the source location from 1 to 2, we
+ // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
+ //
+ // This is not something we must do, but we can use fewer scratch locations with
+ // this trick. For example, we can avoid using additional scratch locations for
+ // moves (0 -> 1), (1 -> 2), (1 -> 0).
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ MoveOperands* move = moves_.Get(i);
+ if (move->GetSource().Equals(from)) {
+ move->SetSource(to);
+ }
+ }
+}
+
+void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
+ Location destination, Primitive::Type type) {
+ pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr));
+}
+
+void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
+ pending_moves_.Delete(move);
+}
+
+MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
+ for (size_t i = 0; i < pending_moves_.Size(); ++i) {
+ MoveOperands* move = pending_moves_.Get(i);
+ Location destination = move->GetDestination();
+ // Only moves whose destination overlaps with the input `loc` can be unblocked.
+ if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
+ return move;
+ }
+ }
+ return nullptr;
+}
+
+bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
+ for (size_t i = 0; i < pending_moves_.Size(); ++i) {
+ if (pending_moves_.Get(i)->Blocks(loc)) {
+ return true;
+ }
+ }
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ if (moves_.Get(i)->Blocks(loc)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// So far it is only used for debugging purposes to make sure all pending moves
+// have been performed.
+size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
+ return pending_moves_.Size();
+}
+
} // namespace art
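The cycle breaking in ParallelMoveResolverNoSwap::PerformMove can be shown end to end on the (A -> B) (B -> C) (C -> A) example from the comments above. The sketch below is a deliberately simplified, self-contained model (plain int locations, a single scratch slot, no 64-bit pairs), not the ART resolver; it reproduces the documented order (C -> scratch) (B -> C) (A -> B) (scratch -> A).

#include <cstdio>
#include <vector>

struct Move {
  int src;
  int dst;
  bool done;
  bool pending;
  Move(int s, int d) : src(s), dst(d), done(false), pending(false) {}
};

constexpr int kScratch = 100;  // the single pretend scratch location

struct ToyResolver {
  std::vector<Move> moves;
  std::vector<Move> pending_moves;

  // True if any unfinished move still reads `loc` (pending sources block too).
  bool Blocked(int loc) const {
    for (const Move& m : moves)
      if (!m.done && m.src == loc) return true;
    for (const Move& m : pending_moves)
      if (!m.done && m.src == loc) return true;
    return false;
  }

  // After (from -> to) is emitted, rewrite readers of `from` to read `to`.
  void UpdateMoveSource(int from, int to) {
    for (Move& m : moves)
      if (!m.done && m.src == from) m.src = to;
  }

  void Emit(int src, int dst) { std::printf("(%d -> %d) ", src, dst); }

  // Emit pending moves whose destination is no longer read by anyone.
  void FlushUnblockedPending() {
    bool progress = true;
    while (progress) {
      progress = false;
      for (Move& p : pending_moves) {
        if (!p.done && !Blocked(p.dst)) {
          Emit(p.src, p.dst);
          p.done = true;
          UpdateMoveSource(p.src, p.dst);
          progress = true;
        }
      }
    }
  }

  void Perform(size_t i) {
    Move& m = moves[i];
    m.pending = true;
    // First perform every non-pending move that still reads our destination.
    for (size_t j = 0; j < moves.size(); ++j)
      if (!moves[j].done && !moves[j].pending && moves[j].src == m.dst)
        Perform(j);
    m.pending = false;
    if (Blocked(m.dst)) {
      // Cycle: park our value in the scratch and finish the move later.
      Emit(m.src, kScratch);
      m.done = true;
      UpdateMoveSource(m.src, kScratch);
      pending_moves.push_back(Move(kScratch, m.dst));
    } else {
      Emit(m.src, m.dst);
      m.done = true;
      UpdateMoveSource(m.src, m.dst);
    }
    FlushUnblockedPending();
  }

  void Resolve() {
    for (size_t i = 0; i < moves.size(); ++i)
      if (!moves[i].done) Perform(i);
    std::printf("\n");
  }
};

int main() {
  ToyResolver r;
  r.moves = {{0, 1}, {1, 2}, {2, 0}};  // the cycle (A -> B) (B -> C) (C -> A)
  r.Resolve();  // prints: (2 -> 100) (1 -> 2) (0 -> 1) (100 -> 0)
}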
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 95f8ad5b74..e89417df7d 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -19,30 +19,47 @@
#include "base/value_object.h"
#include "utils/growable_array.h"
+#include "locations.h"
namespace art {
class HParallelMove;
-class Location;
class MoveOperands;
-/**
- * Helper class to resolve a set of parallel moves. Architecture dependent code
- * generator must have their own subclass that implements the `EmitMove` and `EmitSwap`
- * operations.
- */
+// Helper classes to resolve a set of parallel moves. An architecture-dependent code generator
+// must have its own subclass that implements the corresponding virtual functions.
class ParallelMoveResolver : public ValueObject {
public:
explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {}
virtual ~ParallelMoveResolver() {}
// Resolve a set of parallel moves, emitting assembler instructions.
- void EmitNativeCode(HParallelMove* parallel_move);
+ virtual void EmitNativeCode(HParallelMove* parallel_move) = 0;
+
+ protected:
+ // Build the initial list of moves.
+ void BuildInitialMoveList(HParallelMove* parallel_move);
+
+ GrowableArray<MoveOperands*> moves_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver);
+};
+
+// This helper class uses swaps to resolve dependencies and may emit swap operations.
+class ParallelMoveResolverWithSwap : public ParallelMoveResolver {
+ public:
+ explicit ParallelMoveResolverWithSwap(ArenaAllocator* allocator)
+ : ParallelMoveResolver(allocator) {}
+ virtual ~ParallelMoveResolverWithSwap() {}
+
+ // Resolve a set of parallel moves, emitting assembler instructions.
+ void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE;
protected:
class ScratchRegisterScope : public ValueObject {
public:
- ScratchRegisterScope(ParallelMoveResolver* resolver,
+ ScratchRegisterScope(ParallelMoveResolverWithSwap* resolver,
int blocked,
int if_scratch,
int number_of_registers);
@@ -52,11 +69,12 @@ class ParallelMoveResolver : public ValueObject {
bool IsSpilled() const { return spilled_; }
private:
- ParallelMoveResolver* resolver_;
+ ParallelMoveResolverWithSwap* resolver_;
int reg_;
bool spilled_;
};
+ // Return true if the location can be used as a scratch location.
bool IsScratchLocation(Location loc);
// Allocate a scratch register for performing a move. The method will try to use
@@ -72,15 +90,9 @@ class ParallelMoveResolver : public ValueObject {
virtual void SpillScratch(int reg) = 0;
virtual void RestoreScratch(int reg) = 0;
- // List of moves not yet resolved.
- GrowableArray<MoveOperands*> moves_;
-
static constexpr int kNoRegister = -1;
private:
- // Build the initial list of moves.
- void BuildInitialMoveList(HParallelMove* parallel_move);
-
// Perform the move at the moves_ index in question (possibly requiring
// other moves to satisfy dependencies).
//
@@ -99,7 +111,83 @@ class ParallelMoveResolver : public ValueObject {
// the right value.
MoveOperands* PerformMove(size_t index);
- DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver);
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverWithSwap);
+};
+
+// This helper class uses additional scratch registers to resolve dependencies. It supports all
+// kinds of dependency cycles and does not care about the register layout.
+class ParallelMoveResolverNoSwap : public ParallelMoveResolver {
+ public:
+ explicit ParallelMoveResolverNoSwap(ArenaAllocator* allocator)
+ : ParallelMoveResolver(allocator), scratches_(allocator, 32),
+ pending_moves_(allocator, 8), allocator_(allocator) {}
+ virtual ~ParallelMoveResolverNoSwap() {}
+
+ // Resolve a set of parallel moves, emitting assembler instructions.
+ void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE;
+
+ protected:
+ // Called at the beginning of EmitNativeCode(). A subclass may put architecture-dependent
+ // initialization here.
+ virtual void PrepareForEmitNativeCode() = 0;
+
+ // Called at the end of EmitNativeCode(). A subclass may put architecture-dependent cleanup
+ // here. All scratch locations will be removed after this call.
+ virtual void FinishEmitNativeCode() = 0;
+
+ // Allocate a scratch location to perform a move of the given kind of location. A subclass
+ // should implement this to return the best-fitting location. If there is no suitable physical
+ // register, it can also return a stack slot.
+ virtual Location AllocateScratchLocationFor(Location::Kind kind) = 0;
+
+ // Called after a move which takes a scratch location as source. A subclass can defer the cleanup
+ // to FinishEmitNativeCode().
+ virtual void FreeScratchLocation(Location loc) = 0;
+
+ // Emit a move.
+ virtual void EmitMove(size_t index) = 0;
+
+ // Return a scratch location from the moves which exactly matches the kind.
+ // Return Location::NoLocation() if no matching scratch location can be found.
+ Location GetScratchLocation(Location::Kind kind);
+
+ // Add a location to the scratch list which can be returned from GetScratchLocation() to resolve
+ // dependency cycles.
+ void AddScratchLocation(Location loc);
+
+ // Remove a location from the scratch list.
+ void RemoveScratchLocation(Location loc);
+
+ // List of scratch locations.
+ GrowableArray<Location> scratches_;
+
+ private:
+ // Perform the move at the given index in `moves_` (possibly requiring other moves to satisfy
+ // dependencies).
+ void PerformMove(size_t index);
+
+ void UpdateMoveSource(Location from, Location to);
+
+ void AddPendingMove(Location source, Location destination, Primitive::Type type);
+
+ void DeletePendingMove(MoveOperands* move);
+
+ // Find a move that may be unblocked after (loc -> XXX) is performed.
+ MoveOperands* GetUnblockedPendingMove(Location loc);
+
+ // Return true if the location is blocked by outstanding moves.
+ bool IsBlockedByMoves(Location loc);
+
+ // Return the number of pending moves.
+ size_t GetNumberOfPendingMoves();
+
+ // Additional pending moves which might be added to resolve dependency cycles.
+ GrowableArray<MoveOperands*> pending_moves_;
+
+ // Used to allocate pending MoveOperands.
+ ArenaAllocator* const allocator_;
+
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverNoSwap);
};
} // namespace art
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 95cca5172b..f8f70105cf 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -19,27 +19,41 @@
#include "parallel_move_resolver.h"
#include "gtest/gtest.h"
+#include "gtest/gtest-typed-test.h"
namespace art {
-class TestParallelMoveResolver : public ParallelMoveResolver {
- public:
- explicit TestParallelMoveResolver(ArenaAllocator* allocator) : ParallelMoveResolver(allocator) {}
-
- void Dump(Location location) {
- if (location.IsConstant()) {
- message_ << "C";
- } else if (location.IsPair()) {
- message_ << location.low() << "," << location.high();
- } else if (location.IsRegister()) {
- message_ << location.reg();
- } else if (location.IsStackSlot()) {
- message_ << location.GetStackIndex() << "(sp)";
- } else {
- message_ << "2x" << location.GetStackIndex() << "(sp)";
- DCHECK(location.IsDoubleStackSlot()) << location;
- }
+constexpr int kScratchRegisterStartIndexForTest = 100;
+
+static void DumpRegisterForTest(std::ostream& os, int reg) {
+ if (reg >= kScratchRegisterStartIndexForTest) {
+ os << "T" << reg - kScratchRegisterStartIndexForTest;
+ } else {
+ os << reg;
}
+}
+
+static void DumpLocationForTest(std::ostream& os, Location location) {
+ if (location.IsConstant()) {
+ os << "C";
+ } else if (location.IsPair()) {
+ DumpRegisterForTest(os, location.low());
+ os << ",";
+ DumpRegisterForTest(os, location.high());
+ } else if (location.IsRegister()) {
+ DumpRegisterForTest(os, location.reg());
+ } else if (location.IsStackSlot()) {
+ os << location.GetStackIndex() << "(sp)";
+ } else {
+ DCHECK(location.IsDoubleStackSlot()) << location;
+ os << "2x" << location.GetStackIndex() << "(sp)";
+ }
+}
+
+class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap {
+ public:
+ explicit TestParallelMoveResolverWithSwap(ArenaAllocator* allocator)
+ : ParallelMoveResolverWithSwap(allocator) {}
void EmitMove(size_t index) OVERRIDE {
MoveOperands* move = moves_.Get(index);
@@ -47,9 +61,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver {
message_ << " ";
}
message_ << "(";
- Dump(move->GetSource());
+ DumpLocationForTest(message_, move->GetSource());
message_ << " -> ";
- Dump(move->GetDestination());
+ DumpLocationForTest(message_, move->GetDestination());
message_ << ")";
}
@@ -59,9 +73,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver {
message_ << " ";
}
message_ << "(";
- Dump(move->GetSource());
+ DumpLocationForTest(message_, move->GetSource());
message_ << " <-> ";
- Dump(move->GetDestination());
+ DumpLocationForTest(message_, move->GetDestination());
message_ << ")";
}
@@ -76,7 +90,64 @@ class TestParallelMoveResolver : public ParallelMoveResolver {
std::ostringstream message_;
- DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolver);
+ DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolverWithSwap);
+};
+
+class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap {
+ public:
+ explicit TestParallelMoveResolverNoSwap(ArenaAllocator* allocator)
+ : ParallelMoveResolverNoSwap(allocator), scratch_index_(kScratchRegisterStartIndexForTest) {}
+
+ void PrepareForEmitNativeCode() OVERRIDE {
+ scratch_index_ = kScratchRegisterStartIndexForTest;
+ }
+
+ void FinishEmitNativeCode() OVERRIDE {}
+
+ Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE {
+ if (kind == Location::kStackSlot || kind == Location::kFpuRegister ||
+ kind == Location::kRegister) {
+ kind = Location::kRegister;
+ } else {
+ // Allocate register pair for double stack slot which simulates 32-bit backend's behavior.
+ kind = Location::kRegisterPair;
+ }
+ Location scratch = GetScratchLocation(kind);
+ if (scratch.Equals(Location::NoLocation())) {
+ AddScratchLocation(Location::RegisterLocation(scratch_index_));
+ AddScratchLocation(Location::RegisterLocation(scratch_index_ + 1));
+ AddScratchLocation(Location::RegisterPairLocation(scratch_index_, scratch_index_ + 1));
+ scratch = (kind == Location::kRegister) ? Location::RegisterLocation(scratch_index_)
+ : Location::RegisterPairLocation(scratch_index_, scratch_index_ + 1);
+ scratch_index_ += 2;
+ }
+ return scratch;
+ }
+
+ void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {}
+
+ void EmitMove(size_t index) OVERRIDE {
+ MoveOperands* move = moves_.Get(index);
+ if (!message_.str().empty()) {
+ message_ << " ";
+ }
+ message_ << "(";
+ DumpLocationForTest(message_, move->GetSource());
+ message_ << " -> ";
+ DumpLocationForTest(message_, move->GetDestination());
+ message_ << ")";
+ }
+
+ std::string GetMessage() const {
+ return message_.str();
+ }
+
+ private:
+ std::ostringstream message_;
+
+ int scratch_index_;
+
+ DISALLOW_COPY_AND_ASSIGN(TestParallelMoveResolverNoSwap);
};
static HParallelMove* BuildParallelMove(ArenaAllocator* allocator,
@@ -93,55 +164,102 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator,
return moves;
}
-TEST(ParallelMoveTest, Dependency) {
+template <typename T>
+class ParallelMoveTest : public ::testing::Test {
+ public:
+ static const bool has_swap;
+};
+
+template<> const bool ParallelMoveTest<TestParallelMoveResolverWithSwap>::has_swap = true;
+template<> const bool ParallelMoveTest<TestParallelMoveResolverNoSwap>::has_swap = false;
+
+typedef ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap>
+ ParallelMoveResolverTestTypes;
+
+TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes);
+
+
+TYPED_TEST(ParallelMoveTest, Dependency) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
static constexpr size_t moves[][2] = {{0, 1}, {1, 2}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(1 -> 2) (0 -> 1)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {1, 4}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(2 -> 3) (1 -> 2) (1 -> 4) (0 -> 1)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2 -> 3) (1 -> 2) (0 -> 1) (2 -> 4)", resolver.GetMessage().c_str());
+ }
}
}
-TEST(ParallelMoveTest, Swap) {
+TYPED_TEST(ParallelMoveTest, Cycle) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
static constexpr size_t moves[][2] = {{0, 1}, {1, 0}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(1 <-> 0)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(1 -> T0) (0 -> 1) (T0 -> 0)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {1, 0}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(1 -> 2) (1 <-> 0)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(1 -> 2) (0 -> 1) (2 -> 0)", resolver.GetMessage().c_str());
+ }
+ }
+
+ {
+ TypeParam resolver(&allocator);
+ static constexpr size_t moves[][2] = {{0, 1}, {1, 0}, {0, 2}};
+ resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0 -> 2) (1 <-> 0)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(0 -> 2) (1 -> 0) (2 -> 1)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(4 -> T0) (3 -> 4) (2 -> 3) (1 -> 2) (0 -> 1) (T0 -> 0)",
+ resolver.GetMessage().c_str());
+ }
}
}
-TEST(ParallelMoveTest, ConstantLast) {
+TYPED_TEST(ParallelMoveTest, ConstantLast) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::ConstantLocation(new (&allocator) HIntConstant(0)),
@@ -157,12 +275,12 @@ TEST(ParallelMoveTest, ConstantLast) {
ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str());
}
-TEST(ParallelMoveTest, Pairs) {
+TYPED_TEST(ParallelMoveTest, Pairs) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterLocation(2),
@@ -179,7 +297,7 @@ TEST(ParallelMoveTest, Pairs) {
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
@@ -196,7 +314,7 @@ TEST(ParallelMoveTest, Pairs) {
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
@@ -209,10 +327,14 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2 -> T0) (0,1 -> 2,3) (T0 -> 0)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterLocation(2),
@@ -230,10 +352,15 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(0,1 -> T0,T1) (7 -> 1) (2 -> 7) (T0,T1 -> 2,3)",
+ resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterLocation(2),
@@ -251,10 +378,15 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(0,1 -> T0,T1) (7 -> 1) (2 -> 7) (T0,T1 -> 2,3)",
+ resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
@@ -272,10 +404,14 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(7 -> T0) (2 -> 7) (0,1 -> 2,3) (T0 -> 1)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
@@ -288,10 +424,14 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2,3 -> T0,T1) (0,1 -> 2,3) (T0,T1 -> 0,1)", resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(2, 3),
@@ -304,12 +444,85 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(0,1 -> T0,T1) (2,3 -> 0,1) (T0,T1 -> 2,3)", resolver.GetMessage().c_str());
+ }
+ }
+}
+
+TYPED_TEST(ParallelMoveTest, MultiCycles) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ {
+ TypeParam resolver(&allocator);
+ static constexpr size_t moves[][2] = {{0, 1}, {1, 0}, {2, 3}, {3, 2}};
+ resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(1 <-> 0) (3 <-> 2)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(1 -> T0) (0 -> 1) (T0 -> 0) (3 -> T0) (2 -> 3) (T0 -> 2)",
+ resolver.GetMessage().c_str());
+ }
+ }
+ {
+ TypeParam resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(0),
+ Primitive::kPrimInt,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(3),
+ Location::RegisterLocation(1),
+ Primitive::kPrimInt,
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2 -> T0) (3 -> T1) (0,1 -> 2,3) (T0 -> 0) (T1 -> 1)",
+ resolver.GetMessage().c_str());
+ }
+ }
+ {
+ TypeParam resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(0),
+ Primitive::kPrimInt,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(3),
+ Location::RegisterLocation(1),
+ Primitive::kPrimInt,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ Primitive::kPrimLong,
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(3 -> T0) (0,1 -> T2,T3) (T0 -> 1) (2 -> 0) (T2,T3 -> 2,3)",
+ resolver.GetMessage().c_str());
+ }
}
{
// Test involving registers used in single context and pair context.
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterLocation(10),
@@ -327,17 +540,22 @@ TEST(ParallelMoveTest, Pairs) {
Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2x32(sp) -> T0,T1) (4,5 -> 2x32(sp)) (10 -> 5) (T0,T1 -> 10,11)",
+ resolver.GetMessage().c_str());
+ }
}
}
// Test that we do 64bits moves before 32bits moves.
-TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
+TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterLocation(0),
@@ -355,11 +573,16 @@ TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
Primitive::kPrimInt,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(48(sp) -> T0) (1 -> 48(sp)) (0 -> 1) (T0 -> 0)",
+ resolver.GetMessage().c_str());
+ }
}
{
- TestParallelMoveResolver resolver(&allocator);
+ TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
moves->AddMove(
Location::RegisterPairLocation(0, 1),
@@ -377,7 +600,12 @@ TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
Primitive::kPrimLong,
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str());
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2x32(sp) -> T0,T1) (2,3 -> 2x32(sp)) (0,1 -> 2,3) (T0,T1 -> 0,1)",
+ resolver.GetMessage().c_str());
+ }
}
}
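The `T0`/`T1` tokens in the NoSwap expectations above are scratch locations handed out by the
test resolver (hence its `scratch_index_` counter): without a swap primitive, a dependency
cycle is broken by parking one source in a scratch location, draining the rest of the chain,
and restoring at the end. A minimal standalone sketch of that scheme (hypothetical names,
none of ART's Location machinery) reproduces the order the five-register Cycle case expects:

#include <cstdio>

// Resolve the cycle r0 -> r1 -> ... -> r(n-1) -> r0 using one scratch slot.
static void EmitCycleWithScratch(const int* regs, int n) {
  std::printf("(%d -> T0) ", regs[n - 1]);             // Park the clobbered source.
  for (int i = n - 1; i > 0; --i) {
    std::printf("(%d -> %d) ", regs[i - 1], regs[i]);  // Each move is now safe.
  }
  std::printf("(T0 -> %d)\n", regs[0]);                // Restore into the last target.
}

int main() {
  const int cycle[] = {0, 1, 2, 3, 4};  // Mirrors the five-register test case.
  EmitCycleWithScratch(cycle, 5);
}

This prints "(4 -> T0) (3 -> 4) (2 -> 3) (1 -> 2) (0 -> 1) (T0 -> 0)", matching the NoSwap
expectation asserted above.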
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 479b87fea0..12b1c2b9bd 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -58,36 +58,40 @@ void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
}
void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) {
- HInstruction* lastInstruction = block->GetLastInstruction();
- if (!lastInstruction->IsIf()) {
+ HIf* ifInstruction = block->GetLastInstruction()->AsIf();
+ if (ifInstruction == nullptr) {
return;
}
- HInstruction* ifInput = lastInstruction->InputAt(0);
+ HInstruction* ifInput = ifInstruction->InputAt(0);
if (!ifInput->IsNotEqual() && !ifInput->IsEqual()) {
return;
}
HInstruction* input0 = ifInput->InputAt(0);
HInstruction* input1 = ifInput->InputAt(1);
- HInstruction* obj;
+ HInstruction* obj = nullptr;
- if ((input0->GetType() == Primitive::kPrimNot) && input1->ActAsNullConstant()) {
+ if (input1->IsNullConstant()) {
obj = input0;
- } else if ((input1->GetType() == Primitive::kPrimNot) && input0->ActAsNullConstant()) {
+ } else if (input0->IsNullConstant()) {
obj = input1;
} else {
return;
}
- HBoundType* bound_type =
- new (graph_->GetArena()) HBoundType(obj, ReferenceTypeInfo::CreateTop(false));
-
- block->InsertInstructionBefore(bound_type, lastInstruction);
+ // We only need to bound the type if we have uses in the relevant block.
+ // So start with null and create the HBoundType lazily, only if it's needed.
+ HBoundType* bound_type = nullptr;
HBasicBlock* notNullBlock = ifInput->IsNotEqual()
- ? lastInstruction->AsIf()->IfTrueSuccessor()
- : lastInstruction->AsIf()->IfFalseSuccessor();
+ ? ifInstruction->IfTrueSuccessor()
+ : ifInstruction->IfFalseSuccessor();
+
for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
HInstruction* user = it.Current()->GetUser();
if (notNullBlock->Dominates(user->GetBlock())) {
+ if (bound_type == nullptr) {
+ bound_type = new (graph_->GetArena()) HBoundType(obj, ReferenceTypeInfo::CreateTop(false));
+ notNullBlock->InsertInstructionBefore(bound_type, notNullBlock->GetFirstInstruction());
+ }
user->ReplaceInput(bound_type, it.Current()->GetIndex());
}
}
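The lazy pattern above, creating the HBoundType only once the first dominated use is found and
inserting it at the head of `notNullBlock` instead of before the `if`, reduces to the following
shape. This is a minimal sketch with hypothetical types, not ART code:

#include <functional>
#include <vector>

struct Use { bool dominated; };

// Build the shared replacement at most once, and only if some use needs it.
int* ReplaceDominatedUses(std::vector<Use>& uses, const std::function<int*()>& create) {
  int* bound = nullptr;
  for (Use& use : uses) {
    if (!use.dominated) continue;  // Analogue of notNullBlock->Dominates(...).
    if (bound == nullptr) {
      bound = create();            // The first qualifying use pays the allocation.
    }
    use.dominated = false;         // Analogue of user->ReplaceInput(bound, ...).
  }
  return bound;                    // Stays null when nothing was dominated.
}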
@@ -98,49 +102,58 @@ void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) {
// If that's the case insert an HBoundType instruction to bound the type of `x`
// to `ClassX` in the scope of the dominated blocks.
void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) {
- HInstruction* lastInstruction = block->GetLastInstruction();
- if (!lastInstruction->IsIf()) {
- return;
- }
- HInstruction* ifInput = lastInstruction->InputAt(0);
- // TODO: Handle more patterns here: HIf(bool) HIf(HNotEqual).
- if (!ifInput->IsEqual()) {
+ HIf* ifInstruction = block->GetLastInstruction()->AsIf();
+ if (ifInstruction == nullptr) {
return;
}
- HInstruction* instanceOf = ifInput->InputAt(0);
- HInstruction* comp_value = ifInput->InputAt(1);
- if (!instanceOf->IsInstanceOf() || !comp_value->IsIntConstant()) {
+ HInstruction* ifInput = ifInstruction->InputAt(0);
+ HInstruction* instanceOf = nullptr;
+ HBasicBlock* instanceOfTrueBlock = nullptr;
+
+ // The instruction simplifier has transformed:
+ // - `if (a instanceof A)` into an HIf with an HInstanceOf input
+  //  - `if (!(a instanceof A))` into an HIf with an HBooleanNot input (which in turn
+ // has an HInstanceOf input)
+ // So we should not see the usual HEqual here.
+ if (ifInput->IsInstanceOf()) {
+ instanceOf = ifInput;
+ instanceOfTrueBlock = ifInstruction->IfTrueSuccessor();
+ } else if (ifInput->IsBooleanNot() && ifInput->InputAt(0)->IsInstanceOf()) {
+ instanceOf = ifInput->InputAt(0);
+ instanceOfTrueBlock = ifInstruction->IfFalseSuccessor();
+ } else {
return;
}
- HInstruction* obj = instanceOf->InputAt(0);
- HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
-
- ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo();
- ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
- HBoundType* bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti);
-
- // Narrow the type as much as possible.
- {
- ScopedObjectAccess soa(Thread::Current());
- if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) {
- bound_type->SetReferenceTypeInfo(obj_rti);
- } else {
- bound_type->SetReferenceTypeInfo(
- ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false));
- }
- }
-
- block->InsertInstructionBefore(bound_type, lastInstruction);
- // Pick the right successor based on the value we compare against.
- HIntConstant* comp_value_int = comp_value->AsIntConstant();
- HBasicBlock* instanceOfTrueBlock = comp_value_int->GetValue() == 0
- ? lastInstruction->AsIf()->IfFalseSuccessor()
- : lastInstruction->AsIf()->IfTrueSuccessor();
+ // We only need to bound the type if we have uses in the relevant block.
+ // So start with null and create the HBoundType lazily, only if it's needed.
+ HBoundType* bound_type = nullptr;
+ HInstruction* obj = instanceOf->InputAt(0);
for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
HInstruction* user = it.Current()->GetUser();
if (instanceOfTrueBlock->Dominates(user->GetBlock())) {
+ if (bound_type == nullptr) {
+ HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
+
+ ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo();
+ ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+ bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti);
+
+ // Narrow the type as much as possible.
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) {
+ bound_type->SetReferenceTypeInfo(obj_rti);
+ } else {
+ bound_type->SetReferenceTypeInfo(
+ ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false));
+ }
+ }
+
+ instanceOfTrueBlock->InsertInstructionBefore(
+ bound_type, instanceOfTrueBlock->GetFirstInstruction());
+ }
user->ReplaceInput(bound_type, it.Current()->GetIndex());
}
}
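The rewritten match accepts exactly two shapes of `if` input and derives which successor the
bound type applies to. Reduced to a toy IR (hypothetical types, not ART code), the recognition
logic is:

enum class Kind { kInstanceOf, kBooleanNot, kOther };

struct Instr {
  Kind kind;
  Instr* input;  // A single input is enough for this sketch.
};

// Returns the InstanceOf feeding the If, and whether the If's true edge is
// the edge on which the instanceof succeeded.
Instr* MatchInstanceOf(Instr* if_input, bool* true_edge_is_match) {
  if (if_input->kind == Kind::kInstanceOf) {
    *true_edge_is_match = true;
    return if_input;
  }
  if (if_input->kind == Kind::kBooleanNot && if_input->input->kind == Kind::kInstanceOf) {
    *true_edge_is_match = false;  // Negated: the false successor is the match.
    return if_input->input;
  }
  return nullptr;  // Any other shape is left alone.
}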
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 6350b35ca1..0fdf051957 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -378,7 +378,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
// Split just before first register use.
size_t first_register_use = current->FirstRegisterUse();
if (first_register_use != kNoLifetime) {
- LiveInterval* split = Split(current, first_register_use - 1);
+ LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
// Don't add directly to `unhandled`, it needs to be sorted and the start
// of this new interval might be after intervals already in the list.
AddSorted(&unhandled, split);
@@ -903,6 +903,10 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
return false;
}
+  // We compare the position of this interval's first use with the next use
+  // of the other intervals: if every candidate register is used again before
+  // that position, we spill this interval instead.
+ size_t first_use = current->FirstUseAfter(current->GetStart());
+
// First set all registers as not being used.
size_t* next_use = registers_array_;
for (size_t i = 0; i < number_of_registers_; ++i) {
@@ -917,7 +921,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
if (active->IsFixed()) {
next_use[active->GetRegister()] = current->GetStart();
} else {
- size_t use = active->FirstRegisterUseAfter(current->GetStart());
+ size_t use = active->FirstUseAfter(current->GetStart());
if (use != kNoLifetime) {
next_use[active->GetRegister()] = use;
}
@@ -945,7 +949,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
next_use[inactive->GetRegister()] =
std::min(next_intersection, next_use[inactive->GetRegister()]);
} else {
- size_t use = inactive->FirstRegisterUseAfter(current->GetStart());
+ size_t use = inactive->FirstUseAfter(current->GetStart());
if (use != kNoLifetime) {
next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]);
}
@@ -959,16 +963,16 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
DCHECK(current->IsHighInterval());
reg = current->GetRegister();
// When allocating the low part, we made sure the high register was available.
- DCHECK_LT(first_register_use, next_use[reg]);
+ DCHECK_LT(first_use, next_use[reg]);
} else if (current->IsLowInterval()) {
reg = FindAvailableRegisterPair(next_use, first_register_use);
// We should spill if both registers are not available.
- should_spill = (first_register_use >= next_use[reg])
- || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
+ should_spill = (first_use >= next_use[reg])
+ || (first_use >= next_use[GetHighForLowRegister(reg)]);
} else {
DCHECK(!current->IsHighInterval());
reg = FindAvailableRegister(next_use);
- should_spill = (first_register_use >= next_use[reg]);
+ should_spill = (first_use >= next_use[reg]);
}
DCHECK_NE(reg, kNoRegister);
@@ -993,15 +997,17 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
// If the first use of that instruction is after the last use of the found
// register, we split this interval just before its first register use.
AllocateSpillSlotFor(current);
- LiveInterval* split = Split(current, first_register_use - 1);
+ LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1);
if (current == split) {
DumpInterval(std::cerr, current);
DumpAllIntervals(std::cerr);
// This situation has the potential to infinite loop, so we make it a non-debug CHECK.
+ HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2);
CHECK(false) << "There is not enough registers available for "
<< split->GetParent()->GetDefinedBy()->DebugName() << " "
<< split->GetParent()->GetDefinedBy()->GetId()
- << " at " << first_register_use - 1;
+ << " at " << first_register_use - 1 << " "
+ << (at == nullptr ? "" : at->DebugName());
}
AddSorted(unhandled_, split);
}
@@ -1094,6 +1100,31 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter
}
}
+LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) {
+ HBasicBlock* block_from = liveness_.GetBlockFromPosition(from);
+ HBasicBlock* block_to = liveness_.GetBlockFromPosition(to);
+ DCHECK(block_from != nullptr);
+ DCHECK(block_to != nullptr);
+
+ // Both locations are in the same block. We split at the given location.
+ if (block_from == block_to) {
+ return Split(interval, to);
+ }
+
+ // If `to` is in a loop, find the outermost loop header which does not contain `from`.
+ for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) {
+ HBasicBlock* header = it.Current()->GetHeader();
+ if (block_from->GetLifetimeStart() >= header->GetLifetimeStart()) {
+ break;
+ }
+ block_to = header;
+ }
+
+  // Split at the start of the found block, to piggy-back on existing moves
+  // due to resolution of non-linear control flow (see `ConnectSplitSiblings`).
+ return Split(interval, block_to->GetLifetimeStart());
+}
+
LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
DCHECK_GE(position, interval->GetStart());
DCHECK(!interval->IsDeadAt(position));
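The loop in SplitBetween walks the loops enclosing `to` from innermost to outermost and stops
at the first one that also contains `from`; everything up to that point is hoisted. A sketch
over plain lifetime positions (hypothetical loop records, ignoring the same-block fast path),
not ART code:

#include <cstddef>
#include <vector>

struct Loop { size_t header_start; };  // Lifetime start of the loop header.

size_t PickSplitPosition(size_t from, size_t to,
                         const std::vector<Loop>& loops_enclosing_to) {
  size_t best = to;
  // Innermost to outermost, like HLoopInformationOutwardIterator.
  for (const Loop& loop : loops_enclosing_to) {
    if (from >= loop.header_start) {
      break;  // `from` lives inside this loop too; splitting earlier gains nothing.
    }
    best = loop.header_start;  // Hoist the split (and thus the reload) out of the loop.
  }
  return best;
}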
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 717be75533..dc9c708eea 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -86,8 +86,12 @@ class RegisterAllocator {
// Add `interval` in the given sorted list.
static void AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval);
- // Split `interval` at the position `at`. The new interval starts at `at`.
- LiveInterval* Split(LiveInterval* interval, size_t at);
+ // Split `interval` at the position `position`. The new interval starts at `position`.
+ LiveInterval* Split(LiveInterval* interval, size_t position);
+
+ // Split `interval` at a position between `from` and `to`. The method will try
+ // to find an optimal split position.
+ LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to);
// Returns whether `reg` is blocked by the code generator.
bool IsBlocked(int reg) const;
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 182cd0e833..8c6d904a4c 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -854,6 +854,10 @@ TEST(RegisterAllocatorTest, SpillInactive) {
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(graph, &codegen);
+ // Populate the instructions in the liveness object, to please the register allocator.
+ for (size_t i = 0; i < 32; ++i) {
+ liveness.instructions_from_lifetime_position_.Add(user);
+ }
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
register_allocator.unhandled_core_intervals_.Add(fourth);
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 8eb98a186b..97254edb5e 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -131,6 +131,9 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
void Dump(std::ostream& stream) const {
stream << position_;
+ if (is_environment_) {
+ stream << " (env)";
+ }
}
UsePosition* Dup(ArenaAllocator* allocator) const {
@@ -330,7 +333,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
}
if (after_loop == nullptr) {
// Uses are only in the loop.
- first_range_ = last_range_ = range_search_start_ = new (allocator_) LiveRange(start, end, nullptr);
+ first_range_ = last_range_ = range_search_start_ =
+ new (allocator_) LiveRange(start, end, nullptr);
} else if (after_loop->GetStart() <= end) {
first_range_ = range_search_start_ = after_loop;
// There are uses after the loop.
@@ -366,6 +370,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
LiveInterval* GetParent() const { return parent_; }
+ // Returns whether this interval is the parent interval, that is, the interval
+ // that starts where the HInstruction is defined.
+ bool IsParent() const { return parent_ == this; }
+
LiveRange* GetFirstRange() const { return first_range_; }
LiveRange* GetLastRange() const { return last_range_; }
@@ -442,7 +450,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
if (is_temp_) {
return position == GetStart() ? position : kNoLifetime;
}
- if (position == GetStart() && defined_by_ != nullptr) {
+ if (position == GetStart() && IsParent()) {
LocationSummary* locations = defined_by_->GetLocations();
Location location = locations->Out();
// This interval is the first interval of the instruction. If the output
@@ -491,12 +499,19 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
return position == GetStart() ? position : kNoLifetime;
}
+ if (position == GetStart() && IsParent()) {
+ if (defined_by_->GetLocations()->Out().IsValid()) {
+ return position;
+ }
+ }
+
UsePosition* use = first_use_;
size_t end = GetEnd();
while (use != nullptr && use->GetPosition() <= end) {
if (!use->GetIsEnvironment()) {
+ Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
size_t use_position = use->GetPosition();
- if (use_position > position) {
+ if (use_position > position && location.IsValid()) {
return use_position;
}
}
@@ -582,7 +597,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
previous->next_ = nullptr;
new_interval->first_range_ = current;
if (range_search_start_ != nullptr && range_search_start_->GetEnd() >= current->GetEnd()) {
- // Search start point is inside `new_interval`. Change it to nullptr
+ // Search start point is inside `new_interval`. Change it to null
// (i.e. the end of the interval) in the original interval.
range_search_start_ = nullptr;
}
@@ -725,7 +740,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
}
void AddHighInterval(bool is_temp = false) {
- DCHECK_EQ(GetParent(), this);
+ DCHECK(IsParent());
DCHECK(!HasHighInterval());
DCHECK(!HasLowInterval());
high_or_low_interval_ = new (allocator_) LiveInterval(
@@ -849,7 +864,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
defined_by_(defined_by) {}
// Searches for a LiveRange that either covers the given position or is the
- // first next LiveRange. Returns nullptr if no such LiveRange exists. Ranges
+ // first next LiveRange. Returns null if no such LiveRange exists. Ranges
// known to end before `position` can be skipped with `search_start`.
LiveRange* FindRangeAtOrAfter(size_t position, LiveRange* search_start) const {
if (kIsDebugBuild) {
@@ -983,6 +998,15 @@ class SsaLivenessAnalysis : public ValueObject {
return instructions_from_lifetime_position_.Get(index);
}
+ HBasicBlock* GetBlockFromPosition(size_t index) const {
+ HInstruction* instruction = GetInstructionFromPosition(index / 2);
+ if (instruction == nullptr) {
+      // If we are at a block boundary, get the following block.
+ instruction = GetInstructionFromPosition((index / 2) + 1);
+ }
+ return instruction->GetBlock();
+ }
+
HInstruction* GetTempUser(LiveInterval* temp) const {
// A temporary shares the same lifetime start as the instruction that requires it.
DCHECK(temp->IsTemp());
@@ -1053,6 +1077,8 @@ class SsaLivenessAnalysis : public ValueObject {
GrowableArray<HInstruction*> instructions_from_lifetime_position_;
size_t number_of_ssa_values_;
+ ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
+
DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis);
};
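GetBlockFromPosition relies on the liveness table holding one instruction slot per two lifetime
positions, with a null slot at each block boundary. A standalone sketch of the lookup, assuming
that same encoding (hypothetical types, not ART code):

#include <cstddef>
#include <vector>

struct Block;
struct Instr { Block* block; };

Block* BlockFromPosition(const std::vector<Instr*>& table, size_t position) {
  Instr* instr = table[position / 2];
  if (instr == nullptr) {
    instr = table[position / 2 + 1];  // Boundary slot: the next slot starts the next block.
  }
  return instr->block;
}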
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
new file mode 100644
index 0000000000..8344fc3237
--- /dev/null
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stack_map_stream.h"
+
+namespace art {
+
+void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
+ uint32_t native_pc_offset,
+ uint32_t register_mask,
+ BitVector* sp_mask,
+ uint32_t num_dex_registers,
+ uint8_t inlining_depth) {
+ DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
+ current_entry_.dex_pc = dex_pc;
+ current_entry_.native_pc_offset = native_pc_offset;
+ current_entry_.register_mask = register_mask;
+ current_entry_.sp_mask = sp_mask;
+ current_entry_.num_dex_registers = num_dex_registers;
+ current_entry_.inlining_depth = inlining_depth;
+ current_entry_.dex_register_locations_start_index = dex_register_locations_.Size();
+ current_entry_.inline_infos_start_index = inline_infos_.Size();
+ current_entry_.dex_register_map_hash = 0;
+ current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound;
+ if (num_dex_registers != 0) {
+ current_entry_.live_dex_registers_mask =
+ new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true);
+ } else {
+ current_entry_.live_dex_registers_mask = nullptr;
+ }
+
+ if (sp_mask != nullptr) {
+ stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
+ }
+ if (inlining_depth > 0) {
+ number_of_stack_maps_with_inline_info_++;
+ }
+
+ dex_pc_max_ = std::max(dex_pc_max_, dex_pc);
+ native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset);
+ register_mask_max_ = std::max(register_mask_max_, register_mask);
+}
+
+void StackMapStream::EndStackMapEntry() {
+ current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap();
+ stack_maps_.Add(current_entry_);
+ current_entry_ = StackMapEntry();
+}
+
+void StackMapStream::AddDexRegisterEntry(uint16_t dex_register,
+ DexRegisterLocation::Kind kind,
+ int32_t value) {
+ DCHECK_LT(dex_register, current_entry_.num_dex_registers);
+
+ if (kind != DexRegisterLocation::Kind::kNone) {
+ // Ensure we only use non-compressed location kind at this stage.
+ DCHECK(DexRegisterLocation::IsShortLocationKind(kind))
+ << DexRegisterLocation::PrettyDescriptor(kind);
+ DexRegisterLocation location(kind, value);
+
+ // Look for Dex register `location` in the location catalog (using the
+ // companion hash map of locations to indices). Use its index if it
+ // is already in the location catalog. If not, insert it (in the
+ // location catalog and the hash map) and use the newly created index.
+ auto it = location_catalog_entries_indices_.Find(location);
+ if (it != location_catalog_entries_indices_.end()) {
+ // Retrieve the index from the hash map.
+ dex_register_locations_.Add(it->second);
+ } else {
+ // Create a new entry in the location catalog and the hash map.
+ size_t index = location_catalog_entries_.Size();
+ location_catalog_entries_.Add(location);
+ dex_register_locations_.Add(index);
+ location_catalog_entries_indices_.Insert(std::make_pair(location, index));
+ }
+
+ current_entry_.live_dex_registers_mask->SetBit(dex_register);
+ current_entry_.dex_register_map_hash +=
+ (1 << (dex_register % (sizeof(current_entry_.dex_register_map_hash) * kBitsPerByte)));
+ current_entry_.dex_register_map_hash += static_cast<uint32_t>(value);
+ current_entry_.dex_register_map_hash += static_cast<uint32_t>(kind);
+ }
+}
+
+void StackMapStream::AddInlineInfoEntry(uint32_t method_index) {
+ InlineInfoEntry entry;
+ entry.method_index = method_index;
+ inline_infos_.Add(entry);
+}
+
+size_t StackMapStream::PrepareForFillIn() {
+ int stack_mask_number_of_bits = stack_mask_max_ + 1; // Need room for max element too.
+ stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte;
+ inline_info_size_ = ComputeInlineInfoSize();
+ dex_register_maps_size_ = ComputeDexRegisterMapsSize();
+ stack_maps_size_ = stack_maps_.Size()
+ * StackMap::ComputeStackMapSize(stack_mask_size_,
+ inline_info_size_,
+ dex_register_maps_size_,
+ dex_pc_max_,
+ native_pc_offset_max_,
+ register_mask_max_);
+ dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
+
+ // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned.
+ needed_size_ = CodeInfo::kFixedSize
+ + dex_register_location_catalog_size_
+ + stack_maps_size_
+ + dex_register_maps_size_
+ + inline_info_size_;
+
+ dex_register_location_catalog_start_ = CodeInfo::kFixedSize;
+ stack_maps_start_ = dex_register_location_catalog_start_ + dex_register_location_catalog_size_;
+ dex_register_maps_start_ = stack_maps_start_ + stack_maps_size_;
+ inline_infos_start_ = dex_register_maps_start_ + dex_register_maps_size_;
+
+ return needed_size_;
+}
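For reference, the region layout that PrepareForFillIn computes, and that FillIn later carves
into subregions at exactly these boundaries:

//   +--------------------------------------+  offset 0
//   | CodeInfo fixed header                |  CodeInfo::kFixedSize bytes
//   +--------------------------------------+  dex_register_location_catalog_start_
//   | Dex register location catalog        |
//   +--------------------------------------+  stack_maps_start_
//   | Stack maps                           |
//   +--------------------------------------+  dex_register_maps_start_
//   | Dex register maps (deduplicated)     |
//   +--------------------------------------+  inline_infos_start_
//   | Inline infos                         |
//   +--------------------------------------+  needed_size_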
+
+size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const {
+ size_t size = DexRegisterLocationCatalog::kFixedSize;
+ for (size_t location_catalog_entry_index = 0;
+ location_catalog_entry_index < location_catalog_entries_.Size();
+ ++location_catalog_entry_index) {
+ DexRegisterLocation dex_register_location =
+ location_catalog_entries_.Get(location_catalog_entry_index);
+ size += DexRegisterLocationCatalog::EntrySize(dex_register_location);
+ }
+ return size;
+}
+
+size_t StackMapStream::ComputeDexRegisterMapSize(const StackMapEntry& entry) const {
+ // Size of the map in bytes.
+ size_t size = DexRegisterMap::kFixedSize;
+ // Add the live bit mask for the Dex register liveness.
+ size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers);
+ // Compute the size of the set of live Dex register entries.
+ size_t number_of_live_dex_registers = 0;
+ for (size_t dex_register_number = 0;
+ dex_register_number < entry.num_dex_registers;
+ ++dex_register_number) {
+ if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
+ ++number_of_live_dex_registers;
+ }
+ }
+ size_t map_entries_size_in_bits =
+ DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size())
+ * number_of_live_dex_registers;
+ size_t map_entries_size_in_bytes =
+ RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte;
+ size += map_entries_size_in_bytes;
+ return size;
+}
+
+size_t StackMapStream::ComputeDexRegisterMapsSize() const {
+ size_t size = 0;
+ for (size_t i = 0; i < stack_maps_.Size(); ++i) {
+ StackMapEntry entry = stack_maps_.Get(i);
+ if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) {
+ // Entries with the same dex map will have the same offset.
+ size += ComputeDexRegisterMapSize(entry);
+ }
+ }
+ return size;
+}
+
+size_t StackMapStream::ComputeInlineInfoSize() const {
+ return inline_infos_.Size() * InlineInfo::SingleEntrySize()
+ // For encoding the depth.
+ + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
+}
+
+void StackMapStream::FillIn(MemoryRegion region) {
+ DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
+ DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn";
+
+ CodeInfo code_info(region);
+ DCHECK_EQ(region.size(), needed_size_);
+ code_info.SetOverallSize(region.size());
+
+ MemoryRegion dex_register_locations_region = region.Subregion(
+ dex_register_maps_start_, dex_register_maps_size_);
+
+ MemoryRegion inline_infos_region = region.Subregion(
+ inline_infos_start_, inline_info_size_);
+
+ code_info.SetEncoding(inline_info_size_,
+ dex_register_maps_size_,
+ dex_pc_max_,
+ native_pc_offset_max_,
+ register_mask_max_);
+ code_info.SetNumberOfStackMaps(stack_maps_.Size());
+ code_info.SetStackMaskSize(stack_mask_size_);
+ DCHECK_EQ(code_info.GetStackMapsSize(), stack_maps_size_);
+
+ // Set the Dex register location catalog.
+ code_info.SetNumberOfDexRegisterLocationCatalogEntries(location_catalog_entries_.Size());
+ MemoryRegion dex_register_location_catalog_region = region.Subregion(
+ dex_register_location_catalog_start_, dex_register_location_catalog_size_);
+ DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
+ // Offset in `dex_register_location_catalog` where to store the next
+ // register location.
+ size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize;
+ for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) {
+ DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i);
+ dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location);
+ location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location);
+ }
+  // Ensure we reached the end of the Dex register location catalog.
+ DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size());
+
+ uintptr_t next_dex_register_map_offset = 0;
+ uintptr_t next_inline_info_offset = 0;
+ for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) {
+ StackMap stack_map = code_info.GetStackMapAt(i);
+ StackMapEntry entry = stack_maps_.Get(i);
+
+ stack_map.SetDexPc(code_info, entry.dex_pc);
+ stack_map.SetNativePcOffset(code_info, entry.native_pc_offset);
+ stack_map.SetRegisterMask(code_info, entry.register_mask);
+ if (entry.sp_mask != nullptr) {
+ stack_map.SetStackMask(code_info, *entry.sp_mask);
+ }
+
+ if (entry.num_dex_registers == 0) {
+ // No dex map available.
+ stack_map.SetDexRegisterMapOffset(code_info, StackMap::kNoDexRegisterMap);
+ } else {
+ // Search for an entry with the same dex map.
+ if (entry.same_dex_register_map_as_ != kNoSameDexMapFound) {
+        // If we have a hit, reuse the offset.
+ stack_map.SetDexRegisterMapOffset(code_info,
+ code_info.GetStackMapAt(entry.same_dex_register_map_as_)
+ .GetDexRegisterMapOffset(code_info));
+ } else {
+        // New dex register maps should be added to the stack map.
+ MemoryRegion register_region =
+ dex_register_locations_region.Subregion(
+ next_dex_register_map_offset,
+ ComputeDexRegisterMapSize(entry));
+ next_dex_register_map_offset += register_region.size();
+ DexRegisterMap dex_register_map(register_region);
+ stack_map.SetDexRegisterMapOffset(
+ code_info, register_region.start() - dex_register_locations_region.start());
+
+ // Set the live bit mask.
+ dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask);
+
+ // Set the dex register location mapping data.
+ for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
+ dex_register_number < entry.num_dex_registers;
+ ++dex_register_number) {
+ if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
+ size_t location_catalog_entry_index =
+ dex_register_locations_.Get(entry.dex_register_locations_start_index
+ + index_in_dex_register_locations);
+ dex_register_map.SetLocationCatalogEntryIndex(
+ index_in_dex_register_locations,
+ location_catalog_entry_index,
+ entry.num_dex_registers,
+ location_catalog_entries_.Size());
+ ++index_in_dex_register_locations;
+ }
+ }
+ }
+ }
+
+ // Set the inlining info.
+ if (entry.inlining_depth != 0) {
+ MemoryRegion inline_region = inline_infos_region.Subregion(
+ next_inline_info_offset,
+ InlineInfo::kFixedSize + entry.inlining_depth * InlineInfo::SingleEntrySize());
+ next_inline_info_offset += inline_region.size();
+ InlineInfo inline_info(inline_region);
+
+ // Currently relative to the dex register map.
+ stack_map.SetInlineDescriptorOffset(
+ code_info, inline_region.start() - dex_register_locations_region.start());
+
+ inline_info.SetDepth(entry.inlining_depth);
+ for (size_t j = 0; j < entry.inlining_depth; ++j) {
+ InlineInfoEntry inline_entry = inline_infos_.Get(j + entry.inline_infos_start_index);
+ inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index);
+ }
+ } else {
+ if (inline_info_size_ != 0) {
+ stack_map.SetInlineDescriptorOffset(code_info, StackMap::kNoInlineInfo);
+ }
+ }
+ }
+}
+
+size_t StackMapStream::FindEntryWithTheSameDexMap() {
+ size_t current_entry_index = stack_maps_.Size();
+ auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash);
+ if (entries_it == dex_map_hash_to_stack_map_indices_.end()) {
+    // We don't have a perfect hash function, so we need a list to collect all stack maps
+ // which might have the same dex register map.
+ GrowableArray<uint32_t> stack_map_indices(allocator_, 1);
+ stack_map_indices.Add(current_entry_index);
+ dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices);
+ return kNoSameDexMapFound;
+ }
+
+ // We might have collisions, so we need to check whether or not we really have a match.
+ for (size_t i = 0; i < entries_it->second.Size(); i++) {
+ size_t test_entry_index = entries_it->second.Get(i);
+ if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) {
+ return test_entry_index;
+ }
+ }
+ entries_it->second.Add(current_entry_index);
+ return kNoSameDexMapFound;
+}
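The hash is deliberately cheap and imperfect, so each bucket keeps every stack-map index that
produced it, and a full comparison settles collisions. The same scheme in miniature, using
standard containers in place of the arena ones (hypothetical, not ART code):

#include <cstddef>
#include <map>
#include <vector>

using Map = std::vector<int>;  // Stand-in for a dex register map.

size_t FindSameMap(std::map<unsigned, std::vector<size_t>>& buckets,
                   const std::vector<Map>& maps, const Map& current,
                   unsigned hash, size_t kNotFound) {
  auto it = buckets.find(hash);
  if (it == buckets.end()) {
    buckets[hash].push_back(maps.size());  // First occupant of this bucket.
    return kNotFound;
  }
  for (size_t index : it->second) {
    if (maps[index] == current) {
      return index;  // A genuine match, not just a hash collision.
    }
  }
  it->second.push_back(maps.size());  // Hash collision only; remember us as well.
  return kNotFound;
}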
+
+bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const {
+ if (a.live_dex_registers_mask == nullptr && b.live_dex_registers_mask == nullptr) {
+ return true;
+ }
+ if (a.live_dex_registers_mask == nullptr || b.live_dex_registers_mask == nullptr) {
+ return false;
+ }
+ if (a.num_dex_registers != b.num_dex_registers) {
+ return false;
+ }
+
+ int index_in_dex_register_locations = 0;
+ for (uint32_t i = 0; i < a.num_dex_registers; i++) {
+ if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) {
+ return false;
+ }
+ if (a.live_dex_registers_mask->IsBitSet(i)) {
+ size_t a_loc = dex_register_locations_.Get(
+ a.dex_register_locations_start_index + index_in_dex_register_locations);
+ size_t b_loc = dex_register_locations_.Get(
+ b.dex_register_locations_start_index + index_in_dex_register_locations);
+ if (a_loc != b_loc) {
+ return false;
+ }
+ ++index_in_dex_register_locations;
+ }
+ }
+ return true;
+}
+
+} // namespace art
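Putting the reworked API together, a typical emission now looks like the fragment below,
adapted from the updated stack_map_test.cc later in this change (it assumes that file's
headers and StackMapStream's allocator-taking constructor). Entries are bracketed by
BeginStackMapEntry/EndStackMapEntry so deduplication runs once per entry, and PrepareForFillIn
must run before FillIn to populate the cached sizes that FillIn's DCHECKs verify:

ArenaPool pool;
ArenaAllocator arena(&pool);
StackMapStream stream(&arena);

ArenaBitVector sp_mask(&arena, 0, false);
stream.BeginStackMapEntry(/* dex_pc */ 0, /* native_pc_offset */ 64,
                          /* register_mask */ 0x3, &sp_mask,
                          /* num_dex_registers */ 2, /* inlining_depth */ 0);
stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0);
stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
stream.EndStackMapEntry();

size_t needed = stream.PrepareForFillIn();    // Computes and caches all sizes.
void* memory = arena.Alloc(needed, kArenaAllocMisc);
stream.FillIn(MemoryRegion(memory, needed));  // Region size must equal `needed`.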
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 9a9e068a9b..0c626be89f 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -70,13 +70,18 @@ class StackMapStream : public ValueObject {
native_pc_offset_max_(0),
register_mask_max_(0),
number_of_stack_maps_with_inline_info_(0),
- dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {}
-
- // Compute bytes needed to encode a mask with the given maximum element.
- static uint32_t StackMaskEncodingSize(int max_element) {
- int number_of_bits = max_element + 1; // Need room for max element too.
- return RoundUp(number_of_bits, kBitsPerByte) / kBitsPerByte;
- }
+ dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()),
+ current_entry_(),
+ stack_mask_size_(0),
+ inline_info_size_(0),
+ dex_register_maps_size_(0),
+ stack_maps_size_(0),
+ dex_register_location_catalog_size_(0),
+ dex_register_location_catalog_start_(0),
+ stack_maps_start_(0),
+ dex_register_maps_start_(0),
+ inline_infos_start_(0),
+ needed_size_(0) {}
// See runtime/stack_map.h to know what these fields contain.
struct StackMapEntry {
@@ -90,380 +95,42 @@ class StackMapStream : public ValueObject {
size_t inline_infos_start_index;
BitVector* live_dex_registers_mask;
uint32_t dex_register_map_hash;
+ size_t same_dex_register_map_as_;
};
struct InlineInfoEntry {
uint32_t method_index;
};
- void AddStackMapEntry(uint32_t dex_pc,
- uint32_t native_pc_offset,
- uint32_t register_mask,
- BitVector* sp_mask,
- uint32_t num_dex_registers,
- uint8_t inlining_depth) {
- StackMapEntry entry;
- entry.dex_pc = dex_pc;
- entry.native_pc_offset = native_pc_offset;
- entry.register_mask = register_mask;
- entry.sp_mask = sp_mask;
- entry.num_dex_registers = num_dex_registers;
- entry.inlining_depth = inlining_depth;
- entry.dex_register_locations_start_index = dex_register_locations_.Size();
- entry.inline_infos_start_index = inline_infos_.Size();
- entry.dex_register_map_hash = 0;
- if (num_dex_registers != 0) {
- entry.live_dex_registers_mask =
- new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true);
- } else {
- entry.live_dex_registers_mask = nullptr;
- }
- stack_maps_.Add(entry);
-
- if (sp_mask != nullptr) {
- stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
- }
- if (inlining_depth > 0) {
- number_of_stack_maps_with_inline_info_++;
- }
-
- dex_pc_max_ = std::max(dex_pc_max_, dex_pc);
- native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset);
- register_mask_max_ = std::max(register_mask_max_, register_mask);
- }
-
- void AddInlineInfoEntry(uint32_t method_index) {
- InlineInfoEntry entry;
- entry.method_index = method_index;
- inline_infos_.Add(entry);
- }
-
- size_t ComputeNeededSize() {
- size_t size = CodeInfo::kFixedSize
- + ComputeDexRegisterLocationCatalogSize()
- + ComputeStackMapsSize()
- + ComputeDexRegisterMapsSize()
- + ComputeInlineInfoSize();
- // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned.
- return size;
- }
-
- size_t ComputeStackMaskSize() const {
- return StackMaskEncodingSize(stack_mask_max_);
- }
-
- size_t ComputeStackMapsSize() {
- return stack_maps_.Size() * StackMap::ComputeStackMapSize(
- ComputeStackMaskSize(),
- ComputeInlineInfoSize(),
- ComputeDexRegisterMapsSize(),
- dex_pc_max_,
- native_pc_offset_max_,
- register_mask_max_);
- }
-
- // Compute the size of the Dex register location catalog of `entry`.
- size_t ComputeDexRegisterLocationCatalogSize() const {
- size_t size = DexRegisterLocationCatalog::kFixedSize;
- for (size_t location_catalog_entry_index = 0;
- location_catalog_entry_index < location_catalog_entries_.Size();
- ++location_catalog_entry_index) {
- DexRegisterLocation dex_register_location =
- location_catalog_entries_.Get(location_catalog_entry_index);
- size += DexRegisterLocationCatalog::EntrySize(dex_register_location);
- }
- return size;
- }
-
- size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const {
- // Size of the map in bytes.
- size_t size = DexRegisterMap::kFixedSize;
- // Add the live bit mask for the Dex register liveness.
- size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers);
- // Compute the size of the set of live Dex register entries.
- size_t number_of_live_dex_registers = 0;
- for (size_t dex_register_number = 0;
- dex_register_number < entry.num_dex_registers;
- ++dex_register_number) {
- if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
- ++number_of_live_dex_registers;
- }
- }
- size_t map_entries_size_in_bits =
- DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size())
- * number_of_live_dex_registers;
- size_t map_entries_size_in_bytes =
- RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte;
- size += map_entries_size_in_bytes;
- return size;
- }
-
- // Compute the size of all the Dex register maps.
- size_t ComputeDexRegisterMapsSize() {
- size_t size = 0;
- for (size_t i = 0; i < stack_maps_.Size(); ++i) {
- if (FindEntryWithTheSameDexMap(i) == kNoSameDexMapFound) {
- // Entries with the same dex map will have the same offset.
- size += ComputeDexRegisterMapSize(stack_maps_.Get(i));
- }
- }
- return size;
- }
-
- // Compute the size of all the inline information pieces.
- size_t ComputeInlineInfoSize() const {
- return inline_infos_.Size() * InlineInfo::SingleEntrySize()
- // For encoding the depth.
- + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
- }
+ void BeginStackMapEntry(uint32_t dex_pc,
+ uint32_t native_pc_offset,
+ uint32_t register_mask,
+ BitVector* sp_mask,
+ uint32_t num_dex_registers,
+ uint8_t inlining_depth);
+ void EndStackMapEntry();
- size_t ComputeDexRegisterLocationCatalogStart() const {
- return CodeInfo::kFixedSize;
- }
-
- size_t ComputeStackMapsStart() const {
- return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize();
- }
-
- size_t ComputeDexRegisterMapsStart() {
- return ComputeStackMapsStart() + ComputeStackMapsSize();
- }
-
- size_t ComputeInlineInfoStart() {
- return ComputeDexRegisterMapsStart() + ComputeDexRegisterMapsSize();
- }
+ void AddDexRegisterEntry(uint16_t dex_register,
+ DexRegisterLocation::Kind kind,
+ int32_t value);
- void FillIn(MemoryRegion region) {
- CodeInfo code_info(region);
- DCHECK_EQ(region.size(), ComputeNeededSize());
- code_info.SetOverallSize(region.size());
+ void AddInlineInfoEntry(uint32_t method_index);
- size_t stack_mask_size = ComputeStackMaskSize();
-
- size_t dex_register_map_size = ComputeDexRegisterMapsSize();
- size_t inline_info_size = ComputeInlineInfoSize();
-
- MemoryRegion dex_register_locations_region = region.Subregion(
- ComputeDexRegisterMapsStart(),
- dex_register_map_size);
-
- MemoryRegion inline_infos_region = region.Subregion(
- ComputeInlineInfoStart(),
- inline_info_size);
-
- code_info.SetEncoding(inline_info_size,
- dex_register_map_size,
- dex_pc_max_,
- native_pc_offset_max_,
- register_mask_max_);
- code_info.SetNumberOfStackMaps(stack_maps_.Size());
- code_info.SetStackMaskSize(stack_mask_size);
- DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize());
-
- // Set the Dex register location catalog.
- code_info.SetNumberOfDexRegisterLocationCatalogEntries(
- location_catalog_entries_.Size());
- MemoryRegion dex_register_location_catalog_region = region.Subregion(
- ComputeDexRegisterLocationCatalogStart(),
- ComputeDexRegisterLocationCatalogSize());
- DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
- // Offset in `dex_register_location_catalog` where to store the next
- // register location.
- size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize;
- for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) {
- DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i);
- dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location);
- location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location);
- }
- // Ensure we reached the end of the Dex registers location_catalog.
- DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size());
-
- uintptr_t next_dex_register_map_offset = 0;
- uintptr_t next_inline_info_offset = 0;
- for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) {
- StackMap stack_map = code_info.GetStackMapAt(i);
- StackMapEntry entry = stack_maps_.Get(i);
-
- stack_map.SetDexPc(code_info, entry.dex_pc);
- stack_map.SetNativePcOffset(code_info, entry.native_pc_offset);
- stack_map.SetRegisterMask(code_info, entry.register_mask);
- if (entry.sp_mask != nullptr) {
- stack_map.SetStackMask(code_info, *entry.sp_mask);
- }
-
- if (entry.num_dex_registers == 0) {
- // No dex map available.
- stack_map.SetDexRegisterMapOffset(code_info, StackMap::kNoDexRegisterMap);
- } else {
- // Search for an entry with the same dex map.
- size_t entry_with_same_map = FindEntryWithTheSameDexMap(i);
- if (entry_with_same_map != kNoSameDexMapFound) {
- // If we have a hit reuse the offset.
- stack_map.SetDexRegisterMapOffset(code_info,
- code_info.GetStackMapAt(entry_with_same_map).GetDexRegisterMapOffset(code_info));
- } else {
- // New dex registers maps should be added to the stack map.
- MemoryRegion register_region =
- dex_register_locations_region.Subregion(
- next_dex_register_map_offset,
- ComputeDexRegisterMapSize(entry));
- next_dex_register_map_offset += register_region.size();
- DexRegisterMap dex_register_map(register_region);
- stack_map.SetDexRegisterMapOffset(
- code_info, register_region.start() - dex_register_locations_region.start());
-
- // Set the live bit mask.
- dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask);
-
- // Set the dex register location mapping data.
- for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
- dex_register_number < entry.num_dex_registers;
- ++dex_register_number) {
- if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
- size_t location_catalog_entry_index =
- dex_register_locations_.Get(entry.dex_register_locations_start_index
- + index_in_dex_register_locations);
- dex_register_map.SetLocationCatalogEntryIndex(
- index_in_dex_register_locations,
- location_catalog_entry_index,
- entry.num_dex_registers,
- location_catalog_entries_.Size());
- ++index_in_dex_register_locations;
- }
- }
- }
- }
-
- // Set the inlining info.
- if (entry.inlining_depth != 0) {
- MemoryRegion inline_region = inline_infos_region.Subregion(
- next_inline_info_offset,
- InlineInfo::kFixedSize + entry.inlining_depth * InlineInfo::SingleEntrySize());
- next_inline_info_offset += inline_region.size();
- InlineInfo inline_info(inline_region);
-
- // Currently relative to the dex register map.
- stack_map.SetInlineDescriptorOffset(
- code_info, inline_region.start() - dex_register_locations_region.start());
-
- inline_info.SetDepth(entry.inlining_depth);
- for (size_t j = 0; j < entry.inlining_depth; ++j) {
- InlineInfoEntry inline_entry = inline_infos_.Get(j + entry.inline_infos_start_index);
- inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index);
- }
- } else {
- if (inline_info_size != 0) {
- stack_map.SetInlineDescriptorOffset(code_info, StackMap::kNoInlineInfo);
- }
- }
- }
- }
-
- void AddDexRegisterEntry(uint16_t dex_register, DexRegisterLocation::Kind kind, int32_t value) {
- StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1);
- DCHECK_LT(dex_register, entry.num_dex_registers);
-
- if (kind != DexRegisterLocation::Kind::kNone) {
- // Ensure we only use non-compressed location kind at this stage.
- DCHECK(DexRegisterLocation::IsShortLocationKind(kind))
- << DexRegisterLocation::PrettyDescriptor(kind);
- DexRegisterLocation location(kind, value);
-
- // Look for Dex register `location` in the location catalog (using the
- // companion hash map of locations to indices). Use its index if it
- // is already in the location catalog. If not, insert it (in the
- // location catalog and the hash map) and use the newly created index.
- auto it = location_catalog_entries_indices_.Find(location);
- if (it != location_catalog_entries_indices_.end()) {
- // Retrieve the index from the hash map.
- dex_register_locations_.Add(it->second);
- } else {
- // Create a new entry in the location catalog and the hash map.
- size_t index = location_catalog_entries_.Size();
- location_catalog_entries_.Add(location);
- dex_register_locations_.Add(index);
- location_catalog_entries_indices_.Insert(std::make_pair(location, index));
- }
-
- entry.live_dex_registers_mask->SetBit(dex_register);
- entry.dex_register_map_hash +=
- (1 << (dex_register % (sizeof(entry.dex_register_map_hash) * kBitsPerByte)));
- entry.dex_register_map_hash += static_cast<uint32_t>(value);
- entry.dex_register_map_hash += static_cast<uint32_t>(kind);
- stack_maps_.Put(stack_maps_.Size() - 1, entry);
- }
- }
+ // Prepares the stream to fill in a memory region. Must be called before FillIn.
+ // Returns the size (in bytes) needed to store this stream.
+ size_t PrepareForFillIn();
+ void FillIn(MemoryRegion region);
private:
- // Returns the index of an entry with the same dex register map
- // or kNoSameDexMapFound if no such entry exists.
- size_t FindEntryWithTheSameDexMap(size_t entry_index) {
- StackMapEntry entry = stack_maps_.Get(entry_index);
- auto entries_it = dex_map_hash_to_stack_map_indices_.find(entry.dex_register_map_hash);
- if (entries_it == dex_map_hash_to_stack_map_indices_.end()) {
- // We don't have a perfect hash functions so we need a list to collect all stack maps
- // which might have the same dex register map.
- GrowableArray<uint32_t> stack_map_indices(allocator_, 1);
- stack_map_indices.Add(entry_index);
- dex_map_hash_to_stack_map_indices_.Put(entry.dex_register_map_hash, stack_map_indices);
- return kNoSameDexMapFound;
- }
-
- // TODO: We don't need to add ourselves to the map if we can guarantee that
- // FindEntryWithTheSameDexMap is called just once per stack map entry.
- // A good way to do this is to cache the offset in the stack map entry. This
- // is easier to do if we add markers when the stack map constructions begins
- // and when it ends.
+ size_t ComputeDexRegisterLocationCatalogSize() const;
+ size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const;
+ size_t ComputeDexRegisterMapsSize() const;
+ size_t ComputeInlineInfoSize() const;
- // We might have collisions, so we need to check whether or not we should
- // add the entry to the map. `needs_to_be_added` keeps track of this.
- bool needs_to_be_added = true;
- size_t result = kNoSameDexMapFound;
- for (size_t i = 0; i < entries_it->second.Size(); i++) {
- size_t test_entry_index = entries_it->second.Get(i);
- if (test_entry_index == entry_index) {
- needs_to_be_added = false;
- } else if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), entry)) {
- result = test_entry_index;
- needs_to_be_added = false;
- break;
- }
- }
- if (needs_to_be_added) {
- entries_it->second.Add(entry_index);
- }
- return result;
- }
-
- bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const {
- if (a.live_dex_registers_mask == nullptr && b.live_dex_registers_mask == nullptr) {
- return true;
- }
- if (a.live_dex_registers_mask == nullptr || b.live_dex_registers_mask == nullptr) {
- return false;
- }
- if (a.num_dex_registers != b.num_dex_registers) {
- return false;
- }
-
- int index_in_dex_register_locations = 0;
- for (uint32_t i = 0; i < a.num_dex_registers; i++) {
- if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) {
- return false;
- }
- if (a.live_dex_registers_mask->IsBitSet(i)) {
- size_t a_loc = dex_register_locations_.Get(
- a.dex_register_locations_start_index + index_in_dex_register_locations);
- size_t b_loc = dex_register_locations_.Get(
- b.dex_register_locations_start_index + index_in_dex_register_locations);
- if (a_loc != b_loc) {
- return false;
- }
- ++index_in_dex_register_locations;
- }
- }
- return true;
- }
+ // Returns the index of an entry with the same dex register map as the current entry,
+ // or kNoSameDexMapFound if no such entry exists.
+ size_t FindEntryWithTheSameDexMap();
+ bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const;
ArenaAllocator* allocator_;
GrowableArray<StackMapEntry> stack_maps_;
@@ -476,8 +143,7 @@ class StackMapStream : public ValueObject {
DexRegisterLocationHashFn> LocationCatalogEntriesIndices;
LocationCatalogEntriesIndices location_catalog_entries_indices_;
- // A set of concatenated maps of Dex register locations indices to
- // `location_catalog_entries_`.
+ // A set of concatenated maps of Dex register location indices to `location_catalog_entries_`.
GrowableArray<size_t> dex_register_locations_;
GrowableArray<InlineInfoEntry> inline_infos_;
int stack_mask_max_;
@@ -488,6 +154,18 @@ class StackMapStream : public ValueObject {
ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_;
+ StackMapEntry current_entry_;
+ size_t stack_mask_size_;
+ size_t inline_info_size_;
+ size_t dex_register_maps_size_;
+ size_t stack_maps_size_;
+ size_t dex_register_location_catalog_size_;
+ size_t dex_register_location_catalog_start_;
+ size_t stack_maps_start_;
+ size_t dex_register_maps_start_;
+ size_t inline_infos_start_;
+ size_t needed_size_;
+
static constexpr uint32_t kNoSameDexMapFound = -1;
DISALLOW_COPY_AND_ASSIGN(StackMapStream);
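The header changes above replace the old single-shot interface with a two-phase protocol: entries are delimited with BeginStackMapEntry/EndStackMapEntry while the stream is built, then PrepareForFillIn sizes the stream before FillIn serializes it. A minimal caller-side sketch of the new flow, modeled on the rewritten tests below (the arena setup matches the tests' elided preambles; the pc offsets, masks and register values are illustrative only):

  ArenaPool pool;
  ArenaAllocator arena(&pool);
  StackMapStream stream(&arena);

  ArenaBitVector sp_mask(&arena, 0, false);
  size_t number_of_dex_registers = 2;

  // Dex register entries are only legal between Begin and End; End is where
  // the stream can finalize the entry's bookkeeping (hash, liveness mask).
  stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
  stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0);
  stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
  stream.EndStackMapEntry();

  // PrepareForFillIn replaces ComputeNeededSize and must precede FillIn.
  size_t size = stream.PrepareForFillIn();
  void* memory = arena.Alloc(size, kArenaAllocMisc);
  MemoryRegion region(memory, size);
  stream.FillIn(region);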
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 8d160bc81e..3291a77021 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -40,11 +40,12 @@ TEST(StackMapTest, Test1) {
ArenaBitVector sp_mask(&arena, 0, false);
size_t number_of_dex_registers = 2;
- stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location.
stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
+ stream.EndStackMapEntry();
- size_t size = stream.ComputeNeededSize();
+ size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
stream.FillIn(region);
@@ -123,20 +124,22 @@ TEST(StackMapTest, Test2) {
sp_mask1.SetBit(2);
sp_mask1.SetBit(4);
size_t number_of_dex_registers = 2;
- stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2);
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2);
stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location.
stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
stream.AddInlineInfoEntry(42);
stream.AddInlineInfoEntry(82);
+ stream.EndStackMapEntry();
ArenaBitVector sp_mask2(&arena, 0, true);
sp_mask2.SetBit(3);
sp_mask1.SetBit(8);
- stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0);
stream.AddDexRegisterEntry(0, Kind::kInRegister, 18); // Short location.
stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3); // Short location.
+ stream.EndStackMapEntry();
- size_t size = stream.ComputeNeededSize();
+ size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
stream.FillIn(region);
@@ -273,11 +276,12 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
ArenaBitVector sp_mask(&arena, 0, false);
uint32_t number_of_dex_registers = 2;
- stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
stream.AddDexRegisterEntry(0, Kind::kNone, 0); // No location.
stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
+ stream.EndStackMapEntry();
- size_t size = stream.ComputeNeededSize();
+ size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
stream.FillIn(region);
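The kNone entry in this test exercises the branch that the deleted AddDexRegisterEntry body guarded with `kind != DexRegisterLocation::Kind::kNone`: a register with no location sets no bit in live_dex_registers_mask and never reaches the location catalog. A self-contained sketch of that catalog interning, using simplified stand-in types (Location, LocationHash and Intern are illustrative names, not the ART types):

  #include <cstdint>
  #include <functional>
  #include <unordered_map>
  #include <vector>

  struct Location {
    int kind;       // e.g. kInStack, kConstant; kNone is simply never interned.
    int32_t value;
    bool operator==(const Location& o) const {
      return kind == o.kind && value == o.value;
    }
  };

  struct LocationHash {
    size_t operator()(const Location& l) const {
      return std::hash<int64_t>()(
          (static_cast<int64_t>(l.kind) << 32) ^ static_cast<uint32_t>(l.value));
    }
  };

  // Returns the catalog index of `loc`, appending it on first use so that
  // every distinct location is stored exactly once, as in the deleted code.
  size_t Intern(std::vector<Location>* catalog,
                std::unordered_map<Location, size_t, LocationHash>* indices,
                const Location& loc) {
    auto it = indices->find(loc);
    if (it != indices->end()) {
      return it->second;  // Already cataloged: reuse the existing index.
    }
    size_t index = catalog->size();
    catalog->push_back(loc);
    indices->emplace(loc, index);
    return index;
  }

The deleted code additionally folded (register, kind, value) into dex_register_map_hash so that identical maps can later be found quickly; a sketch of that half follows TestShareDexRegisterMap below.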
@@ -353,7 +357,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
ArenaBitVector sp_mask(&arena, 0, false);
uint32_t number_of_dex_registers = 1024;
// Create the first stack map (and its Dex register map).
- stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8;
for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) {
// Use two different Dex register locations to populate this map,
@@ -362,13 +366,15 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
// art::DexRegisterMap::SingleEntrySizeInBits).
stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2); // Short location.
}
+ stream.EndStackMapEntry();
// Create the second stack map (and its Dex register map).
- stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
for (uint32_t i = 0; i < number_of_dex_registers; ++i) {
stream.AddDexRegisterEntry(i, Kind::kConstant, 0); // Short location.
}
+ stream.EndStackMapEntry();
- size_t size = stream.ComputeNeededSize();
+ size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
stream.FillIn(region);
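A rough size argument for why 1024 registers are enough to push the second map's offset out of a small encoding, under an assumed cost model (hypothetical constants, not the exact ART layout):

  #include <cstddef>

  // Assumed model: a dex register map costs a live-register bitmask plus one
  // catalog index per live register.
  constexpr size_t kLiveMaskBytes = 1024 / 8;         // 128-byte liveness mask.
  constexpr size_t kIndexBytes = (1016 * 1 + 7) / 8;  // 127 bytes: the catalog has
                                                      // two entries, so 1 bit/index.
  static_assert(kLiveMaskBytes + kIndexBytes >= 255,
                "the second map's offset no longer fits a one-byte field");

On that model the second map starts at an offset of at least 255 bytes, past the reach of a single-byte offset field, which is presumably the overflow the test name refers to.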
@@ -413,19 +419,22 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
ArenaBitVector sp_mask(&arena, 0, false);
uint32_t number_of_dex_registers = 2;
// First stack map.
- stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location.
stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
+ stream.EndStackMapEntry();
// Second stack map, which should share the same dex register map.
- stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location.
stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
+ stream.EndStackMapEntry();
// Third stack map (doesn't share the dex register map).
- stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
stream.AddDexRegisterEntry(0, Kind::kInRegister, 2); // Short location.
stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
+ stream.EndStackMapEntry();
- size_t size = stream.ComputeNeededSize();
+ size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
stream.FillIn(region);
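The sharing asserted by this test is implemented by the FindEntryWithTheSameDexMap / HaveTheSameDexMaps pair whose inline bodies were moved out of the header above: an imperfect hash keys buckets of candidate entries, and candidates are compared register by register. A compressed, self-contained sketch of the same scheme (Entry and the container types are simplified stand-ins for the ART structures):

  #include <cstdint>
  #include <map>
  #include <vector>

  struct Entry {
    std::vector<bool> live;                // Live-register mask stand-in.
    std::vector<size_t> location_indices;  // Catalog indices, live registers only.
    uint32_t hash;                         // Imperfect hash over (reg, kind, value).
  };

  constexpr size_t kNoSameDexMapFound = static_cast<size_t>(-1);

  // Returns the index of an earlier entry whose dex register map matches
  // entries[index], or kNoSameDexMapFound. Each hash bucket collects all
  // entries sharing a hash, since collisions are possible.
  size_t FindSameDexMap(const std::vector<Entry>& entries,
                        std::map<uint32_t, std::vector<size_t>>* buckets,
                        size_t index) {
    const Entry& e = entries[index];
    std::vector<size_t>& bucket = (*buckets)[e.hash];  // Created if absent.
    for (size_t candidate : bucket) {
      if (entries[candidate].live == e.live &&
          entries[candidate].location_indices == e.location_indices) {
        return candidate;  // Same liveness, same catalog indices: share the map.
      }
    }
    bucket.push_back(index);  // No match: remember this entry for later lookups.
    return kNoSameDexMapFound;
  }

The real implementation walks dex_register_locations_ with per-entry start offsets instead of materializing per-entry vectors; the vectors here only keep the sketch short.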
@@ -462,9 +471,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
ArenaBitVector sp_mask(&arena, 0, false);
uint32_t number_of_dex_registers = 0;
- stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.EndStackMapEntry();
- size_t size = stream.ComputeNeededSize();
+ size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
stream.FillIn(region);
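Every rewritten test stops at FillIn because the assertion halves of these tests are untouched by this change and fall outside the hunks. For orientation, the read-back direction they use wraps the filled region in CodeInfo (a sketch; treat the exact accessor signatures as assumptions of this note rather than guarantees):

  CodeInfo code_info(region);
  ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
  // Individual stack maps are then fetched (e.g. code_info.GetStackMapAt(0))
  // and their masks, dex pcs and dex register maps are compared against the
  // values fed to the stream above.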