Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 9
-rw-r--r--  compiler/optimizing/builder.cc | 10
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 158
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 6
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 38
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 28
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 28
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 784
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 105
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 786
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 111
-rw-r--r--  compiler/optimizing/common_arm64.h | 5
-rw-r--r--  compiler/optimizing/constant_folding_test.cc | 4
-rw-r--r--  compiler/optimizing/dead_code_elimination_test.cc | 9
-rw-r--r--  compiler/optimizing/graph_checker.cc | 14
-rw-r--r--  compiler/optimizing/graph_checker_test.cc | 10
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 19
-rw-r--r--  compiler/optimizing/gvn_test.cc | 20
-rw-r--r--  compiler/optimizing/induction_var_analysis_test.cc | 8
-rw-r--r--  compiler/optimizing/induction_var_range_test.cc | 5
-rw-r--r--  compiler/optimizing/inliner.cc | 67
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 212
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc | 1
-rw-r--r--  compiler/optimizing/intrinsics.cc | 36
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 193
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 119
-rw-r--r--  compiler/optimizing/intrinsics_list.h | 17
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc | 17
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc | 18
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 289
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 315
-rw-r--r--  compiler/optimizing/licm_test.cc | 17
-rw-r--r--  compiler/optimizing/linearize_test.cc | 21
-rw-r--r--  compiler/optimizing/live_ranges_test.cc | 18
-rw-r--r--  compiler/optimizing/liveness_test.cc | 30
-rw-r--r--  compiler/optimizing/load_store_elimination.cc | 35
-rw-r--r--  compiler/optimizing/nodes.cc | 49
-rw-r--r--  compiler/optimizing/nodes.h | 353
-rw-r--r--  compiler/optimizing/nodes_arm64.h | 2
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 51
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h | 8
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h | 11
-rw-r--r--  compiler/optimizing/primitive_type_propagation.cc | 133
-rw-r--r--  compiler/optimizing/primitive_type_propagation.h | 52
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 143
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 34
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 695
-rw-r--r--  compiler/optimizing/ssa_builder.h | 42
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.cc | 18
-rw-r--r--  compiler/optimizing/ssa_test.cc | 31
50 files changed, 3333 insertions(+), 1851 deletions(-)
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 4c3f66aa4f..dc75ff1abc 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1590,15 +1590,18 @@ class BCEVisitor : public HGraphVisitor {
HGraph* graph = GetGraph();
HInstruction* zero;
switch (type) {
- case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break;
- case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break;
- case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
+ case Primitive::kPrimNot: zero = graph->GetNullConstant(); break;
+ case Primitive::kPrimFloat: zero = graph->GetFloatConstant(0); break;
+ case Primitive::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
default: zero = graph->GetConstant(type, 0); break;
}
HPhi* phi = new (graph->GetArena())
HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type));
phi->SetRawInputAt(0, instruction);
phi->SetRawInputAt(1, zero);
+ if (type == Primitive::kPrimNot) {
+ phi->SetReferenceTypeInfo(instruction->GetReferenceTypeInfo());
+ }
new_preheader->AddPhi(phi);
return phi;
}
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index e1404ce59c..1178d0fb25 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -2841,15 +2841,21 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32
}
case Instruction::CONST_STRING: {
+ uint32_t string_index = instruction.VRegB_21c();
+ bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache(
+ *dex_file_, string_index);
current_block_->AddInstruction(
- new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_21c(), dex_pc));
+ new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache));
UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
break;
}
case Instruction::CONST_STRING_JUMBO: {
+ uint32_t string_index = instruction.VRegB_31c();
+ bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache(
+ *dex_file_, string_index);
current_block_->AddInstruction(
- new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_31c(), dex_pc));
+ new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache));
UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction(), dex_pc);
break;
}
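
The new `in_dex_cache` flag, computed once at graph-build time via the compiler driver, is what lets each backend below elide the null test and slow path when the string is known to be resolved. A minimal self-contained analog of that pattern (plain C++, an assumed simplification, not the ART API):

    #include <cstdio>

    struct LoadString {
      bool in_dex_cache;  // decided at build time by the compiler driver
    };

    void GenerateLoadString(const LoadString& load) {
      std::puts("load string reference from the dex cache");  // always emitted
      if (!load.in_dex_cache) {
        // Only when the string may be unresolved: null-test the result and
        // fall back to a runtime call.
        std::puts("test result; branch to LoadStringSlowPath on null");
      }
    }
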
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 0a26786a87..9fda83840c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3234,6 +3234,147 @@ void InstructionCodeGeneratorARM::VisitDivZeroCheck(HDivZeroCheck* instruction)
}
}
+void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations) {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ Location rhs = locations->InAt(1);
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (rhs.IsConstant()) {
+ // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
+ // so map all rotations to a positive equivalent in that range.
+ // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
+ uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
+ if (rot) {
+ // Rotate, mapping left rotations to right equivalents if necessary.
+ // (e.g. left by 2 bits == right by 30.)
+ __ Ror(out, in, rot);
+ } else if (out != in) {
+ __ Mov(out, in);
+ }
+ } else {
+ __ Ror(out, in, rhs.AsRegister<Register>());
+ }
+}
+
+// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
+// rotates by swapping input regs (effectively rotating by the first 32-bits of
+// a larger rotation) or flipping direction (thus treating larger right/left
+// rotations as sub-word sized rotations in the other direction) as appropriate.
+void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
+ Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Location rhs = locations->InAt(1);
+ Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+ if (rhs.IsConstant()) {
+ uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
+ // Map all rotations to positive equivalents on the interval [0,63].
+ rot &= kMaxLongShiftValue;
+ // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
+ // logic below to a simple pair of binary orr.
+ // (e.g. 34 bits == in_reg swap + 2 bits right.)
+ if (rot >= kArmBitsPerWord) {
+ rot -= kArmBitsPerWord;
+ std::swap(in_reg_hi, in_reg_lo);
+ }
+ // Rotate, or mov to out for zero or word size rotations.
+ if (rot != 0u) {
+ __ Lsr(out_reg_hi, in_reg_hi, rot);
+ __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, kArmBitsPerWord - rot));
+ __ Lsr(out_reg_lo, in_reg_lo, rot);
+ __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, kArmBitsPerWord - rot));
+ } else {
+ __ Mov(out_reg_lo, in_reg_lo);
+ __ Mov(out_reg_hi, in_reg_hi);
+ }
+ } else {
+ Register shift_right = locations->GetTemp(0).AsRegister<Register>();
+ Register shift_left = locations->GetTemp(1).AsRegister<Register>();
+ Label end;
+ Label shift_by_32_plus_shift_right;
+
+ __ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
+ __ Lsrs(shift_left, rhs.AsRegister<Register>(), 6);
+ __ rsb(shift_left, shift_right, ShifterOperand(kArmBitsPerWord), AL, kCcKeep);
+ __ b(&shift_by_32_plus_shift_right, CC);
+
+ // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
+ // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
+ __ Lsl(out_reg_hi, in_reg_hi, shift_left);
+ __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+ __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+ __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+ __ Lsr(shift_left, in_reg_hi, shift_right);
+ __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
+ __ b(&end);
+
+ __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
+ // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
+ // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
+ __ Lsr(out_reg_hi, in_reg_hi, shift_right);
+ __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+ __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+ __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+ __ Lsl(shift_right, in_reg_hi, shift_left);
+ __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
+
+ __ Bind(&end);
+ }
+}
+
+void LocationsBuilderARM::HandleRotate(HRor* ror) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+ switch (ror->GetResultType()) {
+ case Primitive::kPrimInt: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (ror->InputAt(1)->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARM::HandleRotate(HRor* ror) {
+ LocationSummary* locations = ror->GetLocations();
+ Primitive::Type type = ror->GetResultType();
+ switch (type) {
+ case Primitive::kPrimInt: {
+ HandleIntegerRotate(locations);
+ break;
+ }
+ case Primitive::kPrimLong: {
+ HandleLongRotate(locations);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected operation type " << type;
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM::VisitRor(HRor* op) {
+ HandleRotate(op);
+}
+
+void InstructionCodeGeneratorARM::VisitRor(HRor* op) {
+ HandleRotate(op);
+}
+
void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
@@ -5067,16 +5208,15 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck(
}
void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+ LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
- codegen_->AddSlowPath(slow_path);
-
LocationSummary* locations = load->GetLocations();
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
@@ -5107,8 +5247,12 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
__ LoadFromOffset(kLoadWord, out, out, cache_offset);
}
- __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ if (!load->IsInDexCache()) {
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
+ codegen_->AddSlowPath(slow_path);
+ __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
}
static int32_t GetExceptionTlsOffset() {
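
The constant-shift decomposition in HandleLongRotate above can be checked in isolation: rotations of 32 or more swap the word halves first, and what remains is a pair of shift/OR combines per word. A standalone sketch (plain C++, illustration only, not ART code):

    #include <cstdint>
    #include <utility>

    uint64_t Ror64ViaWordPair(uint32_t lo, uint32_t hi, uint32_t rot) {
      rot &= 63;                 // map to [0,63], as with kMaxLongShiftValue
      if (rot >= 32) {           // "pre-rotate" by a whole word: swap the halves
        std::swap(lo, hi);
        rot -= 32;
      }
      uint32_t out_lo = lo;
      uint32_t out_hi = hi;
      if (rot != 0) {            // mirrors the Lsr/orr pairs emitted above
        out_hi = (hi >> rot) | (lo << (32 - rot));
        out_lo = (lo >> rot) | (hi << (32 - rot));
      }
      return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
    }
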
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 193add2541..8193c2808c 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -170,6 +170,9 @@ class LocationsBuilderARM : public HGraphVisitor {
private:
void HandleInvoke(HInvoke* invoke);
void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
+ void HandleIntegerRotate(LocationSummary* locations);
+ void HandleLongRotate(LocationSummary* locations);
+ void HandleRotate(HRor* ror);
void HandleShift(HBinaryOperation* operation);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -213,6 +216,9 @@ class InstructionCodeGeneratorARM : public HGraphVisitor {
void GenerateOrrConst(Register out, Register first, uint32_t value);
void GenerateEorConst(Register out, Register first, uint32_t value);
void HandleBitwiseOperation(HBinaryOperation* operation);
+ void HandleIntegerRotate(LocationSummary* locations);
+ void HandleLongRotate(LocationSummary* locations);
+ void HandleRotate(HRor* ror);
void HandleShift(HBinaryOperation* operation);
void GenerateMemoryBarrier(MemBarrierKind kind);
void GenerateWideAtomicStore(Register addr, uint32_t offset,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 227f4be642..52058302be 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1791,6 +1791,17 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
__ Orr(dst, lhs, rhs);
} else if (instr->IsSub()) {
__ Sub(dst, lhs, rhs);
+ } else if (instr->IsRor()) {
+ if (rhs.IsImmediate()) {
+ uint32_t shift = rhs.immediate() & (lhs.SizeInBits() - 1);
+ __ Ror(dst, lhs, shift);
+ } else {
+ // Ensure shift distance is in the same size register as the result. If
+ // we are rotating a long and the shift comes in a w register originally,
+ // we don't need to sxtw it for use as an x register, since the shift
+ // distances are all masked to reg_bits - 1 anyway.
+ __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
+ }
} else {
DCHECK(instr->IsXor());
__ Eor(dst, lhs, rhs);
@@ -3850,16 +3861,15 @@ void InstructionCodeGeneratorARM64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UN
}
void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+ LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
- codegen_->AddSlowPath(slow_path);
-
Location out_loc = load->GetLocations()->Out();
Register out = OutputRegister(load);
Register current_method = InputRegisterAt(load, 0);
@@ -3889,8 +3899,12 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
__ Ldr(out, MemOperand(out.X(), cache_offset));
}
- __ Cbz(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ if (!load->IsInDexCache()) {
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ Cbz(out, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
}
void LocationsBuilderARM64::VisitLocal(HLocal* local) {
@@ -4229,6 +4243,7 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
default:
LOG(FATAL) << "Unexpected rem type " << type;
+ UNREACHABLE();
}
}
@@ -4258,6 +4273,14 @@ void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATT
codegen_->GenerateFrameExit();
}
+void LocationsBuilderARM64::VisitRor(HRor* ror) {
+ HandleBinaryOp(ror);
+}
+
+void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
+ HandleBinaryOp(ror);
+}
+
void LocationsBuilderARM64::VisitShl(HShl* shl) {
HandleShift(shl);
}
@@ -4295,6 +4318,7 @@ void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) {
default:
LOG(FATAL) << "Unimplemented local type " << field_type;
+ UNREACHABLE();
}
}
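
The arm64 comment above relies on the hardware rotate masking the variable shift amount to the register width, so a count arriving in a w register needs no sign extension before a 64-bit Ror. The same masking in plain C++ (illustration only):

    #include <cstdint>

    // Variable-distance rotate right; the explicit mask mimics what the CPU
    // applies to the count register.
    uint64_t Ror64(uint64_t x, uint64_t n) {
      n &= 63;
      return n == 0 ? x : (x >> n) | (x << (64 - n));
    }
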
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index d092de9421..ce7cbcd9d6 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3413,24 +3413,28 @@ void InstructionCodeGeneratorMIPS::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNU
}
void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+ LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
- codegen_->AddSlowPath(slow_path);
-
LocationSummary* locations = load->GetLocations();
Register out = locations->Out().AsRegister<Register>();
Register current_method = locations->InAt(0).AsRegister<Register>();
__ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
__ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
__ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- __ Beqz(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+
+ if (!load->IsInDexCache()) {
+ SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
+ codegen_->AddSlowPath(slow_path);
+ __ Beqz(out, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
}
void LocationsBuilderMIPS::VisitLocal(HLocal* local) {
@@ -3913,6 +3917,16 @@ void InstructionCodeGeneratorMIPS::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UN
codegen_->GenerateFrameExit();
}
+void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+}
+
void LocationsBuilderMIPS::VisitShl(HShl* shl) {
HandleShift(shl);
}
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 78f5644cfb..1a9de15c6f 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3105,16 +3105,15 @@ void InstructionCodeGeneratorMIPS64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_U
}
void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+ LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
- codegen_->AddSlowPath(slow_path);
-
LocationSummary* locations = load->GetLocations();
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
@@ -3123,8 +3122,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
__ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
__ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
// TODO: We will need a read barrier here.
- __ Beqzc(out, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+
+ if (!load->IsInDexCache()) {
+ SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ Beqzc(out, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
}
void LocationsBuilderMIPS64::VisitLocal(HLocal* local) {
@@ -3519,6 +3523,16 @@ void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_
codegen_->GenerateFrameExit();
}
+void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+}
+
void LocationsBuilderMIPS64::VisitShl(HShl* shl) {
HandleShift(shl);
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 19f03df2a0..469dd49a8e 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -433,6 +433,56 @@ class ArraySetSlowPathX86 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
};
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
+ public:
+ ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj)
+ : instruction_(instruction), out_(out), obj_(obj) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+ x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+ x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ jmp(GetExitLabel());
+ }
+
+ private:
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location obj_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
+};
+
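
ReadBarrierMarkSlowPathX86 is the out-of-line half of a Baker-style read barrier: the fast path is just the ordinary reference load, and the runtime's mark entry point runs only when a mark is actually needed. As a conceptual sketch (an assumed simplification, not the ART implementation):

    struct Object {};

    Object* RuntimeMark(Object* ref);  // stands in for the pReadBarrierMark entry point

    // Fast path: plain load of the reference; slow path: hand the reference to
    // the runtime so it can be marked (and possibly forwarded) during GC.
    Object* LoadWithBakerBarrier(Object** slot, bool needs_mark) {
      Object* ref = *slot;       // fast path: ordinary load, no extra work
      if (needs_mark) {          // e.g. GC is concurrently marking
        ref = RuntimeMark(ref);  // slow path, as emitted above
      }
      return ref;
    }
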
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
public:
@@ -454,7 +504,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
// to be instrumented, e.g.:
//
// __ movl(out, Address(out, offset));
- // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+ // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
//
// In that case, we have lost the information about the original
// object, and the emitted read barrier cannot work properly.
@@ -470,7 +520,9 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
DCHECK(!instruction_->IsInvoke() ||
(instruction_->IsInvokeStaticOrDirect() &&
- instruction_->GetLocations()->Intrinsified()));
+ instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for heap reference slow path: "
+ << instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -612,14 +664,18 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
public:
ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
- : instruction_(instruction), out_(out), root_(root) {}
+ : instruction_(instruction), out_(out), root_(root) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
Register reg_out = out_.AsRegister<Register>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -1831,7 +1887,7 @@ void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
}
void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
- GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+ codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
}
void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
@@ -3759,6 +3815,92 @@ void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register
__ Bind(&done);
}
+void LocationsBuilderX86::VisitRor(HRor* ror) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+
+ switch (ror->GetResultType()) {
+ case Primitive::kPrimLong:
+ // Add the temporary needed.
+ locations->AddTemp(Location::RequiresRegister());
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimInt:
+ locations->SetInAt(0, Location::RequiresRegister());
+ // The shift count needs to be in CL (unless it is a constant).
+ locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
+ LocationSummary* locations = ror->GetLocations();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+
+ if (ror->GetResultType() == Primitive::kPrimInt) {
+ Register first_reg = first.AsRegister<Register>();
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ __ rorl(first_reg, second_reg);
+ } else {
+ Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+ __ rorl(first_reg, imm);
+ }
+ return;
+ }
+
+ DCHECK_EQ(ror->GetResultType(), Primitive::kPrimLong);
+ Register first_reg_lo = first.AsRegisterPairLow<Register>();
+ Register first_reg_hi = first.AsRegisterPairHigh<Register>();
+ Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ DCHECK_EQ(second_reg, ECX);
+ __ movl(temp_reg, first_reg_hi);
+ __ shrd(first_reg_hi, first_reg_lo, second_reg);
+ __ shrd(first_reg_lo, temp_reg, second_reg);
+ __ movl(temp_reg, first_reg_hi);
+ __ testl(second_reg, Immediate(32));
+ __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
+ __ cmovl(kNotEqual, first_reg_lo, temp_reg);
+ } else {
+ int32_t shift_amt =
+ CodeGenerator::GetInt64ValueOf(second.GetConstant()) & kMaxLongShiftValue;
+ if (shift_amt == 0) {
+ // Already fine.
+ return;
+ }
+ if (shift_amt == 32) {
+ // Just swap.
+ __ movl(temp_reg, first_reg_lo);
+ __ movl(first_reg_lo, first_reg_hi);
+ __ movl(first_reg_hi, temp_reg);
+ return;
+ }
+
+ Immediate imm(shift_amt);
+ // Save the contents of the low value.
+ __ movl(temp_reg, first_reg_lo);
+
+ // Shift right into low, feeding bits from high.
+ __ shrd(first_reg_lo, first_reg_hi, imm);
+
+ // Shift right into high, feeding bits from the original low.
+ __ shrd(first_reg_hi, temp_reg, imm);
+
+ // Swap if needed.
+ if (shift_amt > 32) {
+ __ movl(temp_reg, first_reg_lo);
+ __ movl(first_reg_lo, first_reg_hi);
+ __ movl(first_reg_hi, temp_reg);
+ }
+ }
+}
+
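+
+// For the variable-count case above: the two shrd instructions rotate by
+// count mod 32 (x86 masks shift counts to five bits), and the testl/cmovl
+// pair swaps the halves when bit 5 of the count is set. The same sequence
+// as a plain C++ sketch (illustration only):
+//
+//   uint64_t Ror64ShrdStyle(uint32_t lo, uint32_t hi, uint32_t count) {
+//     uint32_t n = count & 31;   // what shrd actually sees in CL
+//     uint32_t saved_hi = hi;    // movl temp, hi
+//     if (n != 0) {
+//       hi = (hi >> n) | (lo << (32 - n));        // shrd hi, lo, cl
+//       lo = (lo >> n) | (saved_hi << (32 - n));  // shrd lo, temp, cl
+//     }
+//     if (count & 32) {          // testl cl, 32 ... cmovl pair
+//       uint32_t tmp = hi; hi = lo; lo = tmp;
+//     }
+//     return (static_cast<uint64_t>(hi) << 32) | lo;
+//   }
+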
void LocationsBuilderX86::VisitShl(HShl* shl) {
HandleShift(shl);
}
@@ -4006,7 +4148,7 @@ void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
/*
* According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
* All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
@@ -4260,9 +4402,14 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
// Long values can be loaded atomically into an XMM using movsd.
- // So we use an XMM register as a temp to achieve atomicity (first load the temp into the XMM
- // and then copy the XMM into the output 32bits at a time).
+ // So we use an XMM register as a temp to achieve atomicity (first
+ // load the temp into the XMM and then copy the XMM into the
+ // output, 32 bits at a time).
locations->AddTemp(Location::RequiresFpuRegister());
+ } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
}
@@ -4300,9 +4447,32 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
}
case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
__ movl(out.AsRegister<Register>(), Address(base, offset));
break;
+
+ case Primitive::kPrimNot: {
+ // /* HeapReference<Object> */ out = *(base + offset)
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp_loc = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ } else {
+ __ movl(out.AsRegister<Register>(), Address(base, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+ }
+ break;
}
case Primitive::kPrimLong: {
@@ -4337,17 +4507,20 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
UNREACHABLE();
}
- // Longs are handled in the switch.
- if (field_type != Primitive::kPrimLong) {
+ if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimLong) {
+ // Potential implicit null checks, in the case of reference or
+ // long fields, are handled in the previous switch statement.
+ } else {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
- }
-
- if (field_type == Primitive::kPrimNot) {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+ if (field_type == Primitive::kPrimNot) {
+ // Memory barriers, in the case of references, are also handled
+ // in the previous switch statement.
+ } else {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
}
}
@@ -4412,7 +4585,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
}
bool maybe_record_implicit_null_check_done = false;
@@ -4517,7 +4690,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
}
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
}
}
@@ -4698,6 +4871,11 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
Location::kOutputOverlap :
Location::kNoOutputOverlap);
}
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -4705,12 +4883,13 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
Location index = locations->InAt(1);
+ Location out_loc = locations->Out();
Primitive::Type type = instruction->GetType();
switch (type) {
case Primitive::kPrimBoolean: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
__ movzxb(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4722,7 +4901,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimByte: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
__ movsxb(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4734,7 +4913,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimShort: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
__ movsxw(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4746,7 +4925,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimChar: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
__ movzxw(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4756,13 +4935,9 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
break;
}
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ case Primitive::kPrimInt: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
__ movl(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4772,20 +4947,56 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case Primitive::kPrimNot: {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ // /* HeapReference<Object> */ out =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ } else {
+ Register out = out_loc.AsRegister<Register>();
+ if (index.IsConstant()) {
+ uint32_t offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ movl(out, Address(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+ } else {
+ __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(
+ instruction, out_loc, out_loc, obj_loc, data_offset, index);
+ }
+ }
+ break;
+ }
+
case Primitive::kPrimLong: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
- Location out = locations->Out();
- DCHECK_NE(obj, out.AsRegisterPairLow<Register>());
+ DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
if (index.IsConstant()) {
size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset));
+ __ movl(out_loc.AsRegisterPairLow<Register>(), Address(obj, offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
+ __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
} else {
- __ movl(out.AsRegisterPairLow<Register>(),
+ __ movl(out_loc.AsRegisterPairLow<Register>(),
Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ movl(out.AsRegisterPairHigh<Register>(),
+ __ movl(out_loc.AsRegisterPairHigh<Register>(),
Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize));
}
break;
@@ -4793,7 +5004,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimFloat: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
if (index.IsConstant()) {
__ movss(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4805,7 +5016,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimDouble: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
if (index.IsConstant()) {
__ movsd(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4820,23 +5031,12 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
UNREACHABLE();
}
- if (type != Primitive::kPrimLong) {
+ if (type == Primitive::kPrimNot || type == Primitive::kPrimLong) {
+ // Potential implicit null checks, in the case of reference or
+ // long arrays, are handled in the previous switch statement.
+ } else {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
-
- if (type == Primitive::kPrimNot) {
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Location out = locations->Out();
- if (index.IsConstant()) {
- uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
- } else {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
- }
- }
}
void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
@@ -4968,12 +5168,12 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
// __ movl(temp2, temp);
// // /* HeapReference<Class> */ temp = temp->component_type_
// __ movl(temp, Address(temp, component_offset));
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp_loc, temp_loc, temp2_loc, component_offset);
//
// // /* HeapReference<Class> */ temp2 = register_value->klass_
// __ movl(temp2, Address(register_value, class_offset));
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
//
// __ cmpl(temp, temp2);
@@ -5254,8 +5454,8 @@ void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instructio
DCHECK_EQ(slow_path->GetSuccessor(), successor);
}
- __ fs()->cmpw(Address::Absolute(
- Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
+ __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()),
+ Immediate(0));
if (successor == nullptr) {
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
@@ -5536,32 +5736,16 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
if (cls->IsReferrersClass()) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ leal(out, Address(current_method, declaring_class_offset));
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ movl(out, Address(current_method, declaring_class_offset));
- }
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
} else {
// /* GcRoot<mirror::Class>[] */ out =
// current_method.ptr_sized_fields_->dex_cache_resolved_types_
__ movl(out, Address(current_method,
ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
-
- size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &out[type_index]
- __ leal(out, Address(out, cache_offset));
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = out[type_index]
- __ movl(out, Address(out, cache_offset));
- }
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
@@ -5611,49 +5795,36 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
}
void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+ LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
- codegen_->AddSlowPath(slow_path);
-
LocationSummary* locations = load->GetLocations();
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
Register current_method = locations->InAt(0).AsRegister<Register>();
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ leal(out, Address(current_method, declaring_class_offset));
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ movl(out, Address(current_method, declaring_class_offset));
- }
-
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
// /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
__ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
+ // /* GcRoot<mirror::String> */ out = out[string_index]
+ GenerateGcRootFieldLoad(
+ load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::String>* */ out = &out[string_index]
- __ leal(out, Address(out, cache_offset));
- // /* mirror::String* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::String> */ out = out[string_index]
- __ movl(out, Address(out, cache_offset));
+ if (!load->IsInDexCache()) {
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
+ codegen_->AddSlowPath(slow_path);
+ __ testl(out, out);
+ __ j(kEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
-
- __ testl(out, out);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
}
static Address GetExceptionTlsAddress() {
@@ -5693,6 +5864,14 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+ return kEmitCompilerReadBarrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5718,21 +5897,22 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetOut(Location::RequiresRegister());
// When read barriers are enabled, we need a temporary register for
// some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
Location cls = locations->InAt(1);
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
+ Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(0) :
+ Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5748,10 +5928,9 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
// /* HeapReference<Class> */ out = obj->klass_
- __ movl(out, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
- switch (instruction->GetTypeCheckKind()) {
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
if (cls.IsRegister()) {
__ cmpl(out, cls.AsRegister<Register>());
@@ -5772,17 +5951,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
// object to avoid doing a comparison we know will fail.
NearLabel loop;
__ Bind(&loop);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = temp_loc.AsRegister<Register>();
- __ movl(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ movl(out, Address(out, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
__ testl(out, out);
// If `out` is null, we use it for the result, and jump to `done`.
__ j(kEqual, &done);
@@ -5811,17 +5981,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
__ cmpl(out, Address(ESP, cls.GetStackIndex()));
}
__ j(kEqual, &success);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = temp_loc.AsRegister<Register>();
- __ movl(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ movl(out, Address(out, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
__ testl(out, out);
__ j(kNotEqual, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
@@ -5845,17 +6006,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
__ j(kEqual, &exact_check);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = temp_loc.AsRegister<Register>();
- __ movl(temp, out);
- }
// /* HeapReference<Class> */ out = out->component_type_
- __ movl(out, Address(out, component_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
__ testl(out, out);
// If `out` is null, we use it for the result, and jump to `done`.
__ j(kEqual, &done);
@@ -5899,6 +6051,13 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
// HInstanceOf instruction (following the runtime calling
// convention), which might be cluttered by the potential first
// read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
/* is_fatal */ false);
@@ -5951,27 +6110,27 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
locations->AddTemp(Location::RequiresRegister());
// When read barriers are enabled, we need an additional temporary
// register for some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
Register temp = temp_loc.AsRegister<Register>();
+ Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(1) :
+ Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
bool is_type_check_slow_path_fatal =
(type_check_kind == TypeCheckKind::kExactCheck ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5991,8 +6150,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
@@ -6014,18 +6172,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// object to avoid doing a comparison we know will fail.
NearLabel loop, compare_classes;
__ Bind(&loop);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = temp2_loc.AsRegister<Register>();
- __ movl(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ movl(temp, Address(temp, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
// If the class reference currently in `temp` is not null, jump
// to the `compare_classes` label to compare it with the checked
@@ -6038,8 +6186,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
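As an illustration of the control flow this hunk emits, here is a minimal
C++ sketch of the abstract-class cast check; `Class` and `ThrowClassCastError`
are hypothetical stand-ins for mirror::Class and the type-check slow path,
and each super_class step corresponds to a GenerateReferenceLoadOneRegister
call above.

    struct Class { const Class* super_class; };  // stand-in for mirror::Class
    void ThrowClassCastError();                  // stands in for the slow path

    void CheckCastAbstractSketch(const Class* klass, const Class* target) {
      do {
        // Reference load, with a read barrier when enabled.
        klass = klass->super_class;
        if (klass == nullptr) {
          // The generated code re-loads obj->klass_ before taking the
          // slow path, since `temp` has been overwritten by the walk.
          ThrowClassCastError();
          return;
        }
      } while (klass != target);
    }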
@@ -6065,18 +6212,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
}
__ j(kEqual, &done);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = temp2_loc.AsRegister<Register>();
- __ movl(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ movl(temp, Address(temp, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
// If the class reference currently in `temp` is not null, jump
// back at the beginning of the loop.
@@ -6088,8 +6225,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -6106,19 +6242,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
__ j(kEqual, &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = temp2_loc.AsRegister<Register>();
- __ movl(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->component_type_
- __ movl(temp, Address(temp, component_offset));
- codegen_->MaybeGenerateReadBarrier(
- instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
// If the component type is not null (i.e. the object is indeed
// an array), jump to label `check_non_primitive_component_type`
@@ -6132,8 +6257,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -6141,8 +6265,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
__ j(kEqual, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
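The array-object case above reduces to the following predicate, shown here
as a hedged sketch: `component_type` stands in for
mirror::Class::component_type_, and the comparison against 0 assumes
Primitive::kPrimNot is the zero enumerator.

    struct Class {
      const Class* component_type;  // null when this is not an array class
      uint16_t primitive_type;      // 0 assumed to mean kPrimNot (reference)
    };

    bool IsObjectArraySketch(const Class* klass) {
      // Non-null component type: the object is an array. Component type
      // not primitive: it is an array of references, so the cast succeeds.
      return klass->component_type != nullptr &&
             klass->component_type->primitive_type == 0;
    }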
@@ -6159,6 +6282,13 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
// instruction (following the runtime calling convention), which
// might be cluttered by the potential first read barrier
// emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -6320,14 +6450,226 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr
}
}
-void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location temp) {
+ Register out_reg = out.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ __ movl(temp.AsRegister<Register>(), out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ movl(out_reg, Address(out_reg, offset));
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ movl(out_reg, Address(out_reg, offset));
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location temp) {
+ Register out_reg = out.AsRegister<Register>();
+ Register obj_reg = obj.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ movl(out_reg, Address(obj_reg, offset));
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ movl(out_reg, Address(obj_reg, offset));
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ Register obj,
+ uint32_t offset) {
+ Register root_reg = root.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+ // Baker's read barriers are used:
+ //
+ // root = obj.field;
+ // if (Thread::Current()->GetIsGcMarking()) {
+ // root = ReadBarrier::Mark(root)
+ // }
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ movl(root_reg, Address(obj, offset));
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path used to mark the GC root `root`.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root, root);
+ codegen_->AddSlowPath(slow_path);
+
+ __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86WordSize>().Int32Value()),
+ Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ __ leal(root_reg, Address(obj, offset));
+ // /* mirror::Object* */ root = root->Read()
+ codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
+ } else {
+ // Plain GC root load with no read barrier.
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ movl(root_reg, Address(obj, offset));
+ }
+}
+
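The fast path added here can be summarized by the following standalone
sketch; `g_is_gc_marking` and `MarkRoot` are illustrative stand-ins for the
thread-local flag read through fs: and the pReadBarrierMark entrypoint.

    #include <cstdint>

    extern bool g_is_gc_marking;        // Thread::Current()->GetIsGcMarking()
    void* MarkRoot(void* root);         // ReadBarrier::Mark, via the slow path

    void* LoadGcRootSketch(const char* obj, uint32_t offset) {
      // Plain root load (movl root_reg, [obj + offset]); on x86 a void*
      // is 32 bits, matching the compressed GC root.
      void* root = *reinterpret_cast<void* const*>(obj + offset);
      // cmpl fs:[IsGcMarkingOffset], 0; jne slow_path
      if (g_is_gc_marking) {
        root = MarkRoot(root);
      }
      return root;
    }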
+void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ Address src(obj, offset);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ Address src = index.IsConstant() ?
+ Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+ Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
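For reference, the two Address forms above compute the same effective
address; TIMES_4 is the scale-by-4 factor (a shift by 2), matching the
4-byte compressed heap references. A sketch:

    #include <cstdint>

    uintptr_t ElementAddressSketch(uintptr_t obj, uint32_t data_offset, int32_t index) {
      // obj + data_offset + index * sizeof(HeapReference<Object>)
      return obj + data_offset + (static_cast<uint32_t>(index) << 2);
    }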
+void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ const Address& src,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // In slow path based read barriers, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+ // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as:
+ // - it implements the load-load fence using a data dependency on
+ // the high-bits of rb_state, which are expected to be all zeroes;
+ // - it performs additional checks that we do not do here for
+ // performance reasons.
+
+ Register ref_reg = ref.AsRegister<Register>();
+ Register temp_reg = temp.AsRegister<Register>();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ // /* int32_t */ monitor = obj->monitor_
+ __ movl(temp_reg, Address(obj, monitor_offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+ // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+ __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
+ __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
+ static_assert(
+ LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+ "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86 memory model.
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // The actual reference load.
+ // /* HeapReference<Object> */ ref = *src
+ __ movl(ref_reg, src);
+
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+
+ // Slow path used to mark the object `ref` when it is gray.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref, ref);
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
+ __ j(kEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
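Putting the pieces together, the sequence above behaves like this
standalone sketch; the shift, mask, and gray values are illustrative
placeholders for LockWord::kReadBarrierStateShift/Mask and
ReadBarrier::gray_ptr_, and `MarkReference` stands in for the mark slow
path.

    #include <cstdint>

    constexpr uint32_t kRbStateShiftSketch = 28;  // assumed lock word layout
    constexpr uint32_t kRbStateMaskSketch = 0x1;
    constexpr uint32_t kGraySketch = 1;
    void* MarkReference(void* ref);               // ReadBarrierMarkSlowPathX86

    void* BakerFieldLoadSketch(const char* obj,
                               uint32_t monitor_offset,
                               uint32_t field_offset) {
      // Load the lock word *before* the reference (movl; shrl; andl).
      uint32_t monitor = *reinterpret_cast<const uint32_t*>(obj + monitor_offset);
      uint32_t rb_state = (monitor >> kRbStateShiftSketch) & kRbStateMaskSketch;
      // kLoadAny barrier here: a no-op on x86 beyond instruction ordering.
      void* ref = *reinterpret_cast<void* const*>(obj + field_offset);
      // cmpl temp_reg, gray_ptr_; je slow_path
      if (rb_state == kGraySketch) {
        ref = MarkReference(ref);
      }
      return ref;
    }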
+void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Insert a slow path based read barrier *after* the reference load.
+ //
// If heap poisoning is enabled, the unpoisoning of the loaded
// reference will be carried out by the runtime within the slow
// path.
@@ -6341,57 +6683,41 @@ void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction,
ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
AddSlowPath(slow_path);
- // TODO: When read barrier has a fast path, add it here.
- /* Currently the read barrier call is inserted after the original load.
- * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
- * original load. This load-load ordering is required by the read barrier.
- * The fast path/slow path (for Baker's algorithm) should look like:
- *
- * bool isGray = obj.LockWord & kReadBarrierMask;
- * lfence; // load fence or artificial data dependence to prevent load-load reordering
- * ref = obj.field; // this is the original load
- * if (isGray) {
- * ref = Mark(ref); // ideally the slow path just does Mark(ref)
- * }
- */
-
__ jmp(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
-void CodeGeneratorX86::MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
if (kEmitCompilerReadBarrier) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
// If heap poisoning is enabled, unpoisoning will be taken care of
// by the runtime within the slow path.
- GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
} else if (kPoisonHeapReferences) {
__ UnpoisonHeapReference(out.AsRegister<Register>());
}
}
-void CodeGeneratorX86::GenerateReadBarrierForRoot(HInstruction* instruction,
- Location out,
- Location root) {
+void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
DCHECK(kEmitCompilerReadBarrier);
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
// Note that GC roots are not affected by heap poisoning, so we do
// not need to do anything special for this here.
SlowPathCode* slow_path =
new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root);
AddSlowPath(slow_path);
- // TODO: Implement a fast path for ReadBarrierForRoot, performing
- // the following operation (for Baker's algorithm):
- //
- // if (thread.tls32_.is_gc_marking) {
- // root = Mark(root);
- // }
-
__ jmp(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -6750,7 +7076,7 @@ Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
// TODO: target as memory.
void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) {
if (!target.IsValid()) {
- DCHECK(type == Primitive::kPrimVoid);
+ DCHECK_EQ(type, Primitive::kPrimVoid);
return;
}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f9403a67c0..712179920b 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -219,11 +219,44 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateShlLong(const Location& loc, int shift);
void GenerateShrLong(const Location& loc, int shift);
void GenerateUShrLong(const Location& loc, int shift);
- void GenerateMemoryBarrier(MemBarrierKind kind);
+
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ // Register `temp` is used when generating a read barrier.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location temp);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ // Register `temp` is used when generating a Baker's read barrier.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location temp);
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers (if any).
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ Register obj,
+ uint32_t offset);
+
// Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
// `is_wide` specifies whether it is long/double or not.
void PushOntoFPStack(Location source, uint32_t temp_offset,
@@ -364,6 +397,8 @@ class CodeGeneratorX86 : public CodeGenerator {
Register value,
bool value_can_be_null);
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
Label* GetLabelOf(HBasicBlock* block) const {
return CommonGetLabelOf<Label>(block_labels_, block);
}
@@ -405,7 +440,26 @@ class CodeGeneratorX86 : public CodeGenerator {
void Finalize(CodeAllocator* allocator) OVERRIDE;
- // Generate a read barrier for a heap reference within `instruction`.
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location out,
+ Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location out,
+ Register obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
//
// A read barrier for an object reference read from the heap is
// implemented as a call to the artReadBarrierSlow runtime entry
@@ -422,23 +476,25 @@ class CodeGeneratorX86 : public CodeGenerator {
// When `index` is provided (i.e. for array accesses), the offset
// value passed to artReadBarrierSlow is adjusted to take `index`
// into account.
- void GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
-
- // If read barriers are enabled, generate a read barrier for a heap reference.
- // If heap poisoning is enabled, also unpoison the reference in `out`.
- void MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
-
- // Generate a read barrier for a GC root within `instruction`.
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
//
// A read barrier for an object reference GC root is implemented as
// a call to the artReadBarrierForRootSlow runtime entry point,
@@ -448,9 +504,18 @@ class CodeGeneratorX86 : public CodeGenerator {
//
// The `out` location contains the value returned by
// artReadBarrierForRootSlow.
- void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
private:
+ // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+ // and GenerateArrayLoadWithBakerReadBarrier.
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ const Address& src,
+ Location temp,
+ bool needs_null_check);
+
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
struct PcRelativeDexCacheAccessInfo {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 44a51ea6e2..2c5fbc78bf 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -456,6 +456,56 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
+ public:
+ ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
+ : instruction_(instruction), out_(out), obj_(obj) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+ x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ jmp(GetExitLabel());
+ }
+
+ private:
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location obj_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
+};
+
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
public:
@@ -477,7 +527,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
// reference load to be instrumented, e.g.:
//
// __ movl(out, Address(out, offset));
- // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+ // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
//
// In that case, we have lost the information about the original
// object, and the emitted read barrier cannot work properly.
@@ -493,7 +543,9 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
DCHECK(!instruction_->IsInvoke() ||
(instruction_->IsInvokeStaticOrDirect() &&
- instruction_->GetLocations()->Intrinsified()));
+ instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for heap reference slow path: "
+ << instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -634,13 +686,17 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
public:
ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
- : instruction_(instruction), out_(out), root_(root) {}
+ : instruction_(instruction), out_(out), root_(root) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -731,7 +787,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
// temp = thread->string_init_entrypoint
__ gs()->movl(temp.AsRegister<CpuRegister>(),
- Address::Absolute(invoke->GetStringInitOffset(), true));
+ Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
break;
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
@@ -748,7 +804,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
invoke->GetDexCacheArrayOffset());
__ movq(temp.AsRegister<CpuRegister>(),
- Address::Absolute(kDummy32BitOffset, false /* no_rip */));
+ Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
// Bind the label at the end of the "movl" insn.
__ Bind(&pc_relative_dex_cache_patches_.back().label);
break;
@@ -907,7 +963,7 @@ void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset,
uint32_t dex_pc,
SlowPathCode* slow_path) {
ValidateInvokeRuntime(instruction, slow_path);
- __ gs()->call(Address::Absolute(entry_point_offset, true));
+ __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
RecordPcInfo(instruction, dex_pc, slow_path);
}
@@ -1939,7 +1995,7 @@ void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier)
}
void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
- GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+ codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
}
void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
@@ -2667,7 +2723,8 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
} else {
DCHECK(in.GetConstant()->IsIntConstant());
__ movl(out.AsRegister<CpuRegister>(),
- Immediate(static_cast<uint16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+ Immediate(static_cast<uint16_t>(
+ in.GetConstant()->AsIntConstant()->GetValue())));
}
break;
@@ -2911,7 +2968,8 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
__ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ addss(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ codegen_->LiteralFloatAddress(
+ second.GetConstant()->AsFloatConstant()->GetValue()));
} else {
DCHECK(second.IsStackSlot());
__ addss(first.AsFpuRegister<XmmRegister>(),
@@ -2925,7 +2983,8 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
__ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ addsd(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ codegen_->LiteralDoubleAddress(
+ second.GetConstant()->AsDoubleConstant()->GetValue()));
} else {
DCHECK(second.IsDoubleStackSlot());
__ addsd(first.AsFpuRegister<XmmRegister>(),
@@ -3000,7 +3059,8 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
__ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ subss(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ codegen_->LiteralFloatAddress(
+ second.GetConstant()->AsFloatConstant()->GetValue()));
} else {
DCHECK(second.IsStackSlot());
__ subss(first.AsFpuRegister<XmmRegister>(),
@@ -3014,7 +3074,8 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
__ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ subsd(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ codegen_->LiteralDoubleAddress(
+ second.GetConstant()->AsDoubleConstant()->GetValue()));
} else {
DCHECK(second.IsDoubleStackSlot());
__ subsd(first.AsFpuRegister<XmmRegister>(),
@@ -3121,7 +3182,8 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
__ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ mulss(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ codegen_->LiteralFloatAddress(
+ second.GetConstant()->AsFloatConstant()->GetValue()));
} else {
DCHECK(second.IsStackSlot());
__ mulss(first.AsFpuRegister<XmmRegister>(),
@@ -3136,7 +3198,8 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
__ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ mulsd(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ codegen_->LiteralDoubleAddress(
+ second.GetConstant()->AsDoubleConstant()->GetValue()));
} else {
DCHECK(second.IsDoubleStackSlot());
__ mulsd(first.AsFpuRegister<XmmRegister>(),
@@ -3542,7 +3605,8 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
__ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ divss(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ codegen_->LiteralFloatAddress(
+ second.GetConstant()->AsFloatConstant()->GetValue()));
} else {
DCHECK(second.IsStackSlot());
__ divss(first.AsFpuRegister<XmmRegister>(),
@@ -3556,7 +3620,8 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
__ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ divsd(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ codegen_->LiteralDoubleAddress(
+ second.GetConstant()->AsDoubleConstant()->GetValue()));
} else {
DCHECK(second.IsDoubleStackSlot());
__ divsd(first.AsFpuRegister<XmmRegister>(),
@@ -3755,6 +3820,56 @@ void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
}
default:
LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitRor(HRor* ror) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+
+ switch (ror->GetResultType()) {
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ // The shift count needs to be in CL (unless it is a constant).
+ locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
+ LocationSummary* locations = ror->GetLocations();
+ CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
+ Location second = locations->InAt(1);
+
+ switch (ror->GetResultType()) {
+ case Primitive::kPrimInt:
+ if (second.IsRegister()) {
+ CpuRegister second_reg = second.AsRegister<CpuRegister>();
+ __ rorl(first_reg, second_reg);
+ } else {
+ Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+ __ rorl(first_reg, imm);
+ }
+ break;
+ case Primitive::kPrimLong:
+ if (second.IsRegister()) {
+ CpuRegister second_reg = second.AsRegister<CpuRegister>();
+ __ rorq(first_reg, second_reg);
+ } else {
+ Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
+ __ rorq(first_reg, imm);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+ UNREACHABLE();
}
}
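For reference, the rorl/rorq instructions emitted here implement the usual
rotate-right semantics with the count masked to the operand width, which is
exactly what the kMaxIntShiftValue (31) and kMaxLongShiftValue (63) masks
mirror for the constant case:

    #include <cstdint>

    uint32_t Ror32(uint32_t x, unsigned shift) {
      shift &= 31;  // the hardware masks the count the same way
      return (x >> shift) | (x << ((32 - shift) & 31));
    }

    uint64_t Ror64(uint64_t x, unsigned shift) {
      shift &= 63;
      return (x >> shift) | (x << ((64 - shift) & 63));
    }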
@@ -3910,10 +4025,10 @@ void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED
LOG(FATAL) << "Unimplemented";
}
-void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
/*
* According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
- * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
+ * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
* For those cases, all we need to ensure is that there is a scheduling barrier in place.
*/
switch (kind) {
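A compact way to state the mapping the cookbook comment describes (the
enumerator set below is abbreviated for illustration, not the actual
MemBarrierKind definition):

    enum class BarrierSketch { kAnyAny, kAnyStore, kLoadAny, kStoreStore };

    inline void EmitBarrierSketch(BarrierSketch kind) {
      if (kind == BarrierSketch::kAnyAny) {
        asm volatile("mfence" ::: "memory");  // StoreLoad needs a real fence
      } else {
        asm volatile("" ::: "memory");        // scheduling barrier only
      }
    }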
@@ -3953,6 +4068,11 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
Location::RequiresRegister(),
object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -3988,12 +4108,36 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
break;
}
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
+ case Primitive::kPrimInt: {
__ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
break;
}
+ case Primitive::kPrimNot: {
+ // /* HeapReference<Object> */ out = *(base + offset)
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp_loc = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ } else {
+ __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+ }
+ break;
+ }
+
case Primitive::kPrimLong: {
__ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
break;
@@ -4014,14 +4158,20 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
UNREACHABLE();
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
-
- if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ if (field_type == Primitive::kPrimNot) {
+ // Potential implicit null checks, in the case of reference
+ // fields, are handled in the previous switch statement.
+ } else {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
- if (field_type == Primitive::kPrimNot) {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+ if (is_volatile) {
+ if (field_type == Primitive::kPrimNot) {
+ // Memory barriers, in the case of references, are also handled
+ // in the previous switch statement.
+ } else {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
}
}
@@ -4075,7 +4225,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
}
bool maybe_record_implicit_null_check_done = false;
@@ -4181,7 +4331,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
}
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
}
}
@@ -4358,6 +4508,11 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -4365,12 +4520,13 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
Location obj_loc = locations->InAt(0);
CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
Location index = locations->InAt(1);
- Primitive::Type type = instruction->GetType();
+ Location out_loc = locations->Out();
+ Primitive::Type type = instruction->GetType();
switch (type) {
case Primitive::kPrimBoolean: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movzxb(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4382,7 +4538,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimByte: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movsxb(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4394,7 +4550,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimShort: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movsxw(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4406,7 +4562,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimChar: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movzxw(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4416,13 +4572,9 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
break;
}
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ case Primitive::kPrimInt: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movl(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4432,9 +4584,46 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case Primitive::kPrimNot: {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ // /* HeapReference<Object> */ out =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ } else {
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
+ if (index.IsConstant()) {
+ uint32_t offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ movl(out, Address(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+ } else {
+ __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(
+ instruction, out_loc, out_loc, obj_loc, data_offset, index);
+ }
+ }
+ break;
+ }
+
case Primitive::kPrimLong: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movq(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4446,7 +4635,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimFloat: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
if (index.IsConstant()) {
__ movss(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4458,7 +4647,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimDouble: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
if (index.IsConstant()) {
__ movsd(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4472,20 +4661,12 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
if (type == Primitive::kPrimNot) {
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Location out = locations->Out();
- if (index.IsConstant()) {
- uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
- } else {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
- }
+ // Potential implicit null checks, in the case of reference
+ // arrays, are handled in the previous switch statement.
+ } else {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
}
@@ -4609,12 +4790,12 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
// __ movl(temp2, temp);
// // /* HeapReference<Class> */ temp = temp->component_type_
// __ movl(temp, Address(temp, component_offset));
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp_loc, temp_loc, temp2_loc, component_offset);
//
// // /* HeapReference<Class> */ temp2 = register_value->klass_
// __ movl(temp2, Address(register_value, class_offset));
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
//
// __ cmpl(temp, temp2);
@@ -4840,8 +5021,8 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
__ testl(value, value);
__ j(kEqual, &is_null);
}
- __ gs()->movq(card, Address::Absolute(
- Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true));
+ __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
+ /* no_rip */ true));
__ movq(temp, object);
__ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
__ movb(Address(temp, card, TIMES_1, 0), card);
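The card mark itself is a single byte store, sketched below; the shift
value is an illustrative placeholder for
gc::accounting::CardTable::kCardShift, and the stored byte is simply the
low byte of the biased card table base already held in `card`.

    #include <cstdint>

    constexpr unsigned kCardShiftSketch = 10;  // placeholder for kCardShift

    void MarkCardSketch(uint8_t* card_base, uintptr_t object) {
      // movb [card_base + (object >> shift)], low byte of card_base
      card_base[object >> kCardShiftSketch] =
          static_cast<uint8_t>(reinterpret_cast<uintptr_t>(card_base));
    }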
@@ -4900,8 +5081,9 @@ void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruc
DCHECK_EQ(slow_path->GetSuccessor(), successor);
}
- __ gs()->cmpw(Address::Absolute(
- Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
+ __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
+ /* no_rip */ true),
+ Immediate(0));
if (successor == nullptr) {
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
@@ -5125,7 +5307,7 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
Immediate(mirror::Class::kStatusInitialized));
__ j(kLess, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
- // No need for memory fence, thanks to the X86_64 memory model.
+ // No need for memory fence, thanks to the x86-64 memory model.
}
void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
@@ -5156,32 +5338,16 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
if (cls->IsReferrersClass()) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ leaq(out, Address(current_method, declaring_class_offset));
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ movl(out, Address(current_method, declaring_class_offset));
- }
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
} else {
// /* GcRoot<mirror::Class>[] */ out =
// current_method.ptr_sized_fields_->dex_cache_resolved_types_
__ movq(out, Address(current_method,
ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
-
- size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &out[type_index]
- __ leaq(out, Address(out, cache_offset));
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = out[type_index]
- __ movl(out, Address(out, cache_offset));
- }
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
@@ -5220,53 +5386,41 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
}
void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+ LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister());
}
void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
- codegen_->AddSlowPath(slow_path);
-
LocationSummary* locations = load->GetLocations();
Location out_loc = locations->Out();
CpuRegister out = out_loc.AsRegister<CpuRegister>();
CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ leaq(out, Address(current_method, declaring_class_offset));
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ movl(out, Address(current_method, declaring_class_offset));
- }
-
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
// /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
__ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
+ // /* GcRoot<mirror::String> */ out = out[string_index]
+ GenerateGcRootFieldLoad(
+ load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::String>* */ out = &out[string_index]
- __ leaq(out, Address(out, cache_offset));
- // /* mirror::String* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::String> */ out = out[string_index]
- __ movl(out, Address(out, cache_offset));
+ if (!load->IsInDexCache()) {
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ testl(out, out);
+ __ j(kEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
-
- __ testl(out, out);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
}
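After this change the generated code follows the shape below; the sketch
assumes a flat string dex cache and uses `ResolveStringSlow` as a
hypothetical placeholder for the runtime call made by
LoadStringSlowPathX86_64.

    #include <cstdint>

    void* ResolveStringSlow(uint32_t string_index);  // hypothetical helper

    void* LoadStringSketch(void* const* dex_cache_strings,
                           uint32_t string_index,
                           bool is_in_dex_cache) {
      // GC root load (with a read barrier when enabled).
      void* str = dex_cache_strings[string_index];
      // The null check and slow path are emitted only when the compiler
      // could not prove the string is already in the dex cache.
      if (!is_in_dex_cache && str == nullptr) {
        str = ResolveStringSlow(string_index);
      }
      return str;
    }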
static Address GetExceptionTlsAddress() {
- return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true);
+ return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
+ /* no_rip */ true);
}
void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
@@ -5302,6 +5456,14 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+ return kEmitCompilerReadBarrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5327,21 +5489,22 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetOut(Location::RequiresRegister());
// When read barriers are enabled, we need a temporary register for
// some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
Location cls = locations->InAt(1);
Location out_loc = locations->Out();
CpuRegister out = out_loc.AsRegister<CpuRegister>();
+ Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(0) :
+ Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5357,10 +5520,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
// /* HeapReference<Class> */ out = obj->klass_
- __ movl(out, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
- switch (instruction->GetTypeCheckKind()) {
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
if (cls.IsRegister()) {
__ cmpl(out, cls.AsRegister<CpuRegister>());
@@ -5386,17 +5548,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
// object to avoid doing a comparison we know will fail.
NearLabel loop, success;
__ Bind(&loop);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
- __ movl(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ movl(out, Address(out, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
__ testl(out, out);
// If `out` is null, we use it for the result, and jump to `done`.
__ j(kEqual, &done);
@@ -5425,17 +5578,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
__ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
}
__ j(kEqual, &success);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
- __ movl(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ movl(out, Address(out, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
__ testl(out, out);
__ j(kNotEqual, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
@@ -5459,17 +5603,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
__ j(kEqual, &exact_check);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
- __ movl(temp, out);
- }
// /* HeapReference<Class> */ out = out->component_type_
- __ movl(out, Address(out, component_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
__ testl(out, out);
// If `out` is null, we use it for the result, and jump to `done`.
__ j(kEqual, &done);
@@ -5513,6 +5648,13 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
// HInstanceOf instruction (following the runtime calling
// convention), which might be cluttered by the potential first
// read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
/* is_fatal */ false);
@@ -5565,27 +5707,27 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
locations->AddTemp(Location::RequiresRegister());
// When read barriers are enabled, we need an additional temporary
// register for some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+ Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(1) :
+ Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
bool is_type_check_slow_path_fatal =
(type_check_kind == TypeCheckKind::kExactCheck ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5597,7 +5739,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
is_type_check_slow_path_fatal);
codegen_->AddSlowPath(type_check_slow_path);
- NearLabel done;
+ Label done;
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
__ testl(obj, obj);
@@ -5605,8 +5747,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
@@ -5628,18 +5769,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// object to avoid doing a comparison we know will fail.
NearLabel loop, compare_classes;
__ Bind(&loop);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
- __ movl(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ movl(temp, Address(temp, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
// If the class reference currently in `temp` is not null, jump
// to the `compare_classes` label to compare it with the checked
@@ -5652,8 +5783,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -5679,18 +5809,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
__ j(kEqual, &done);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
- __ movl(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ movl(temp, Address(temp, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
// If the class reference currently in `temp` is not null, jump
// back at the beginning of the loop.
@@ -5702,8 +5822,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -5720,19 +5839,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
__ j(kEqual, &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
- __ movl(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->component_type_
- __ movl(temp, Address(temp, component_offset));
- codegen_->MaybeGenerateReadBarrier(
- instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
// If the component type is not null (i.e. the object is indeed
// an array), jump to label `check_non_primitive_component_type`
@@ -5746,8 +5854,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -5755,8 +5862,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
__ j(kEqual, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -5773,6 +5879,13 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// instruction (following the runtime calling convention), which
// might be cluttered by the potential first read barrier
// emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -5916,14 +6029,227 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in
}
}
-void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location temp) {
+ CpuRegister out_reg = out.AsRegister<CpuRegister>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ __ movl(temp.AsRegister<CpuRegister>(), out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ movl(out_reg, Address(out_reg, offset));
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ movl(out_reg, Address(out_reg, offset));
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location temp) {
+ CpuRegister out_reg = out.AsRegister<CpuRegister>();
+ CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ movl(out_reg, Address(obj_reg, offset));
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ movl(out_reg, Address(obj_reg, offset));
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ CpuRegister obj,
+ uint32_t offset) {
+ CpuRegister root_reg = root.AsRegister<CpuRegister>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+ // Baker's read barriers are used:
+ //
+ // root = obj.field;
+ // if (Thread::Current()->GetIsGcMarking()) {
+ // root = ReadBarrier::Mark(root)
+ // }
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ movl(root_reg, Address(obj, offset));
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path used to mark the GC root `root`.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
+ codegen_->AddSlowPath(slow_path);
+
+ __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
+ /* no_rip */ true),
+ Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ __ leaq(root_reg, Address(obj, offset));
+ // /* mirror::Object* */ root = root->Read()
+ codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
+ } else {
+ // Plain GC root load with no read barrier.
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ movl(root_reg, Address(obj, offset));
+ }
+}
+
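For reference, when Baker read barriers are enabled the root load above reduces to three inline instructions plus an out-of-line mark (a sketch of the emitted x86-64; register names illustrative):

    movl root, [obj + offset]                  // GcRoot<mirror::Object> load
    cmpl gs:[Thread::is_gc_marking_offset], 0  // absolute TLS access, no_rip
    jne  mark_slow_path                        // root = ReadBarrier::Mark(root)
  exit:                                        // slow path jumps back here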
+void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ CpuRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ Address src(obj, offset);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ CpuRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ Address src = index.IsConstant() ?
+ Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+ Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ CpuRegister obj,
+ const Address& src,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // In slow path based read barriers, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as:
+ // - it implements the load-load fence using a data dependency on
+ // the high-bits of rb_state, which are expected to be all zeroes;
+ // - it performs additional checks that we do not do here for
+ // performance reasons.
+
+ CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
+ CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ // /* int32_t */ monitor = obj->monitor_
+ __ movl(temp_reg, Address(obj, monitor_offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+ // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+ __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
+ __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
+ static_assert(
+ LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+ "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86-64 memory model.
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // The actual reference load.
+ // /* HeapReference<Object> */ ref = *src
+ __ movl(ref_reg, src);
+
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+
+ // Slow path used to mark the object `ref` when it is gray.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
+ __ j(kEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
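Putting the pieces together, the fast path emitted by the function above is roughly the following x86-64 (a sketch; register names illustrative):

    movl temp, [obj + monitor_offset]   // lock word; may also record the implicit null check
    shrl temp, kReadBarrierStateShift
    andl temp, kReadBarrierStateMask    // temp = rb_state
    // kLoadAny barrier elided: a load-load fence is a no-op under the x86-64 memory model
    movl ref, src                       // the original reference load
    // MaybeUnpoisonHeapReference(ref) if heap poisoning is enabled
    cmpl temp, gray_ptr_
    je   mark_slow_path                 // ref = ReadBarrier::Mark(ref)
  exit: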
+void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
DCHECK(kEmitCompilerReadBarrier);
+ // Insert a slow path based read barrier *after* the reference load.
+ //
// If heap poisoning is enabled, the unpoisoning of the loaded
// reference will be carried out by the runtime within the slow
// path.
@@ -5937,57 +6263,41 @@ void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction,
ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
AddSlowPath(slow_path);
- // TODO: When read barrier has a fast path, add it here.
- /* Currently the read barrier call is inserted after the original load.
- * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
- * original load. This load-load ordering is required by the read barrier.
- * The fast path/slow path (for Baker's algorithm) should look like:
- *
- * bool isGray = obj.LockWord & kReadBarrierMask;
- * lfence; // load fence or artificial data dependence to prevent load-load reordering
- * ref = obj.field; // this is the original load
- * if (isGray) {
- * ref = Mark(ref); // ideally the slow path just does Mark(ref)
- * }
- */
-
__ jmp(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
-void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
if (kEmitCompilerReadBarrier) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
// If heap poisoning is enabled, unpoisoning will be taken care of
// by the runtime within the slow path.
- GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
} else if (kPoisonHeapReferences) {
__ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
}
}
-void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction,
- Location out,
- Location root) {
+void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
DCHECK(kEmitCompilerReadBarrier);
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
// Note that GC roots are not affected by heap poisoning, so we do
// not need to do anything special for this here.
SlowPathCode* slow_path =
new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
AddSlowPath(slow_path);
- // TODO: Implement a fast path for ReadBarrierForRoot, performing
- // the following operation (for Baker's algorithm):
- //
- // if (thread.tls32_.is_gc_marking) {
- // root = Mark(root);
- // }
-
__ jmp(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -6236,7 +6546,7 @@ Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
// TODO: trg as memory.
void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
if (!trg.IsValid()) {
- DCHECK(type == Primitive::kPrimVoid);
+ DCHECK_EQ(type, Primitive::kPrimVoid);
return;
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 145b1f33b4..dda9ea22d9 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -47,6 +47,12 @@ static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
+// These XMM registers are non-volatile in ART ABI, but volatile in native ABI.
+// If the ART ABI changes, this list must be updated. It is used to ensure that
+// these are not clobbered by any direct call to native code (such as math intrinsics).
+static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };
+
+
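One plausible way the list is consumed, sketched with hypothetical helper names (the real spill/restore code lives in the x86-64 intrinsic generators):

    // Illustration only: an intrinsic that calls straight into libm must save
    // these registers itself, because the native ABI treats every XMM register
    // as caller-saved while the ART ABI does not.
    for (FloatRegister reg : non_volatile_xmm_regs) {
      SpillToStack(reg);      // hypothetical helper
    }
    CallNativeMathRoutine();  // hypothetical direct native call
    for (FloatRegister reg : non_volatile_xmm_regs) {
      RestoreFromStack(reg);  // hypothetical helper
    }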
class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
public:
InvokeRuntimeCallingConvention()
@@ -207,11 +213,44 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleShift(HBinaryOperation* operation);
- void GenerateMemoryBarrier(MemBarrierKind kind);
+
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ // Register `temp` is used when generating a read barrier.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location temp);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ // Register `temp` is used when generating a Baker's read barrier.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location temp);
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers (if any).
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ CpuRegister obj,
+ uint32_t offset);
+
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
void PushOntoFPStack(Location source, uint32_t temp_offset,
@@ -318,6 +357,8 @@ class CodeGeneratorX86_64 : public CodeGenerator {
CpuRegister value,
bool value_can_be_null);
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
// Helper method to move a value between two locations.
void Move(Location destination, Location source);
@@ -350,7 +391,26 @@ class CodeGeneratorX86_64 : public CodeGenerator {
return isa_features_;
}
- // Generate a read barrier for a heap reference within `instruction`.
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location out,
+ CpuRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location out,
+ CpuRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
//
// A read barrier for an object reference read from the heap is
// implemented as a call to the artReadBarrierSlow runtime entry
@@ -367,23 +427,25 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// When `index` provided (i.e., when it is different from
// Location::NoLocation()), the offset value passed to
// artReadBarrierSlow is adjusted to take `index` into account.
- void GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
-
- // If read barriers are enabled, generate a read barrier for a heap reference.
- // If heap poisoning is enabled, also unpoison the reference in `out`.
- void MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
-
- // Generate a read barrier for a GC root within `instruction`.
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
//
// A read barrier for an object reference GC root is implemented as
// a call to the artReadBarrierForRootSlow runtime entry point,
@@ -393,7 +455,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
//
// The `out` location contains the value returned by
// artReadBarrierForRootSlow.
- void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
int ConstantAreaStart() const {
return constant_area_start_;
@@ -418,6 +480,15 @@ class CodeGeneratorX86_64 : public CodeGenerator {
HInstruction* instruction);
private:
+ // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+ // and GenerateArrayLoadWithBakerReadBarrier.
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ CpuRegister obj,
+ const Address& src,
+ Location temp,
+ bool needs_null_check);
+
struct PcRelativeDexCacheAccessInfo {
PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
: target_dex_file(dex_file), element_offset(element_off), label() { }
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index af8b8b562a..10d83439fd 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -202,6 +202,11 @@ static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst
return true;
}
+ // Our code generator ensures shift distances are within an encodable range.
+ if (instr->IsRor()) {
+ return true;
+ }
+
int64_t value = CodeGenerator::GetInt64ValueOf(constant);
if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index e469c8d6d0..a8f65bf516 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -32,7 +32,7 @@ namespace art {
/**
* Fixture class for the constant folding and dce tests.
*/
-class ConstantFoldingTest : public testing::Test {
+class ConstantFoldingTest : public CommonCompilerTest {
public:
ConstantFoldingTest() : pool_(), allocator_(&pool_) {
graph_ = CreateGraph(&allocator_);
@@ -56,7 +56,7 @@ class ConstantFoldingTest : public testing::Test {
const std::string& expected_after_dce,
std::function<void(HGraph*)> check_after_cf) {
ASSERT_NE(graph_, nullptr);
- graph_->TryBuildingSsa();
+ TransformToSsa(graph_);
StringPrettyPrinter printer_before(graph_);
printer_before.VisitInsertionOrder();
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 2c6a1ef63d..f0f98efadb 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -26,6 +26,8 @@
namespace art {
+class DeadCodeEliminationTest : public CommonCompilerTest {};
+
static void TestCode(const uint16_t* data,
const std::string& expected_before,
const std::string& expected_after) {
@@ -34,7 +36,7 @@ static void TestCode(const uint16_t* data,
HGraph* graph = CreateCFG(&allocator, data);
ASSERT_NE(graph, nullptr);
- graph->TryBuildingSsa();
+ TransformToSsa(graph);
StringPrettyPrinter printer_before(graph);
printer_before.VisitInsertionOrder();
@@ -55,7 +57,6 @@ static void TestCode(const uint16_t* data,
ASSERT_EQ(actual_after, expected_after);
}
-
/**
* Small three-register program.
*
@@ -69,7 +70,7 @@ static void TestCode(const uint16_t* data,
* L1: v2 <- v0 + v1 5. add-int v2, v0, v1
* return-void 7. return
*/
-TEST(DeadCodeElimination, AdditionAndConditionalJump) {
+TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) {
const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 1 << 8 | 1 << 12,
Instruction::CONST_4 | 0 << 8 | 0 << 12,
@@ -131,7 +132,7 @@ TEST(DeadCodeElimination, AdditionAndConditionalJump) {
* L3: v2 <- v1 + 4 11. add-int/lit16 v2, v1, #+4
* return 13. return-void
*/
-TEST(DeadCodeElimination, AdditionsAndInconditionalJumps) {
+TEST_F(DeadCodeEliminationTest, AdditionsAndInconditionalJumps) {
const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 << 8 | 0 << 12,
Instruction::CONST_4 | 1 << 8 | 1 << 12,
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index c16b872466..f3c1dbe3f5 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -24,6 +24,7 @@
#include "base/arena_containers.h"
#include "base/bit_vector-inl.h"
#include "base/stringprintf.h"
+#include "handle_scope-inl.h"
namespace art {
@@ -594,6 +595,17 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) {
}
}
}
+
+ // Ensure that reference type instructions have reference type info.
+ if (instruction->GetType() == Primitive::kPrimNot) {
+ ScopedObjectAccess soa(Thread::Current());
+ if (!instruction->GetReferenceTypeInfo().IsValid()) {
+ AddError(StringPrintf("Reference type instruction %s:%d does not have "
+ "valid reference type information.",
+ instruction->DebugName(),
+ instruction->GetId()));
+ }
+ }
}
static Primitive::Type PrimitiveKind(Primitive::Type type) {
@@ -850,7 +862,7 @@ void SSAChecker::VisitCondition(HCondition* op) {
void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) {
VisitInstruction(op);
- if (op->IsUShr() || op->IsShr() || op->IsShl()) {
+ if (op->IsUShr() || op->IsShr() || op->IsShl() || op->IsRor()) {
if (PrimitiveKind(op->InputAt(1)->GetType()) != Primitive::kPrimInt) {
AddError(StringPrintf(
"Shift operation %s %d has a non-int kind second input: "
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index fee56c7f9e..d10df4ce3f 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -17,8 +17,6 @@
#include "graph_checker.h"
#include "optimizing_unit_test.h"
-#include "gtest/gtest.h"
-
namespace art {
/**
@@ -43,7 +41,6 @@ HGraph* CreateSimpleCFG(ArenaAllocator* allocator) {
return graph;
}
-
static void TestCode(const uint16_t* data) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
@@ -61,8 +58,7 @@ static void TestCodeSSA(const uint16_t* data) {
HGraph* graph = CreateCFG(&allocator, data);
ASSERT_NE(graph, nullptr);
- graph->BuildDominatorTree();
- graph->TransformToSsa();
+ TransformToSsa(graph);
SSAChecker ssa_checker(graph);
ssa_checker.Run();
@@ -145,7 +141,9 @@ TEST(GraphChecker, BlockEndingWithNonBranchInstruction) {
ASSERT_FALSE(graph_checker.IsValid());
}
-TEST(SSAChecker, SSAPhi) {
+class SSACheckerTest : public CommonCompilerTest {};
+
+TEST_F(SSACheckerTest, SSAPhi) {
// This code creates one Phi function during the conversion to SSA form.
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index e9fdb84d1e..5f1328f545 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -30,6 +30,7 @@
#include "optimization.h"
#include "reference_type_propagation.h"
#include "register_allocator.h"
+#include "ssa_builder.h"
#include "ssa_liveness_analysis.h"
#include "utils/assembler.h"
@@ -505,7 +506,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
} else {
StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
}
- } else if ((IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)
+ } else if ((IsPass(SsaBuilder::kSsaBuilderPassName)
|| IsPass(HInliner::kInlinerPassName))
&& (instruction->GetType() == Primitive::kPrimNot)) {
ReferenceTypeInfo info = instruction->IsLoadClass()
@@ -519,21 +520,15 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
} else if (instruction->IsLoadClass()) {
StartAttributeStream("klass") << "unresolved";
- } else if (instruction->IsNullConstant()) {
+ } else {
// The NullConstant may be added to the graph during other passes that happen between
// ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner
// doesn't run or doesn't inline anything, the NullConstant remains untyped.
// So we should check NullConstants for validity only after reference type propagation.
- //
- // Note: The infrastructure to properly type NullConstants everywhere is to complex to add
- // for the benefits.
- StartAttributeStream("klass") << "not_set";
- DCHECK(!is_after_pass_
- || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName))
- << " Expected a valid rti after reference type propagation";
- } else {
- DCHECK(!is_after_pass_)
- << "Expected a valid rti after reference type propagation";
+ DCHECK(graph_in_bad_state_ ||
+ (!is_after_pass_ && IsPass(SsaBuilder::kSsaBuilderPassName)))
+ << instruction->DebugName() << instruction->GetId() << " has invalid rti "
+ << (is_after_pass_ ? "after" : "before") << " pass " << pass_name_;
}
}
if (disasm_info_ != nullptr) {
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index de60cf21aa..9929696ded 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -21,11 +21,11 @@
#include "optimizing_unit_test.h"
#include "side_effects_analysis.h"
-#include "gtest/gtest.h"
-
namespace art {
-TEST(GVNTest, LocalFieldElimination) {
+class GVNTest : public CommonCompilerTest {};
+
+TEST_F(GVNTest, LocalFieldElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
NullHandle<mirror::DexCache> dex_cache;
@@ -100,7 +100,7 @@ TEST(GVNTest, LocalFieldElimination) {
ASSERT_EQ(different_offset->GetBlock(), block);
ASSERT_EQ(use_after_kill->GetBlock(), block);
- graph->TryBuildingSsa();
+ TransformToSsa(graph);
SideEffectsAnalysis side_effects(graph);
side_effects.Run();
GVNOptimization(graph, side_effects).Run();
@@ -110,7 +110,7 @@ TEST(GVNTest, LocalFieldElimination) {
ASSERT_EQ(use_after_kill->GetBlock(), block);
}
-TEST(GVNTest, GlobalFieldElimination) {
+TEST_F(GVNTest, GlobalFieldElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
NullHandle<mirror::DexCache> dex_cache;
@@ -182,7 +182,7 @@ TEST(GVNTest, GlobalFieldElimination) {
0));
join->AddInstruction(new (&allocator) HExit());
- graph->TryBuildingSsa();
+ TransformToSsa(graph);
SideEffectsAnalysis side_effects(graph);
side_effects.Run();
GVNOptimization(graph, side_effects).Run();
@@ -193,7 +193,7 @@ TEST(GVNTest, GlobalFieldElimination) {
ASSERT_TRUE(join->GetFirstInstruction()->IsExit());
}
-TEST(GVNTest, LoopFieldElimination) {
+TEST_F(GVNTest, LoopFieldElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
NullHandle<mirror::DexCache> dex_cache;
@@ -288,7 +288,7 @@ TEST(GVNTest, LoopFieldElimination) {
ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body);
ASSERT_EQ(field_get_in_exit->GetBlock(), exit);
- graph->TryBuildingSsa();
+ TransformToSsa(graph);
{
SideEffectsAnalysis side_effects(graph);
side_effects.Run();
@@ -316,7 +316,7 @@ TEST(GVNTest, LoopFieldElimination) {
}
// Test that inner loops affect the side effects of the outer loop.
-TEST(GVNTest, LoopSideEffects) {
+TEST_F(GVNTest, LoopSideEffects) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
NullHandle<mirror::DexCache> dex_cache;
@@ -364,7 +364,7 @@ TEST(GVNTest, LoopSideEffects) {
inner_loop_exit->AddInstruction(new (&allocator) HGoto());
outer_loop_exit->AddInstruction(new (&allocator) HExit());
- graph->TryBuildingSsa();
+ TransformToSsa(graph);
ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn(
*outer_loop_header->GetLoopInformation()));
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 5de94f43c9..776c115e9d 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -18,7 +18,6 @@
#include "base/arena_allocator.h"
#include "builder.h"
-#include "gtest/gtest.h"
#include "induction_var_analysis.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
@@ -28,7 +27,7 @@ namespace art {
/**
* Fixture class for the InductionVarAnalysis tests.
*/
-class InductionVarAnalysisTest : public testing::Test {
+class InductionVarAnalysisTest : public CommonCompilerTest {
public:
InductionVarAnalysisTest() : pool_(), allocator_(&pool_) {
graph_ = CreateGraph(&allocator_);
@@ -102,6 +101,7 @@ class InductionVarAnalysisTest : public testing::Test {
basic_[d] = new (&allocator_) HLocal(d);
entry_->AddInstruction(basic_[d]);
loop_preheader_[d]->AddInstruction(new (&allocator_) HStoreLocal(basic_[d], constant0_));
+ loop_preheader_[d]->AddInstruction(new (&allocator_) HGoto());
HInstruction* load = new (&allocator_) HLoadLocal(basic_[d], Primitive::kPrimInt);
loop_header_[d]->AddInstruction(load);
HInstruction* compare = new (&allocator_) HLessThan(load, constant100_);
@@ -168,7 +168,7 @@ class InductionVarAnalysisTest : public testing::Test {
// Performs InductionVarAnalysis (after proper set up).
void PerformInductionVarAnalysis() {
- ASSERT_TRUE(graph_->TryBuildingSsa());
+ TransformToSsa(graph_);
iva_ = new (&allocator_) HInductionVarAnalysis(graph_);
iva_->Run();
}
@@ -212,7 +212,7 @@ TEST_F(InductionVarAnalysisTest, ProperLoopSetup) {
// ..
// }
BuildLoopNest(10);
- ASSERT_TRUE(graph_->TryBuildingSsa());
+ TransformToSsa(graph_);
ASSERT_EQ(entry_->GetLoopInformation(), nullptr);
for (int d = 0; d < 1; d++) {
ASSERT_EQ(loop_preheader_[d]->GetLoopInformation(),
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 128b5bb811..a1c797a80a 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -16,7 +16,6 @@
#include "base/arena_allocator.h"
#include "builder.h"
-#include "gtest/gtest.h"
#include "induction_var_analysis.h"
#include "induction_var_range.h"
#include "nodes.h"
@@ -29,7 +28,7 @@ using Value = InductionVarRange::Value;
/**
* Fixture class for the InductionVarRange tests.
*/
-class InductionVarRangeTest : public testing::Test {
+class InductionVarRangeTest : public CommonCompilerTest {
public:
InductionVarRangeTest() : pool_(), allocator_(&pool_) {
graph_ = CreateGraph(&allocator_);
@@ -105,7 +104,7 @@ class InductionVarRangeTest : public testing::Test {
/** Performs induction variable analysis. */
void PerformInductionVarAnalysis() {
- ASSERT_TRUE(graph_->TryBuildingSsa());
+ TransformToSsa(graph_);
iva_->Run();
}
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index a4dcb3aeba..db1170909f 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -33,6 +33,7 @@
#include "reference_type_propagation.h"
#include "register_allocator.h"
#include "sharpening.h"
+#include "ssa_builder.h"
#include "ssa_phi_elimination.h"
#include "scoped_thread_state_change.h"
#include "thread.h"
@@ -514,7 +515,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
return false;
}
- if (!callee_graph->TryBuildingSsa()) {
+ if (callee_graph->TryBuildingSsa(handles_) != kBuildSsaSuccess) {
VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
<< " could not be transformed to SSA";
return false;
@@ -549,14 +550,12 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
// Run simple optimizations on the graph.
HDeadCodeElimination dce(callee_graph, stats_);
HConstantFolding fold(callee_graph);
- ReferenceTypePropagation type_propagation(callee_graph, handles_);
HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
InstructionSimplifier simplify(callee_graph, stats_);
IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_);
HOptimization* optimizations[] = {
&intrinsics,
- &type_propagation,
&sharpening,
&simplify,
&fold,
@@ -677,42 +676,36 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
DCHECK_EQ(graph_, return_replacement->GetBlock()->GetGraph());
}
- // When merging the graph we might create a new NullConstant in the caller graph which does
- // not have the chance to be typed. We assign the correct type here so that we can keep the
- // assertion that every reference has a valid type. This also simplifies checks along the way.
- HNullConstant* null_constant = graph_->GetNullConstant();
- if (!null_constant->GetReferenceTypeInfo().IsValid()) {
- ReferenceTypeInfo::TypeHandle obj_handle =
- handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
- null_constant->SetReferenceTypeInfo(
- ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
- }
-
// Check the integrity of reference types and run another type propagation if needed.
- if ((return_replacement != nullptr)
- && (return_replacement->GetType() == Primitive::kPrimNot)) {
- if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
- // Make sure that we have a valid type for the return. We may get an invalid one when
- // we inline invokes with multiple branches and create a Phi for the result.
- // TODO: we could be more precise by merging the phi inputs but that requires
- // some functionality from the reference type propagation.
- DCHECK(return_replacement->IsPhi());
- size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
- ReferenceTypeInfo::TypeHandle return_handle =
- handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
- return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
- return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
- }
+ if (return_replacement != nullptr) {
+ if (return_replacement->GetType() == Primitive::kPrimNot) {
+ if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
+ // Make sure that we have a valid type for the return. We may get an invalid one when
+ // we inline invokes with multiple branches and create a Phi for the result.
+ // TODO: we could be more precise by merging the phi inputs but that requires
+ // some functionality from the reference type propagation.
+ DCHECK(return_replacement->IsPhi());
+ size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+ ReferenceTypeInfo::TypeHandle return_handle =
+ handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
+ return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
+ return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
+ }
- if (do_rtp) {
- // If the return type is a refinement of the declared type run the type propagation again.
- ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
- ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
- if (invoke_rti.IsStrictSupertypeOf(return_rti)
- || (return_rti.IsExact() && !invoke_rti.IsExact())
- || !return_replacement->CanBeNull()) {
- ReferenceTypePropagation rtp_fixup(graph_, handles_);
- rtp_fixup.Run();
+ if (do_rtp) {
+ // If the return type is a refinement of the declared type run the type propagation again.
+ ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
+ ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
+ if (invoke_rti.IsStrictSupertypeOf(return_rti)
+ || (return_rti.IsExact() && !invoke_rti.IsExact())
+ || !return_replacement->CanBeNull()) {
+ ReferenceTypePropagation(graph_, handles_).Run();
+ }
+ }
+ } else if (return_replacement->IsInstanceOf()) {
+ if (do_rtp) {
+ // Inlining InstanceOf into an If may put a tighter bound on reference types.
+ ReferenceTypePropagation(graph_, handles_).Run();
}
}
}
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2f3df7fc68..e1b13c5087 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -39,6 +39,12 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
}
}
+ bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+ bool TryReplaceWithRotate(HBinaryOperation* instruction);
+ bool TryReplaceWithRotateConstantPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+ bool TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+ bool TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+
bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop);
void VisitShift(HBinaryOperation* shift);
@@ -77,6 +83,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
+ void SimplifyRotate(HInvoke* invoke, bool is_left);
void SimplifySystemArrayCopy(HInvoke* invoke);
void SimplifyStringEquals(HInvoke* invoke);
@@ -173,6 +180,161 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
}
}
+static bool IsSubRegBitsMinusOther(HSub* sub, size_t reg_bits, HInstruction* other) {
+ return (sub->GetRight() == other &&
+ sub->GetLeft()->IsConstant() &&
+ (Int64FromConstant(sub->GetLeft()->AsConstant()) & (reg_bits - 1)) == 0);
+}
+
+bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op,
+ HUShr* ushr,
+ HShl* shl) {
+ DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+ HRor* ror = new (GetGraph()->GetArena()) HRor(ushr->GetType(),
+ ushr->GetLeft(),
+ ushr->GetRight());
+ op->GetBlock()->ReplaceAndRemoveInstructionWith(op, ror);
+ if (!ushr->HasUses()) {
+ ushr->GetBlock()->RemoveInstruction(ushr);
+ }
+ if (!ushr->GetRight()->HasUses()) {
+ ushr->GetRight()->GetBlock()->RemoveInstruction(ushr->GetRight());
+ }
+ if (!shl->HasUses()) {
+ shl->GetBlock()->RemoveInstruction(shl);
+ }
+ if (!shl->GetRight()->HasUses()) {
+ shl->GetRight()->GetBlock()->RemoveInstruction(shl->GetRight());
+ }
+ return true;
+}
+
+// Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation.
+bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) {
+ // This simplification is currently supported on x86, x86_64, ARM and ARM64.
+ // TODO: Implement it for MIPS/64.
+ const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
+ switch (instruction_set) {
+ case kArm:
+ case kArm64:
+ case kThumb2:
+ case kX86:
+ case kX86_64:
+ break;
+ default:
+ return false;
+ }
+ DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+ HInstruction* left = op->GetLeft();
+ HInstruction* right = op->GetRight();
+ // If we have a UShr and a Shl (in either order).
+ if ((left->IsUShr() && right->IsShl()) || (left->IsShl() && right->IsUShr())) {
+ HUShr* ushr = left->IsUShr() ? left->AsUShr() : right->AsUShr();
+ HShl* shl = left->IsShl() ? left->AsShl() : right->AsShl();
+ DCHECK(Primitive::IsIntOrLongType(ushr->GetType()));
+ if (ushr->GetType() == shl->GetType() &&
+ ushr->GetLeft() == shl->GetLeft()) {
+ if (ushr->GetRight()->IsConstant() && shl->GetRight()->IsConstant()) {
+ // Shift distances are both constant, try replacing with Ror if they
+ // add up to the register size.
+ return TryReplaceWithRotateConstantPattern(op, ushr, shl);
+ } else if (ushr->GetRight()->IsSub() || shl->GetRight()->IsSub()) {
+ // Shift distances are potentially of the form x and (reg_size - x).
+ return TryReplaceWithRotateRegisterSubPattern(op, ushr, shl);
+ } else if (ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg()) {
+ // Shift distances are potentially of the form d and -d.
+ return TryReplaceWithRotateRegisterNegPattern(op, ushr, shl);
+ }
+ }
+ }
+ return false;
+}
+
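The source-level shape this matcher targets is the classic hand-written rotate. A standalone C++ rendition (illustrative; inside the compiler the same shape arrives as an Or/Add/Xor over UShr and Shl nodes):

    #include <cassert>
    #include <cstdint>

    // (x >>> d) | (x << (32 - d)) in Java; once matched, the whole expression
    // is rewritten to the single IR node Ror(x, d).
    uint32_t rotr32(uint32_t x, uint32_t d) {
      return (x >> (d & 31)) | (x << ((32 - d) & 31));
    }

    int main() {
      assert(rotr32(0x80000001u, 1) == 0xC0000000u);
    }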
+// Try replacing code looking like (x >>> #rdist OP x << #ldist):
+// UShr dst, x, #rdist
+// Shl tmp, x, #ldist
+// OP dst, dst, tmp
+// or like (x >>> #rdist OP x << #-ldist):
+// UShr dst, x, #rdist
+// Shl tmp, x, #-ldist
+// OP dst, dst, tmp
+// with
+// Ror dst, x, #rdist
+bool InstructionSimplifierVisitor::TryReplaceWithRotateConstantPattern(HBinaryOperation* op,
+ HUShr* ushr,
+ HShl* shl) {
+ DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+ size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte;
+ size_t rdist = Int64FromConstant(ushr->GetRight()->AsConstant());
+ size_t ldist = Int64FromConstant(shl->GetRight()->AsConstant());
+ if (((ldist + rdist) & (reg_bits - 1)) == 0) {
+ ReplaceRotateWithRor(op, ushr, shl);
+ return true;
+ }
+ return false;
+}
+
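A worked instance of the constant test above, assuming a 32-bit value: with rdist = 24 and ldist = 8, (8 + 24) & 31 == 0, so (x >>> 24) | (x << 8) is rewritten to Ror(x, 24); with ldist = 4 the sum is 28, the mask test fails, and the expression is left untouched, since it is not a rotation.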
+// Replace code looking like (x >>> -d OP x << d):
+// Neg neg, d
+// UShr dst, x, neg
+// Shl tmp, x, d
+// OP dst, dst, tmp
+// with
+// Neg neg, d
+// Ror dst, x, neg
+// *** OR ***
+// Replace code looking like (x >>> d OP x << -d):
+// UShr dst, x, d
+// Neg neg, d
+// Shl tmp, x, neg
+// OP dst, dst, tmp
+// with
+// Ror dst, x, d
+bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op,
+ HUShr* ushr,
+ HShl* shl) {
+ DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+ DCHECK(ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg());
+ bool neg_is_left = shl->GetRight()->IsNeg();
+ HNeg* neg = neg_is_left ? shl->GetRight()->AsNeg() : ushr->GetRight()->AsNeg();
+ // And the shift distance being negated is the distance being shifted the other way.
+ if (neg->InputAt(0) == (neg_is_left ? ushr->GetRight() : shl->GetRight())) {
+ return ReplaceRotateWithRor(op, ushr, shl);
+ }
+ return false;
+}
+
+// Try replacing code looking like (x >>> d OP x << (#bits - d)):
+// UShr dst, x, d
+// Sub ld, #bits, d
+// Shl tmp, x, ld
+// OP dst, dst, tmp
+// with
+// Ror dst, x, d
+// *** OR ***
+// Replace code looking like (x >>> (#bits - d) OP x << d):
+// Sub rd, #bits, d
+// UShr dst, x, rd
+// Shl tmp, x, d
+// OP dst, dst, tmp
+// with
+// Neg neg, d
+// Ror dst, x, neg
+bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op,
+ HUShr* ushr,
+ HShl* shl) {
+ DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+ DCHECK(ushr->GetRight()->IsSub() || shl->GetRight()->IsSub());
+ size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte;
+ HInstruction* shl_shift = shl->GetRight();
+ HInstruction* ushr_shift = ushr->GetRight();
+ if ((shl_shift->IsSub() && IsSubRegBitsMinusOther(shl_shift->AsSub(), reg_bits, ushr_shift)) ||
+ (ushr_shift->IsSub() && IsSubRegBitsMinusOther(ushr_shift->AsSub(), reg_bits, shl_shift))) {
+ return ReplaceRotateWithRor(op, ushr, shl);
+ }
+ return false;
+}
+
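A worked instance of the sub pattern, again for 32 bits: in

    UShr dst, x, d
    Sub  ld, 32, d
    Shl  tmp, x, ld
    Or   dst, dst, tmp

IsSubRegBitsMinusOther accepts the Sub because 32 & 31 == 0, and 32 - d is congruent to -d (mod 32), so the whole expression is a right rotation by d and is rewritten to Ror(x, d).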
void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) {
HInstruction* obj = null_check->InputAt(0);
if (!obj->CanBeNull()) {
@@ -530,7 +692,10 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
RecordSimplification();
neg->GetBlock()->RemoveInstruction(neg);
+ return;
}
+
+ TryReplaceWithRotate(instruction);
}
void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
@@ -906,7 +1071,10 @@ void InstructionSimplifierVisitor::VisitOr(HOr* instruction) {
// src
instruction->ReplaceWith(instruction->GetLeft());
instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
}
+
+ TryReplaceWithRotate(instruction);
}
void InstructionSimplifierVisitor::VisitShl(HShl* instruction) {
@@ -1027,6 +1195,8 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) {
RecordSimplification();
return;
}
+
+ TryReplaceWithRotate(instruction);
}
void InstructionSimplifierVisitor::VisitFakeString(HFakeString* instruction) {
@@ -1095,6 +1265,42 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
}
}
+void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic);
+ // This simplification is currently supported on x86, x86_64, ARM and ARM64.
+ // TODO: Implement it for MIPS/64.
+ const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
+ switch (instruction_set) {
+ case kArm:
+ case kArm64:
+ case kThumb2:
+ case kX86:
+ case kX86_64:
+ break;
+ default:
+ return;
+ }
+ HInstruction* value = invoke->InputAt(0);
+ HInstruction* distance = invoke->InputAt(1);
+ // Replace the invoke with an HRor.
+ if (is_left) {
+ distance = new (GetGraph()->GetArena()) HNeg(distance->GetType(), distance);
+ invoke->GetBlock()->InsertInstructionBefore(distance, invoke);
+ }
+ HRor* ror = new (GetGraph()->GetArena()) HRor(value->GetType(), value, distance);
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, ror);
+ // Remove ClinitCheck and LoadClass, if possible.
+ HInstruction* clinit = invoke->InputAt(invoke->InputCount() - 1);
+ if (clinit->IsClinitCheck() && !clinit->HasUses()) {
+ clinit->GetBlock()->RemoveInstruction(clinit);
+ HInstruction* ldclass = clinit->InputAt(0);
+ if (ldclass->IsLoadClass() && !ldclass->HasUses()) {
+ ldclass->GetBlock()->RemoveInstruction(ldclass);
+ }
+ }
+}
+
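The left-rotate rewrite above leans on the identity rotl(x, d) == rotr(x, -d) for power-of-two register sizes, which is why negating the distance and emitting a single Ror suffices. A standalone check of that identity (illustrative):

    #include <cassert>
    #include <cstdint>

    uint32_t rotr32(uint32_t x, uint32_t d) {
      d &= 31;
      return (x >> d) | (x << ((32 - d) & 31));
    }

    uint32_t rotl32(uint32_t x, uint32_t d) {
      d &= 31;
      return (x << d) | (x >> ((32 - d) & 31));
    }

    int main() {
      for (uint32_t d = 0; d < 64; ++d) {
        // Left rotation by d equals right rotation by -d (mod 32).
        assert(rotl32(0xDEADBEEFu, d) == rotr32(0xDEADBEEFu, 0u - d));
      }
    }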
static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potential_array) {
if (potential_length->IsArrayLength()) {
return potential_length->InputAt(0) == potential_array;
@@ -1165,6 +1371,12 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
SimplifyStringEquals(instruction);
} else if (instruction->GetIntrinsic() == Intrinsics::kSystemArrayCopy) {
SimplifySystemArrayCopy(instruction);
+ } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateRight ||
+ instruction->GetIntrinsic() == Intrinsics::kLongRotateRight) {
+ SimplifyRotate(instruction, false);
+ } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateLeft ||
+ instruction->GetIntrinsic() == Intrinsics::kLongRotateLeft) {
+ SimplifyRotate(instruction, true);
}
}
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 6a34b13320..6bbc751bee 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -49,6 +49,7 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio
GetGraph()->GetIntConstant(mirror::Array::DataOffset(access_size).Uint32Value());
HArm64IntermediateAddress* address =
new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc);
+ address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
access->GetBlock()->InsertInstructionBefore(address, access);
access->ReplaceInput(address, 0);
// Both instructions must depend on GC to prevent any instruction that can
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 834081188b..7127215c51 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -189,6 +189,42 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
return ((method.d.data & kIntrinsicFlagMin) == 0) ?
Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong;
+ // More math builtins.
+ case kIntrinsicCos:
+ return Intrinsics::kMathCos;
+ case kIntrinsicSin:
+ return Intrinsics::kMathSin;
+ case kIntrinsicAcos:
+ return Intrinsics::kMathAcos;
+ case kIntrinsicAsin:
+ return Intrinsics::kMathAsin;
+ case kIntrinsicAtan:
+ return Intrinsics::kMathAtan;
+ case kIntrinsicAtan2:
+ return Intrinsics::kMathAtan2;
+ case kIntrinsicCbrt:
+ return Intrinsics::kMathCbrt;
+ case kIntrinsicCosh:
+ return Intrinsics::kMathCosh;
+ case kIntrinsicExp:
+ return Intrinsics::kMathExp;
+ case kIntrinsicExpm1:
+ return Intrinsics::kMathExpm1;
+ case kIntrinsicHypot:
+ return Intrinsics::kMathHypot;
+ case kIntrinsicLog:
+ return Intrinsics::kMathLog;
+ case kIntrinsicLog10:
+ return Intrinsics::kMathLog10;
+ case kIntrinsicNextAfter:
+ return Intrinsics::kMathNextAfter;
+ case kIntrinsicSinh:
+ return Intrinsics::kMathSinh;
+ case kIntrinsicTan:
+ return Intrinsics::kMathTan;
+ case kIntrinsicTanh:
+ return Intrinsics::kMathTanh;
+
// Misc math.
case kIntrinsicSqrt:
return Intrinsics::kMathSqrt;
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 5329b5c1b7..e8181bbb06 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -240,178 +240,6 @@ void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke)
GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}
-static void GenIntegerRotate(LocationSummary* locations,
- ArmAssembler* assembler,
- bool is_left) {
- Register in = locations->InAt(0).AsRegister<Register>();
- Location rhs = locations->InAt(1);
- Register out = locations->Out().AsRegister<Register>();
-
- if (rhs.IsConstant()) {
- // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
- // so map all rotations to a +ve. equivalent in that range.
- // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
- uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue() & 0x1F;
- if (rot) {
- // Rotate, mapping left rotations to right equivalents if necessary.
- // (e.g. left by 2 bits == right by 30.)
- __ Ror(out, in, is_left ? (0x20 - rot) : rot);
- } else if (out != in) {
- __ Mov(out, in);
- }
- } else {
- if (is_left) {
- __ rsb(out, rhs.AsRegister<Register>(), ShifterOperand(0));
- __ Ror(out, in, out);
- } else {
- __ Ror(out, in, rhs.AsRegister<Register>());
- }
- }
-}
-
-// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
-// rotates by swapping input regs (effectively rotating by the first 32-bits of
-// a larger rotation) or flipping direction (thus treating larger right/left
-// rotations as sub-word sized rotations in the other direction) as appropriate.
-static void GenLongRotate(LocationSummary* locations,
- ArmAssembler* assembler,
- bool is_left) {
- Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Location rhs = locations->InAt(1);
- Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
- Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
-
- if (rhs.IsConstant()) {
- uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue();
- // Map all left rotations to right equivalents.
- if (is_left) {
- rot = 0x40 - rot;
- }
- // Map all rotations to +ve. equivalents on the interval [0,63].
- rot &= 0x3F;
- // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
- // logic below to a simple pair of binary orr.
- // (e.g. 34 bits == in_reg swap + 2 bits right.)
- if (rot >= 0x20) {
- rot -= 0x20;
- std::swap(in_reg_hi, in_reg_lo);
- }
- // Rotate, or mov to out for zero or word size rotations.
- if (rot) {
- __ Lsr(out_reg_hi, in_reg_hi, rot);
- __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, 0x20 - rot));
- __ Lsr(out_reg_lo, in_reg_lo, rot);
- __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, 0x20 - rot));
- } else {
- __ Mov(out_reg_lo, in_reg_lo);
- __ Mov(out_reg_hi, in_reg_hi);
- }
- } else {
- Register shift_left = locations->GetTemp(0).AsRegister<Register>();
- Register shift_right = locations->GetTemp(1).AsRegister<Register>();
- Label end;
- Label right;
-
- __ and_(shift_left, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
- __ Lsrs(shift_right, rhs.AsRegister<Register>(), 6);
- __ rsb(shift_right, shift_left, ShifterOperand(0x20), AL, kCcKeep);
-
- if (is_left) {
- __ b(&right, CS);
- } else {
- __ b(&right, CC);
- std::swap(shift_left, shift_right);
- }
-
- // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
- // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
- __ Lsl(out_reg_hi, in_reg_hi, shift_left);
- __ Lsr(out_reg_lo, in_reg_lo, shift_right);
- __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
- __ Lsl(out_reg_lo, in_reg_lo, shift_left);
- __ Lsr(shift_left, in_reg_hi, shift_right);
- __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
- __ b(&end);
-
- // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
- // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
- __ Bind(&right);
- __ Lsr(out_reg_hi, in_reg_hi, shift_right);
- __ Lsl(out_reg_lo, in_reg_lo, shift_left);
- __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
- __ Lsr(out_reg_lo, in_reg_lo, shift_right);
- __ Lsl(shift_right, in_reg_hi, shift_left);
- __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
-
- __ Bind(&end);
- }
-}
-
-void IntrinsicLocationsBuilderARM::VisitIntegerRotateRight(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitIntegerRotateRight(HInvoke* invoke) {
- GenIntegerRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ false);
-}
-
-void IntrinsicLocationsBuilderARM::VisitLongRotateRight(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- if (invoke->InputAt(1)->IsConstant()) {
- locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
- } else {
- locations->SetInAt(1, Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- }
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitLongRotateRight(HInvoke* invoke) {
- GenLongRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ false);
-}
-
-void IntrinsicLocationsBuilderARM::VisitIntegerRotateLeft(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitIntegerRotateLeft(HInvoke* invoke) {
- GenIntegerRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderARM::VisitLongRotateLeft(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- if (invoke->InputAt(1)->IsConstant()) {
- locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
- } else {
- locations->SetInAt(1, Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- }
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitLongRotateLeft(HInvoke* invoke) {
- GenLongRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ true);
-}
-
static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
Location in = locations->InAt(0);
Location out = locations->Out();
@@ -1700,8 +1528,12 @@ void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED)
UNIMPLEMENTED_INTRINSIC(IntegerReverse)
UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
UNIMPLEMENTED_INTRINSIC(LongReverse)
UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
@@ -1718,6 +1550,23 @@ UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
#undef UNIMPLEMENTED_INTRINSIC
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 962c4d5167..6b34daadf0 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -330,103 +330,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke
GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
}
-static void GenRotateRight(LocationSummary* locations,
- Primitive::Type type,
- vixl::MacroAssembler* masm) {
- DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
- Location in = locations->InAt(0);
- Location out = locations->Out();
- Operand rhs = OperandFrom(locations->InAt(1), type);
-
- if (rhs.IsImmediate()) {
- uint32_t shift = rhs.immediate() & (RegisterFrom(in, type).SizeInBits() - 1);
- __ Ror(RegisterFrom(out, type),
- RegisterFrom(in, type),
- shift);
- } else {
- DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
- __ Ror(RegisterFrom(out, type),
- RegisterFrom(in, type),
- rhs.reg());
- }
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerRotateRight(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerRotateRight(HInvoke* invoke) {
- GenRotateRight(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongRotateRight(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongRotateRight(HInvoke* invoke) {
- GenRotateRight(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
-}
-
-static void GenRotateLeft(LocationSummary* locations,
- Primitive::Type type,
- vixl::MacroAssembler* masm) {
- DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
- Location in = locations->InAt(0);
- Location out = locations->Out();
- Operand rhs = OperandFrom(locations->InAt(1), type);
-
- if (rhs.IsImmediate()) {
- uint32_t regsize = RegisterFrom(in, type).SizeInBits();
- uint32_t shift = (regsize - rhs.immediate()) & (regsize - 1);
- __ Ror(RegisterFrom(out, type), RegisterFrom(in, type), shift);
- } else {
- DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
- __ Neg(RegisterFrom(out, type),
- Operand(RegisterFrom(locations->InAt(1), type)));
- __ Ror(RegisterFrom(out, type),
- RegisterFrom(in, type),
- RegisterFrom(out, type));
- }
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
- GenRotateLeft(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongRotateLeft(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongRotateLeft(HInvoke* invoke) {
- GenRotateLeft(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
-}
-
static void GenReverse(LocationSummary* locations,
Primitive::Type type,
vixl::MacroAssembler* masm) {
@@ -1527,11 +1430,33 @@ void IntrinsicLocationsBuilderARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNU
void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
+
#undef UNIMPLEMENTED_INTRINSIC
#undef __
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 8f1d5e1c4d..96f43a0f74 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -51,6 +51,23 @@
V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache) \
V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache) \
V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathCos, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathSin, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathAcos, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathAsin, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathAtan, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathAtan2, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathCbrt, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathCosh, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathExp, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathExpm1, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathHypot, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathLog, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathLog10, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathSinh, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathTan, kStatic, kNeedsEnvironmentOrCache) \
+ V(MathTanh, kStatic, kNeedsEnvironmentOrCache) \
V(MathSqrt, kStatic, kNeedsEnvironmentOrCache) \
V(MathCeil, kStatic, kNeedsEnvironmentOrCache) \
V(MathFloor, kStatic, kNeedsEnvironmentOrCache) \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 9ecce0e93a..06fab616ad 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -989,6 +989,23 @@ UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
#undef UNIMPLEMENTED_INTRINSIC
#undef __
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 36e1b20e4e..8aa7d9ff6f 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1730,6 +1730,24 @@ UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
+UNIMPLEMENTED_INTRINSIC(MathCos)
+UNIMPLEMENTED_INTRINSIC(MathSin)
+UNIMPLEMENTED_INTRINSIC(MathAcos)
+UNIMPLEMENTED_INTRINSIC(MathAsin)
+UNIMPLEMENTED_INTRINSIC(MathAtan)
+UNIMPLEMENTED_INTRINSIC(MathAtan2)
+UNIMPLEMENTED_INTRINSIC(MathCbrt)
+UNIMPLEMENTED_INTRINSIC(MathCosh)
+UNIMPLEMENTED_INTRINSIC(MathExp)
+UNIMPLEMENTED_INTRINSIC(MathExpm1)
+UNIMPLEMENTED_INTRINSIC(MathHypot)
+UNIMPLEMENTED_INTRINSIC(MathLog)
+UNIMPLEMENTED_INTRINSIC(MathLog10)
+UNIMPLEMENTED_INTRINSIC(MathNextAfter)
+UNIMPLEMENTED_INTRINSIC(MathSinh)
+UNIMPLEMENTED_INTRINSIC(MathTan)
+UNIMPLEMENTED_INTRINSIC(MathTanh)
+
#undef UNIMPLEMENTED_INTRINSIC
#undef __
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 5b67cdefa3..fd454d8322 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -788,6 +788,195 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
__ Bind(&done);
}
+static void CreateFPToFPCallLocations(ArenaAllocator* arena,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
+}
+
+static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
+ LocationSummary* locations = invoke->GetLocations();
+ DCHECK(locations->WillCall());
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ X86Assembler* assembler = codegen->GetAssembler();
+
+  // Reserve stack space to pass the parameters.
+ __ subl(ESP, Immediate(16));
+ __ cfi().AdjustCFAOffset(16);
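+  // 16 bytes is enough to hold up to two 8-byte double arguments.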
+
+ // Pass the parameters at the bottom of the stack.
+ __ movsd(Address(ESP, 0), XMM0);
+
+ // If we have a second parameter, pass it next.
+ if (invoke->GetNumberOfArguments() == 2) {
+ __ movsd(Address(ESP, 8), XMM1);
+ }
+
+ // Now do the actual call.
+ __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(entry)));
+
+ // Extract the return value from the FP stack.
+ __ fstpl(Address(ESP, 0));
+ __ movsd(XMM0, Address(ESP, 0));
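+  // (The native calling convention on x86 returns doubles on the x87 stack,
+  // not in XMM0, hence the store-and-reload above.)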
+
+ // And clean up the stack.
+ __ addl(ESP, Immediate(16));
+ __ cfi().AdjustCFAOffset(-16);
+
+ codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickTanh);
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
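+  // GenFPToFPCall copies both XMM arguments to the stack before the native call.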
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
+}
+
void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
// The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1601,12 +1790,27 @@ static void GenUnsafeGet(HInvoke* invoke,
Location output_loc = locations->Out();
switch (type) {
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
+ case Primitive::kPrimInt: {
Register output = output_loc.AsRegister<Register>();
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
- if (type == Primitive::kPrimNot) {
- codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+ break;
+ }
+
+ case Primitive::kPrimNot: {
+ Register output = output_loc.AsRegister<Register>();
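+      // Loading a reference may require a read barrier: the Baker variant performs
+      // the instrumented load itself, while the slow path decorates a plain load.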
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ codegen->GenerateArrayLoadWithBakerReadBarrier(
+ invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+ } else {
+ __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ codegen->GenerateReadBarrierSlow(
+ invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+ }
+ } else {
+ __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ __ MaybeUnpoisonHeapReference(output);
}
break;
}
@@ -1634,8 +1838,10 @@ static void GenUnsafeGet(HInvoke* invoke,
}
}
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
- bool is_long, bool is_volatile) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ Primitive::Type type,
+ bool is_volatile) {
bool can_call = kEmitCompilerReadBarrier &&
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -1647,7 +1853,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
- if (is_long) {
+ if (type == Primitive::kPrimLong) {
if (is_volatile) {
// Need to use XMM to read volatile.
locations->AddTemp(Location::RequiresFpuRegister());
@@ -1658,25 +1864,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
} else {
locations->SetOut(Location::RequiresRegister());
}
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ false);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ true);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ true, /* is_volatile */ false);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ true, /* is_volatile */ true);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ false);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ true);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
}
@@ -2277,56 +2488,6 @@ void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke)
GenTrailingZeros(assembler, invoke, /* is_long */ true);
}
-static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) {
- LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- // The shift count needs to be in CL or a constant.
- locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, invoke->InputAt(1)));
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void GenRotate(X86Assembler* assembler, HInvoke* invoke, bool is_left) {
- LocationSummary* locations = invoke->GetLocations();
- Register first_reg = locations->InAt(0).AsRegister<Register>();
- Location second = locations->InAt(1);
-
- if (second.IsRegister()) {
- Register second_reg = second.AsRegister<Register>();
- if (is_left) {
- __ roll(first_reg, second_reg);
- } else {
- __ rorl(first_reg, second_reg);
- }
- } else {
- Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
- if (is_left) {
- __ roll(first_reg, imm);
- } else {
- __ rorl(first_reg, imm);
- }
- }
-}
-
-void IntrinsicLocationsBuilderX86::VisitIntegerRotateLeft(HInvoke* invoke) {
- CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitIntegerRotateLeft(HInvoke* invoke) {
- X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
- GenRotate(assembler, invoke, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86::VisitIntegerRotateRight(HInvoke* invoke) {
- CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitIntegerRotateRight(HInvoke* invoke) {
- X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
- GenRotate(assembler, invoke, /* is_left */ false);
-}
-
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -2337,6 +2498,8 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED)
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ecd129f31e..ce737e3f7e 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -703,6 +703,188 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
__ Bind(&done);
}
+static void CreateFPToFPCallLocations(ArenaAllocator* arena,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
+
+  // We have to ensure that the native code doesn't clobber the XMM registers which are
+  // non-volatile for ART, but volatile for native calls. Adding them as temporaries
+  // ensures they are saved in the prologue and properly restored.
+ for (auto fp_reg : non_volatile_xmm_regs) {
+ locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
+ }
+}
+
+static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
+ QuickEntrypointEnum entry) {
+ LocationSummary* locations = invoke->GetLocations();
+ DCHECK(locations->WillCall());
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ X86_64Assembler* assembler = codegen->GetAssembler();
+
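+  // Unlike on x86, the argument and the result are both in XMM0 under the native
+  // ABI as well, so no marshalling is needed around the call.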
+ __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64WordSize>(entry), true));
+ codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickTanh);
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
+
+  // We have to ensure that the native code doesn't clobber the XMM registers which are
+  // non-volatile for ART, but volatile for native calls. Adding them as temporaries
+  // ensures they are saved in the prologue and properly restored.
+ for (auto fp_reg : non_volatile_xmm_regs) {
+ locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
+}
+
void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
// The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1735,16 +1917,30 @@ static void GenUnsafeGet(HInvoke* invoke,
Location offset_loc = locations->InAt(2);
CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
Location output_loc = locations->Out();
- CpuRegister output = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister output = output_loc.AsRegister<CpuRegister>();
switch (type) {
case Primitive::kPrimInt:
- case Primitive::kPrimNot:
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
- if (type == Primitive::kPrimNot) {
- codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+ break;
+
+ case Primitive::kPrimNot: {
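+      // Same scheme as on x86: the Baker read barrier performs the instrumented
+      // load itself; otherwise a plain load is followed by a slow-path barrier or
+      // by unpoisoning.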
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ codegen->GenerateArrayLoadWithBakerReadBarrier(
+ invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+ } else {
+ __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ codegen->GenerateReadBarrierSlow(
+ invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+ }
+ } else {
+ __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ __ MaybeUnpoisonHeapReference(output);
}
break;
+ }
case Primitive::kPrimLong:
__ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
@@ -1756,7 +1952,9 @@ static void GenUnsafeGet(HInvoke* invoke,
}
}
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ Primitive::Type type) {
bool can_call = kEmitCompilerReadBarrier &&
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -1769,25 +1967,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister());
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
@@ -2295,92 +2498,6 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok
GenTrailingZeros(assembler, invoke, /* is_long */ true);
}
-static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) {
- LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- // The shift count needs to be in CL or a constant.
- locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, invoke->InputAt(1)));
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void GenRotate(X86_64Assembler* assembler, HInvoke* invoke, bool is_long, bool is_left) {
- LocationSummary* locations = invoke->GetLocations();
- CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
- Location second = locations->InAt(1);
-
- if (is_long) {
- if (second.IsRegister()) {
- CpuRegister second_reg = second.AsRegister<CpuRegister>();
- if (is_left) {
- __ rolq(first_reg, second_reg);
- } else {
- __ rorq(first_reg, second_reg);
- }
- } else {
- Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
- if (is_left) {
- __ rolq(first_reg, imm);
- } else {
- __ rorq(first_reg, imm);
- }
- }
- } else {
- if (second.IsRegister()) {
- CpuRegister second_reg = second.AsRegister<CpuRegister>();
- if (is_left) {
- __ roll(first_reg, second_reg);
- } else {
- __ rorl(first_reg, second_reg);
- }
- } else {
- Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
- if (is_left) {
- __ roll(first_reg, imm);
- } else {
- __ rorl(first_reg, imm);
- }
- }
- }
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
- CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
- X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
- GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
- CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
- X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
- GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ false);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongRotateLeft(HInvoke* invoke) {
- CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongRotateLeft(HInvoke* invoke) {
- X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
- GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongRotateRight(HInvoke* invoke) {
- CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongRotateRight(HInvoke* invoke) {
- X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
- GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ false);
-}
-
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -2390,6 +2507,10 @@ void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE
}
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
#undef UNIMPLEMENTED_INTRINSIC
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 2bb769a430..956de2cb8a 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -16,7 +16,6 @@
#include "base/arena_allocator.h"
#include "builder.h"
-#include "gtest/gtest.h"
#include "licm.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
@@ -27,7 +26,7 @@ namespace art {
/**
* Fixture class for the LICM tests.
*/
-class LICMTest : public testing::Test {
+class LICMTest : public CommonCompilerTest {
public:
LICMTest() : pool_(), allocator_(&pool_) {
graph_ = CreateGraph(&allocator_);
@@ -70,16 +69,16 @@ class LICMTest : public testing::Test {
loop_preheader_->AddInstruction(new (&allocator_) HGoto());
loop_header_->AddInstruction(new (&allocator_) HIf(parameter_));
loop_body_->AddInstruction(new (&allocator_) HGoto());
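+    // Every block must end in a control-flow instruction for the graph to be
+    // well formed under the SSA builder and graph checker.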
+ return_->AddInstruction(new (&allocator_) HReturnVoid());
exit_->AddInstruction(new (&allocator_) HExit());
}
// Performs LICM optimizations (after proper set up).
void PerformLICM() {
- ASSERT_TRUE(graph_->TryBuildingSsa());
+ TransformToSsa(graph_);
SideEffectsAnalysis side_effects(graph_);
side_effects.Run();
- LICM licm(graph_, side_effects);
- licm.Run();
+ LICM(graph_, side_effects).Run();
}
// General building fields.
@@ -169,10 +168,10 @@ TEST_F(LICMTest, ArrayHoisting) {
// Populate the loop with instructions: set/get array with different types.
HInstruction* get_array = new (&allocator_) HArrayGet(
- parameter_, constant_, Primitive::kPrimLong, 0);
+ parameter_, constant_, Primitive::kPrimByte, 0);
loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
HInstruction* set_array = new (&allocator_) HArraySet(
- parameter_, constant_, constant_, Primitive::kPrimInt, 0);
+ parameter_, constant_, constant_, Primitive::kPrimShort, 0);
loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
EXPECT_EQ(get_array->GetBlock(), loop_body_);
@@ -187,10 +186,10 @@ TEST_F(LICMTest, NoArrayHoisting) {
// Populate the loop with instructions: set/get array with same types.
HInstruction* get_array = new (&allocator_) HArrayGet(
- parameter_, constant_, Primitive::kPrimLong, 0);
+ parameter_, constant_, Primitive::kPrimByte, 0);
loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
HInstruction* set_array = new (&allocator_) HArraySet(
- parameter_, get_array, constant_, Primitive::kPrimLong, 0);
+ parameter_, get_array, constant_, Primitive::kPrimByte, 0);
loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
EXPECT_EQ(get_array->GetBlock(), loop_body_);
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index a059766e00..ed275b1544 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -29,13 +29,12 @@
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "pretty_printer.h"
-#include "ssa_builder.h"
#include "ssa_liveness_analysis.h"
-#include "gtest/gtest.h"
-
namespace art {
+class LinearizeTest : public CommonCompilerTest {};
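+// Note: the SSA transform now involves reference type propagation, which
+// requires a live runtime; hence the CommonCompilerTest fixture.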
+
template <size_t number_of_blocks>
static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[number_of_blocks]) {
ArenaPool pool;
@@ -46,7 +45,7 @@ static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[numb
bool graph_built = builder.BuildGraph(*item);
ASSERT_TRUE(graph_built);
- graph->TryBuildingSsa();
+ TransformToSsa(graph);
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
@@ -60,7 +59,7 @@ static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[numb
}
}
-TEST(LinearizeTest, CFG1) {
+TEST_F(LinearizeTest, CFG1) {
// Structure of this graph (+ are back edges)
// Block0
// |
@@ -85,7 +84,7 @@ TEST(LinearizeTest, CFG1) {
TestCode(data, blocks);
}
-TEST(LinearizeTest, CFG2) {
+TEST_F(LinearizeTest, CFG2) {
// Structure of this graph (+ are back edges)
// Block0
// |
@@ -110,7 +109,7 @@ TEST(LinearizeTest, CFG2) {
TestCode(data, blocks);
}
-TEST(LinearizeTest, CFG3) {
+TEST_F(LinearizeTest, CFG3) {
// Structure of this graph (+ are back edges)
// Block0
// |
@@ -137,7 +136,7 @@ TEST(LinearizeTest, CFG3) {
TestCode(data, blocks);
}
-TEST(LinearizeTest, CFG4) {
+TEST_F(LinearizeTest, CFG4) {
/* Structure of this graph (+ are back edges)
// Block0
// |
@@ -167,7 +166,7 @@ TEST(LinearizeTest, CFG4) {
TestCode(data, blocks);
}
-TEST(LinearizeTest, CFG5) {
+TEST_F(LinearizeTest, CFG5) {
/* Structure of this graph (+ are back edges)
// Block0
// |
@@ -197,7 +196,7 @@ TEST(LinearizeTest, CFG5) {
TestCode(data, blocks);
}
-TEST(LinearizeTest, CFG6) {
+TEST_F(LinearizeTest, CFG6) {
// Block0
// |
// Block1
@@ -223,7 +222,7 @@ TEST(LinearizeTest, CFG6) {
TestCode(data, blocks);
}
-TEST(LinearizeTest, CFG7) {
+TEST_F(LinearizeTest, CFG7) {
// Structure of this graph (+ are back edges)
// Block0
// |
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 7f67560692..926f9399a5 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -27,10 +27,10 @@
#include "prepare_for_register_allocation.h"
#include "ssa_liveness_analysis.h"
-#include "gtest/gtest.h"
-
namespace art {
+class LiveRangesTest : public CommonCompilerTest {};
+
static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
HGraph* graph = CreateGraph(allocator);
HGraphBuilder builder(graph);
@@ -39,13 +39,13 @@ static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
// Suspend checks implementation may change in the future, and this test relies
// on how instructions are ordered.
RemoveSuspendChecks(graph);
- graph->TryBuildingSsa();
+ TransformToSsa(graph);
// `Inline` conditions into ifs.
PrepareForRegisterAllocation(graph).Run();
return graph;
}
-TEST(LiveRangesTest, CFG1) {
+TEST_F(LiveRangesTest, CFG1) {
/*
* Test the following snippet:
* return 0;
@@ -83,7 +83,7 @@ TEST(LiveRangesTest, CFG1) {
ASSERT_TRUE(range->GetNext() == nullptr);
}
-TEST(LiveRangesTest, CFG2) {
+TEST_F(LiveRangesTest, CFG2) {
/*
* Test the following snippet:
* var a = 0;
@@ -131,7 +131,7 @@ TEST(LiveRangesTest, CFG2) {
ASSERT_TRUE(range->GetNext() == nullptr);
}
-TEST(LiveRangesTest, CFG3) {
+TEST_F(LiveRangesTest, CFG3) {
/*
* Test the following snippet:
* var a = 0;
@@ -204,7 +204,7 @@ TEST(LiveRangesTest, CFG3) {
ASSERT_TRUE(range->GetNext() == nullptr);
}
-TEST(LiveRangesTest, Loop1) {
+TEST_F(LiveRangesTest, Loop1) {
/*
* Test the following snippet:
* var a = 0;
@@ -284,7 +284,7 @@ TEST(LiveRangesTest, Loop1) {
ASSERT_TRUE(range->GetNext() == nullptr);
}
-TEST(LiveRangesTest, Loop2) {
+TEST_F(LiveRangesTest, Loop2) {
/*
* Test the following snippet:
* var a = 0;
@@ -360,7 +360,7 @@ TEST(LiveRangesTest, Loop2) {
ASSERT_TRUE(range->GetNext() == nullptr);
}
-TEST(LiveRangesTest, CFG4) {
+TEST_F(LiveRangesTest, CFG4) {
/*
* Test the following snippet:
* var a = 0;
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 9d7d0b6c67..7736eedae1 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -27,10 +27,10 @@
#include "prepare_for_register_allocation.h"
#include "ssa_liveness_analysis.h"
-#include "gtest/gtest.h"
-
namespace art {
+class LivenessTest : public CommonCompilerTest {};
+
static void DumpBitVector(BitVector* vector,
std::ostream& buffer,
size_t count,
@@ -51,7 +51,7 @@ static void TestCode(const uint16_t* data, const char* expected) {
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
ASSERT_TRUE(graph_built);
- graph->TryBuildingSsa();
+ TransformToSsa(graph);
// `Inline` conditions into ifs.
PrepareForRegisterAllocation(graph).Run();
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
@@ -75,7 +75,7 @@ static void TestCode(const uint16_t* data, const char* expected) {
ASSERT_STREQ(expected, buffer.str().c_str());
}
-TEST(LivenessTest, CFG1) {
+TEST_F(LivenessTest, CFG1) {
const char* expected =
"Block 0\n"
" live in: (0)\n"
@@ -98,7 +98,7 @@ TEST(LivenessTest, CFG1) {
TestCode(data, expected);
}
-TEST(LivenessTest, CFG2) {
+TEST_F(LivenessTest, CFG2) {
const char* expected =
"Block 0\n"
" live in: (0)\n"
@@ -120,7 +120,7 @@ TEST(LivenessTest, CFG2) {
TestCode(data, expected);
}
-TEST(LivenessTest, CFG3) {
+TEST_F(LivenessTest, CFG3) {
const char* expected =
"Block 0\n" // entry block
" live in: (000)\n"
@@ -149,7 +149,7 @@ TEST(LivenessTest, CFG3) {
TestCode(data, expected);
}
-TEST(LivenessTest, CFG4) {
+TEST_F(LivenessTest, CFG4) {
// var a;
// if (0 == 0) {
// a = 5;
@@ -197,7 +197,7 @@ TEST(LivenessTest, CFG4) {
TestCode(data, expected);
}
-TEST(LivenessTest, CFG5) {
+TEST_F(LivenessTest, CFG5) {
// var a = 0;
// if (0 == 0) {
// } else {
@@ -242,7 +242,7 @@ TEST(LivenessTest, CFG5) {
TestCode(data, expected);
}
-TEST(LivenessTest, Loop1) {
+TEST_F(LivenessTest, Loop1) {
// Simple loop with one preheader and one back edge.
// var a = 0;
// while (a == a) {
@@ -288,7 +288,7 @@ TEST(LivenessTest, Loop1) {
TestCode(data, expected);
}
-TEST(LivenessTest, Loop3) {
+TEST_F(LivenessTest, Loop3) {
// Test that the returned value stays live in a preceding loop.
// var a = 0;
// while (a == a) {
@@ -335,7 +335,7 @@ TEST(LivenessTest, Loop3) {
}
-TEST(LivenessTest, Loop4) {
+TEST_F(LivenessTest, Loop4) {
// Make sure we support a preheader of a loop not being the first predecessor
// in the predecessor list of the header.
// var a = 0;
@@ -387,7 +387,7 @@ TEST(LivenessTest, Loop4) {
TestCode(data, expected);
}
-TEST(LivenessTest, Loop5) {
+TEST_F(LivenessTest, Loop5) {
// Make sure we create a preheader of a loop when a header originally has two
// incoming blocks and one back edge.
// Bitsets are made of:
@@ -443,7 +443,7 @@ TEST(LivenessTest, Loop5) {
TestCode(data, expected);
}
-TEST(LivenessTest, Loop6) {
+TEST_F(LivenessTest, Loop6) {
// Bitsets are made of:
// (constant0, constant4, constant5, phi in block 2)
const char* expected =
@@ -494,7 +494,7 @@ TEST(LivenessTest, Loop6) {
}
-TEST(LivenessTest, Loop7) {
+TEST_F(LivenessTest, Loop7) {
// Bitsets are made of:
// (constant0, constant4, constant5, phi in block 2, phi in block 6)
const char* expected =
@@ -548,7 +548,7 @@ TEST(LivenessTest, Loop7) {
TestCode(data, expected);
}
-TEST(LivenessTest, Loop8) {
+TEST_F(LivenessTest, Loop8) {
// var a = 0;
// while (a == a) {
// a = a + a;
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 389ada7504..adde00464b 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -335,16 +335,24 @@ class HeapLocationCollector : public HGraphVisitor {
return true;
}
- ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* ref) {
- ReferenceInfo* ref_info = FindReferenceInfoOf(ref);
+ ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) {
+ ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
if (ref_info == nullptr) {
size_t pos = ref_info_array_.size();
- ref_info = new (GetGraph()->GetArena()) ReferenceInfo(ref, pos);
+ ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos);
ref_info_array_.push_back(ref_info);
}
return ref_info;
}
+ void CreateReferenceInfoForReferenceType(HInstruction* instruction) {
+ if (instruction->GetType() != Primitive::kPrimNot) {
+ return;
+ }
+ DCHECK(FindReferenceInfoOf(instruction) == nullptr);
+ GetOrCreateReferenceInfo(instruction);
+ }
+
HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
size_t offset,
HInstruction* index,
@@ -378,6 +386,7 @@ class HeapLocationCollector : public HGraphVisitor {
void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+ CreateReferenceInfoForReferenceType(instruction);
}
void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
@@ -387,6 +396,7 @@ class HeapLocationCollector : public HGraphVisitor {
void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+ CreateReferenceInfoForReferenceType(instruction);
}
void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
@@ -399,6 +409,7 @@ class HeapLocationCollector : public HGraphVisitor {
void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
+ CreateReferenceInfoForReferenceType(instruction);
}
void VisitArraySet(HArraySet* instruction) OVERRIDE {
@@ -408,7 +419,23 @@ class HeapLocationCollector : public HGraphVisitor {
void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
// Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
- GetOrCreateReferenceInfo(new_instance);
+ CreateReferenceInfoForReferenceType(new_instance);
+ }
+
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
+ }
+
+ void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
+ CreateReferenceInfoForReferenceType(instruction);
}
void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
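The point of CreateReferenceInfoForReferenceType above is to centralize the
kPrimNot gate, so the getter, invoke, and parameter visitors can call it
unconditionally. A minimal standalone sketch of the same gating pattern, with
stand-in types (none of these names are ART's):

    #include <cstddef>
    #include <unordered_map>

    enum class Type { kInt, kRef };       // stand-in for Primitive::Type
    struct Insn { Type type; };           // stand-in for HInstruction
    struct RefInfo { size_t position; };  // stand-in for ReferenceInfo

    class Collector {
     public:
      // Only reference-typed instructions get a RefInfo; everything else
      // is ignored, so visitors never repeat the type check themselves.
      void CreateRefInfoForRefType(const Insn* insn) {
        if (insn->type != Type::kRef) {
          return;
        }
        infos_.emplace(insn, RefInfo{infos_.size()});  // no-op if tracked
      }

     private:
      std::unordered_map<const Insn*, RefInfo> infos_;
    };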
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 926bc156cf..bb0b545c1e 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -198,10 +198,38 @@ void HGraph::ComputeDominanceInformation() {
}
}
-void HGraph::TransformToSsa() {
- DCHECK(!reverse_post_order_.empty());
- SsaBuilder ssa_builder(this);
- ssa_builder.BuildSsa();
+BuildSsaResult HGraph::TryBuildingSsa(StackHandleScopeCollection* handles) {
+ BuildDominatorTree();
+
+ // The SSA builder requires loops to all be natural. Specifically, the dead phi
+ // elimination phase checks the consistency of the graph when doing a post-order
+ // visit for eliminating dead phis: a dead phi can only have loop header phi
+ // users remaining when being visited.
+ BuildSsaResult result = AnalyzeNaturalLoops();
+ if (result != kBuildSsaSuccess) {
+ return result;
+ }
+
+ // Precompute per-block try membership before entering the SSA builder,
+ // which needs the information to build catch block phis from values of
+ // locals at throwing instructions inside try blocks.
+ ComputeTryBlockInformation();
+
+ // Create the inexact Object reference type and store it in the HGraph.
+ ScopedObjectAccess soa(Thread::Current());
+ ClassLinker* linker = Runtime::Current()->GetClassLinker();
+ inexact_object_rti_ = ReferenceTypeInfo::Create(
+ handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)),
+ /* is_exact */ false);
+
+  // Transform the graph to SSA form.
+ result = SsaBuilder(this, handles).BuildSsa();
+ if (result != kBuildSsaSuccess) {
+ return result;
+ }
+
+ in_ssa_form_ = true;
+ return kBuildSsaSuccess;
}
HBasicBlock* HGraph::SplitEdge(HBasicBlock* block, HBasicBlock* successor) {
@@ -410,7 +438,7 @@ void HGraph::SimplifyCFG() {
}
}
-bool HGraph::AnalyzeNaturalLoops() const {
+BuildSsaResult HGraph::AnalyzeNaturalLoops() const {
// Order does not matter.
for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
@@ -418,16 +446,16 @@ bool HGraph::AnalyzeNaturalLoops() const {
if (block->IsCatchBlock()) {
// TODO: Dealing with exceptional back edges could be tricky because
// they only approximate the real control flow. Bail out for now.
- return false;
+ return kBuildSsaFailThrowCatchLoop;
}
HLoopInformation* info = block->GetLoopInformation();
if (!info->Populate()) {
// Abort if the loop is non natural. We currently bailout in such cases.
- return false;
+ return kBuildSsaFailNonNaturalLoop;
}
}
}
- return true;
+ return kBuildSsaSuccess;
}
void HGraph::InsertConstant(HConstant* constant) {
@@ -446,8 +474,13 @@ HNullConstant* HGraph::GetNullConstant(uint32_t dex_pc) {
// id and/or any invariants the graph is assuming when adding new instructions.
if ((cached_null_constant_ == nullptr) || (cached_null_constant_->GetBlock() == nullptr)) {
cached_null_constant_ = new (arena_) HNullConstant(dex_pc);
+ cached_null_constant_->SetReferenceTypeInfo(inexact_object_rti_);
InsertConstant(cached_null_constant_);
}
+ if (kIsDebugBuild) {
+ ScopedObjectAccess soa(Thread::Current());
+ DCHECK(cached_null_constant_->GetReferenceTypeInfo().IsValid());
+ }
return cached_null_constant_;
}
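Two things change in nodes.cc above: TryBuildingSsa now reports why SSA
construction failed instead of returning a bare bool, and GetNullConstant
stamps the cached null constant with the inexact Object RTI up front. A
self-contained sketch of the caller-side dispatch; the enum mirrors the one
added to nodes.h below, while the handler itself is illustrative rather than
ART code:

    #include <iostream>

    enum BuildSsaResult {  // mirrors the enum added in nodes.h
      kBuildSsaFailNonNaturalLoop,
      kBuildSsaFailThrowCatchLoop,
      kBuildSsaFailAmbiguousArrayGet,
      kBuildSsaSuccess,
    };

    // Mirrors the shape of the switch added to optimizing_compiler.cc
    // later in this diff: success falls through, each failure maps to a
    // distinct bailout reason.
    const char* DescribeBailout(BuildSsaResult result) {
      switch (result) {
        case kBuildSsaSuccess:               return nullptr;
        case kBuildSsaFailNonNaturalLoop:    return "non-natural loop";
        case kBuildSsaFailThrowCatchLoop:    return "throw-catch loop";
        case kBuildSsaFailAmbiguousArrayGet: return "ambiguous ArrayGet";
      }
      return nullptr;  // unreachable for well-formed enum values
    }

    int main() {
      if (const char* why = DescribeBailout(kBuildSsaFailThrowCatchLoop)) {
        std::cout << "skipping compilation: " << why << "\n";
      }
      return 0;
    }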
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 3e38e9f2a4..55e436f0b7 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -98,6 +98,13 @@ enum IfCondition {
kCondAE, // >=
};
+enum BuildSsaResult {
+ kBuildSsaFailNonNaturalLoop,
+ kBuildSsaFailThrowCatchLoop,
+ kBuildSsaFailAmbiguousArrayGet,
+ kBuildSsaSuccess,
+};
+
class HInstructionList : public ValueObject {
public:
HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {}
@@ -143,6 +150,122 @@ class HInstructionList : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(HInstructionList);
};
+class ReferenceTypeInfo : ValueObject {
+ public:
+ typedef Handle<mirror::Class> TypeHandle;
+
+ static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) {
+ // The constructor will check that the type_handle is valid.
+ return ReferenceTypeInfo(type_handle, is_exact);
+ }
+
+ static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); }
+
+ static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) {
+ return handle.GetReference() != nullptr;
+ }
+
+ bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ return IsValidHandle(type_handle_);
+ }
+
+ bool IsExact() const { return is_exact_; }
+
+ bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ return GetTypeHandle()->IsObjectClass();
+ }
+
+ bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ return GetTypeHandle()->IsStringClass();
+ }
+
+ bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
+ }
+
+ bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ return GetTypeHandle()->IsInterface();
+ }
+
+ bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ return GetTypeHandle()->IsArrayClass();
+ }
+
+ bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ return GetTypeHandle()->IsPrimitiveArray();
+ }
+
+ bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
+ }
+
+ bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ if (!IsExact()) return false;
+ if (!IsArrayClass()) return false;
+ return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
+ }
+
+ bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ if (!IsExact()) return false;
+ if (!IsArrayClass()) return false;
+ if (!rti.IsArrayClass()) return false;
+ return GetTypeHandle()->GetComponentType()->IsAssignableFrom(
+ rti.GetTypeHandle()->GetComponentType());
+ }
+
+ Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
+
+ bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ DCHECK(rti.IsValid());
+ return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+ }
+
+ bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ DCHECK(rti.IsValid());
+ return GetTypeHandle().Get() != rti.GetTypeHandle().Get() &&
+ GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+ }
+
+  // Returns true if the type information provides the same amount of detail.
+ // Note that it does not mean that the instructions have the same actual type
+ // (because the type can be the result of a merge).
+ bool IsEqual(ReferenceTypeInfo rti) SHARED_REQUIRES(Locks::mutator_lock_) {
+ if (!IsValid() && !rti.IsValid()) {
+ // Invalid types are equal.
+ return true;
+ }
+ if (!IsValid() || !rti.IsValid()) {
+ // One is valid, the other not.
+ return false;
+ }
+ return IsExact() == rti.IsExact()
+ && GetTypeHandle().Get() == rti.GetTypeHandle().Get();
+ }
+
+ private:
+ ReferenceTypeInfo();
+ ReferenceTypeInfo(TypeHandle type_handle, bool is_exact);
+
+ // The class of the object.
+ TypeHandle type_handle_;
+  // Whether or not the type is exact, as opposed to being merely a
+  // superclass of the actual type.
+ bool is_exact_;
+};
+
+std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
+
// Control-flow graph of a method. Contains a list of basic blocks.
class HGraph : public ArenaObject<kArenaAllocGraph> {
public:
@@ -179,7 +302,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
cached_float_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
- cached_current_method_(nullptr) {
+ cached_current_method_(nullptr),
+ inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) {
blocks_.reserve(kDefaultNumberOfBlocks);
}
@@ -197,36 +321,23 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
void AddBlock(HBasicBlock* block);
- // Try building the SSA form of this graph, with dominance computation and loop
- // recognition. Returns whether it was successful in doing all these steps.
- bool TryBuildingSsa() {
- BuildDominatorTree();
- // The SSA builder requires loops to all be natural. Specifically, the dead phi
- // elimination phase checks the consistency of the graph when doing a post-order
- // visit for eliminating dead phis: a dead phi can only have loop header phi
- // users remaining when being visited.
- if (!AnalyzeNaturalLoops()) return false;
- // Precompute per-block try membership before entering the SSA builder,
- // which needs the information to build catch block phis from values of
- // locals at throwing instructions inside try blocks.
- ComputeTryBlockInformation();
- TransformToSsa();
- in_ssa_form_ = true;
- return true;
- }
+  // Try building the SSA form of this graph, with dominance computation and
+  // loop recognition. Returns a code indicating either success or the reason
+  // for failure.
+ BuildSsaResult TryBuildingSsa(StackHandleScopeCollection* handles);
void ComputeDominanceInformation();
void ClearDominanceInformation();
void BuildDominatorTree();
- void TransformToSsa();
void SimplifyCFG();
void SimplifyCatchBlocks();
- // Analyze all natural loops in this graph. Returns false if one
- // loop is not natural, that is the header does not dominate the
- // back edge.
- bool AnalyzeNaturalLoops() const;
+  // Analyze all natural loops in this graph. Returns a code indicating either
+  // success or the reason for failure. The method fails if a loop is not
+  // natural, that is, its header does not dominate a back edge, or if it is a
+  // throw-catch loop, i.e. its header is a catch block.
+ BuildSsaResult AnalyzeNaturalLoops() const;
// Iterate over blocks to compute try block membership. Needs reverse post
// order and loop information.
@@ -487,6 +598,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// (such as when the superclass could not be found).
ArtMethod* art_method_;
+  // Keep the RTI of inexact Object to avoid having to pass a stack handle
+  // collection pointer to passes which may create a NullConstant.
+ ReferenceTypeInfo inexact_object_rti_;
+
friend class SsaBuilder; // For caching constants.
friend class SsaLivenessAnalysis; // For the linear order.
ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -1082,6 +1197,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(Rem, BinaryOperation) \
M(Return, Instruction) \
M(ReturnVoid, Instruction) \
+ M(Ror, BinaryOperation) \
M(Shl, BinaryOperation) \
M(Shr, BinaryOperation) \
M(StaticFieldGet, Instruction) \
@@ -1673,122 +1789,6 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
DISALLOW_COPY_AND_ASSIGN(HEnvironment);
};
-class ReferenceTypeInfo : ValueObject {
- public:
- typedef Handle<mirror::Class> TypeHandle;
-
- static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) {
- // The constructor will check that the type_handle is valid.
- return ReferenceTypeInfo(type_handle, is_exact);
- }
-
- static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); }
-
- static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) {
- return handle.GetReference() != nullptr;
- }
-
- bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) {
- return IsValidHandle(type_handle_);
- }
-
- bool IsExact() const { return is_exact_; }
-
- bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
- DCHECK(IsValid());
- return GetTypeHandle()->IsObjectClass();
- }
-
- bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
- DCHECK(IsValid());
- return GetTypeHandle()->IsStringClass();
- }
-
- bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
- DCHECK(IsValid());
- return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
- }
-
- bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
- DCHECK(IsValid());
- return GetTypeHandle()->IsInterface();
- }
-
- bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
- DCHECK(IsValid());
- return GetTypeHandle()->IsArrayClass();
- }
-
- bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
- DCHECK(IsValid());
- return GetTypeHandle()->IsPrimitiveArray();
- }
-
- bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
- DCHECK(IsValid());
- return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
- }
-
- bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
- DCHECK(IsValid());
- if (!IsExact()) return false;
- if (!IsArrayClass()) return false;
- return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
- }
-
- bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
- DCHECK(IsValid());
- if (!IsExact()) return false;
- if (!IsArrayClass()) return false;
- if (!rti.IsArrayClass()) return false;
- return GetTypeHandle()->GetComponentType()->IsAssignableFrom(
- rti.GetTypeHandle()->GetComponentType());
- }
-
- Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
-
- bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
- DCHECK(IsValid());
- DCHECK(rti.IsValid());
- return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
- }
-
- bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
- DCHECK(IsValid());
- DCHECK(rti.IsValid());
- return GetTypeHandle().Get() != rti.GetTypeHandle().Get() &&
- GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
- }
-
- // Returns true if the type information provide the same amount of details.
- // Note that it does not mean that the instructions have the same actual type
- // (because the type can be the result of a merge).
- bool IsEqual(ReferenceTypeInfo rti) SHARED_REQUIRES(Locks::mutator_lock_) {
- if (!IsValid() && !rti.IsValid()) {
- // Invalid types are equal.
- return true;
- }
- if (!IsValid() || !rti.IsValid()) {
- // One is valid, the other not.
- return false;
- }
- return IsExact() == rti.IsExact()
- && GetTypeHandle().Get() == rti.GetTypeHandle().Get();
- }
-
- private:
- ReferenceTypeInfo();
- ReferenceTypeInfo(TypeHandle type_handle, bool is_exact);
-
- // The class of the object.
- TypeHandle type_handle_;
- // Whether or not the type is exact or a superclass of the actual type.
- // Whether or not we have any information about this type.
- bool is_exact_;
-};
-
-std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
-
class HInstruction : public ArenaObject<kArenaAllocInstruction> {
public:
HInstruction(SideEffects side_effects, uint32_t dex_pc)
@@ -4198,6 +4198,44 @@ class HXor : public HBinaryOperation {
DISALLOW_COPY_AND_ASSIGN(HXor);
};
+class HRor : public HBinaryOperation {
+ public:
+ HRor(Primitive::Type result_type, HInstruction* value, HInstruction* distance)
+ : HBinaryOperation(result_type, value, distance) {}
+
+ template <typename T, typename U, typename V>
+ T Compute(T x, U y, V max_shift_value) const {
+ static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value,
+ "V is not the unsigned integer type corresponding to T");
+ V ux = static_cast<V>(x);
+ if ((y & max_shift_value) == 0) {
+ return static_cast<T>(ux);
+ } else {
+ const V reg_bits = sizeof(T) * 8;
+ return static_cast<T>(ux >> (y & max_shift_value)) |
+ (x << (reg_bits - (y & max_shift_value)));
+ }
+ }
+
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetIntConstant(
+ Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetLongConstant(
+ Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetLongConstant(
+ Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+ }
+
+ DECLARE_INSTRUCTION(Ror);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HRor);
+};
+
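To make the rotate concrete: a rotate-right moves the low `distance` bits to
the top of the register, with the distance masked to the register width just
as Compute does via kMaxIntShiftValue and kMaxLongShiftValue. A self-contained
32-bit version of the same expression (plain C++, not the ART template):

    #include <cassert>
    #include <cstdint>

    // 32-bit rotate-right with the distance masked to [0, 31]. The
    // shift == 0 early return avoids the undefined full-width left shift.
    uint32_t Ror32(uint32_t x, uint32_t distance) {
      const uint32_t shift = distance & 31u;
      if (shift == 0u) {
        return x;
      }
      return (x >> shift) | (x << (32u - shift));
    }

    int main() {
      assert(Ror32(0x00000001u, 1) == 0x80000000u);   // low bit wraps to the top
      assert(Ror32(0x12345678u, 32) == 0x12345678u);  // distance taken modulo 32
      return 0;
    }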
// The value of a parameter in this method. Its location depends on
// the calling convention.
class HParameterValue : public HExpression<0> {
@@ -4378,7 +4416,16 @@ class HPhi : public HInstruction {
void RemoveInputAt(size_t index);
Primitive::Type GetType() const OVERRIDE { return type_; }
- void SetType(Primitive::Type type) { type_ = type; }
+ void SetType(Primitive::Type new_type) {
+ // Make sure that only valid type changes occur. The following are allowed:
+ // (1) int -> float/ref (primitive type propagation),
+ // (2) long -> double (primitive type propagation).
+ DCHECK(type_ == new_type ||
+ (type_ == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) ||
+ (type_ == Primitive::kPrimInt && new_type == Primitive::kPrimNot) ||
+ (type_ == Primitive::kPrimLong && new_type == Primitive::kPrimDouble));
+ type_ = new_type;
+ }
bool CanBeNull() const OVERRIDE { return can_be_null_; }
void SetCanBeNull(bool can_be_null) { can_be_null_ = can_be_null; }
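The DCHECK above is the entire contract of phi retyping, so it reads well as a
predicate. A standalone sketch, with a local enum standing in for
Primitive::Type:

    enum class PType { kInt, kLong, kFloat, kDouble, kRef };

    // Mirrors the DCHECK in HPhi::SetType: a phi keeps its type, or
    // primitive type propagation retypes it int->float, int->reference,
    // or long->double. Any other transition indicates a builder bug.
    bool IsValidPhiRetyping(PType old_type, PType new_type) {
      return old_type == new_type ||
             (old_type == PType::kInt && new_type == PType::kFloat) ||
             (old_type == PType::kInt && new_type == PType::kRef) ||
             (old_type == PType::kLong && new_type == PType::kDouble);
    }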
@@ -4618,7 +4665,21 @@ class HArrayGet : public HExpression<2> {
return false;
}
- void SetType(Primitive::Type type) { type_ = type; }
+ bool IsEquivalentOf(HArrayGet* other) const {
+ bool result = (GetDexPc() == other->GetDexPc());
+ if (kIsDebugBuild && result) {
+ DCHECK_EQ(GetBlock(), other->GetBlock());
+ DCHECK_EQ(GetArray(), other->GetArray());
+ DCHECK_EQ(GetIndex(), other->GetIndex());
+ if (Primitive::IsIntOrLongType(GetType())) {
+ DCHECK(Primitive::IsFloatingPointType(other->GetType()));
+ } else {
+ DCHECK(Primitive::IsFloatingPointType(GetType()));
+ DCHECK(Primitive::IsIntOrLongType(other->GetType()));
+ }
+ }
+ return result;
+ }
HInstruction* GetArray() const { return InputAt(0); }
HInstruction* GetIndex() const { return InputAt(1); }
@@ -4925,9 +4986,13 @@ class HLoadClass : public HExpression<1> {
class HLoadString : public HExpression<1> {
public:
- HLoadString(HCurrentMethod* current_method, uint32_t string_index, uint32_t dex_pc)
+ HLoadString(HCurrentMethod* current_method,
+ uint32_t string_index,
+ uint32_t dex_pc,
+ bool is_in_dex_cache)
: HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
- string_index_(string_index) {
+ string_index_(string_index),
+ is_in_dex_cache_(is_in_dex_cache) {
SetRawInputAt(0, current_method);
}
@@ -4945,6 +5010,7 @@ class HLoadString : public HExpression<1> {
bool NeedsEnvironment() const OVERRIDE { return false; }
bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return true; }
bool CanBeNull() const OVERRIDE { return false; }
+ bool IsInDexCache() const { return is_in_dex_cache_; }
static SideEffects SideEffectsForArchRuntimeCalls() {
return SideEffects::CanTriggerGC();
@@ -4954,6 +5020,7 @@ class HLoadString : public HExpression<1> {
private:
const uint32_t string_index_;
+ const bool is_in_dex_cache_;
DISALLOW_COPY_AND_ASSIGN(HLoadString);
};
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index e8439354af..18405f2623 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -17,6 +17,8 @@
#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
#define ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
+#include "nodes.h"
+
namespace art {
class HArm64DataProcWithShifterOp : public HExpression<2> {
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 831b626c4f..ba435180e5 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -501,11 +501,8 @@ static void RunOptimizations(HGraph* graph,
CompilerDriver* driver,
OptimizingCompilerStats* stats,
const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer) {
- ScopedObjectAccess soa(Thread::Current());
- StackHandleScopeCollection handles(soa.Self());
- ScopedThreadSuspension sts(soa.Self(), kNative);
-
+ PassObserver* pass_observer,
+ StackHandleScopeCollection* handles) {
ArenaAllocator* arena = graph->GetArena();
HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
@@ -522,29 +519,23 @@ static void RunOptimizations(HGraph* graph,
LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
- ReferenceTypePropagation* type_propagation =
- new (arena) ReferenceTypePropagation(graph, &handles);
HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
- graph, stats, "instruction_simplifier_after_types");
- InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
graph, stats, "instruction_simplifier_after_bce");
- InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
+ InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
graph, stats, "instruction_simplifier_before_codegen");
IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
HOptimization* optimizations1[] = {
intrinsics,
+ sharpening,
fold1,
simplify1,
- type_propagation,
- sharpening,
dce1,
- simplify2
};
RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
- MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles);
+ MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
HOptimization* optimizations2[] = {
// BooleanSimplifier depends on the InstructionSimplifier removing
@@ -557,13 +548,13 @@ static void RunOptimizations(HGraph* graph,
induction,
bce,
fold3, // evaluates code generated by dynamic bce
- simplify3,
+ simplify2,
lse,
dce2,
// The codegen has a few assumptions that only the instruction simplifier
// can satisfy. For example, the code generator does not expect to see a
// HTypeConversion from a type to the same type.
- simplify4,
+ simplify3,
};
RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
@@ -768,14 +759,29 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
}
VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
+
if (run_optimizations_) {
+ ScopedObjectAccess soa(Thread::Current());
+ StackHandleScopeCollection handles(soa.Self());
+ ScopedThreadSuspension sts(soa.Self(), kNative);
+
{
PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
- if (!graph->TryBuildingSsa()) {
- // We could not transform the graph to SSA, bailout.
- LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName()
- << ": it contains a non natural loop";
- MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
+ BuildSsaResult result = graph->TryBuildingSsa(&handles);
+ if (result != kBuildSsaSuccess) {
+ switch (result) {
+ case kBuildSsaFailNonNaturalLoop:
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledNonNaturalLoop);
+ break;
+ case kBuildSsaFailThrowCatchLoop:
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+ break;
+ case kBuildSsaFailAmbiguousArrayGet:
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayGet);
+ break;
+ case kBuildSsaSuccess:
+ UNREACHABLE();
+ }
pass_observer.SetGraphInBadState();
return nullptr;
}
@@ -786,7 +792,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
compiler_driver,
compilation_stats_.get(),
dex_compilation_unit,
- &pass_observer);
+ &pass_observer,
+ &handles);
codegen->CompileOptimized(code_allocator);
} else {
codegen->CompileBaseline(code_allocator);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 6296eedfb0..4713514bb2 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,7 +38,9 @@ enum MethodCompilationStat {
kRemovedDeadInstruction,
kRemovedNullCheck,
kNotCompiledBranchOutsideMethodCode,
- kNotCompiledCannotBuildSSA,
+ kNotCompiledNonNaturalLoop,
+ kNotCompiledThrowCatchLoop,
+ kNotCompiledAmbiguousArrayGet,
kNotCompiledHugeMethod,
kNotCompiledLargeMethodNoBranches,
kNotCompiledMalformedOpcode,
@@ -104,7 +106,9 @@ class OptimizingCompilerStats {
case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
case kRemovedNullCheck: name = "RemovedNullCheck"; break;
case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
- case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break;
+ case kNotCompiledNonNaturalLoop : name = "NotCompiledNonNaturalLoop"; break;
+ case kNotCompiledThrowCatchLoop : name = "NotCompiledThrowCatchLoop"; break;
+ case kNotCompiledAmbiguousArrayGet : name = "NotCompiledAmbiguousArrayGet"; break;
case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break;
case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break;
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 350f0b14ab..af3a005304 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -19,9 +19,13 @@
#include "nodes.h"
#include "builder.h"
+#include "common_compiler_test.h"
#include "compiler/dex/pass_manager.h"
#include "dex_file.h"
#include "dex_instruction.h"
+#include "handle_scope-inl.h"
+#include "scoped_thread_state_change.h"
+#include "ssa_builder.h"
#include "ssa_liveness_analysis.h"
#include "gtest/gtest.h"
@@ -42,7 +46,6 @@ namespace art {
#define FIVE_REGISTERS_CODE_ITEM(...) N_REGISTERS_CODE_ITEM(5, __VA_ARGS__)
#define SIX_REGISTERS_CODE_ITEM(...) N_REGISTERS_CODE_ITEM(6, __VA_ARGS__)
-
LiveInterval* BuildInterval(const size_t ranges[][2],
size_t number_of_ranges,
ArenaAllocator* allocator,
@@ -111,6 +114,12 @@ inline bool IsRemoved(HInstruction* instruction) {
return instruction->GetBlock() == nullptr;
}
+inline void TransformToSsa(HGraph* graph) {
+ ScopedObjectAccess soa(Thread::Current());
+ StackHandleScopeCollection handles(soa.Self());
+ EXPECT_EQ(graph->TryBuildingSsa(&handles), kBuildSsaSuccess);
+}
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_
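This helper is what makes the blanket TEST to TEST_F conversions in this
change work: TryBuildingSsa now needs a runtime (for the handle scope and the
class roots), which CommonCompilerTest provides. A hedged sketch of the
resulting test shape; the fixture and test names are invented and the
graph-building step is elided:

    class MyPassTest : public CommonCompilerTest {};  // runtime available

    TEST_F(MyPassTest, SimpleCfg) {
      ArenaPool pool;
      ArenaAllocator allocator(&pool);
      HGraph* graph = CreateGraph(&allocator);
      // ... build the graph from a dex code item, as the tests above do ...
      TransformToSsa(graph);  // EXPECTs kBuildSsaSuccess internally
      // ... run the pass under test and assert on the transformed graph ...
    }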
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
deleted file mode 100644
index bde54ee977..0000000000
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "primitive_type_propagation.h"
-
-#include "nodes.h"
-#include "ssa_builder.h"
-
-namespace art {
-
-static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) {
- // We trust the verifier has already done the necessary checking.
- switch (existing) {
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- case Primitive::kPrimNot:
- return existing;
- default:
- // Phis are initialized with a void type, so if we are asked
- // to merge with a void type, we should use the existing one.
- return new_type == Primitive::kPrimVoid
- ? existing
- : HPhi::ToPhiType(new_type);
- }
-}
-
-// Re-compute and update the type of the instruction. Returns
-// whether or not the type was changed.
-bool PrimitiveTypePropagation::UpdateType(HPhi* phi) {
- DCHECK(phi->IsLive());
- Primitive::Type existing = phi->GetType();
-
- Primitive::Type new_type = existing;
- for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
- Primitive::Type input_type = phi->InputAt(i)->GetType();
- new_type = MergeTypes(new_type, input_type);
- }
- phi->SetType(new_type);
-
- if (new_type == Primitive::kPrimDouble
- || new_type == Primitive::kPrimFloat
- || new_type == Primitive::kPrimNot) {
- // If the phi is of floating point type, we need to update its inputs to that
- // type. For inputs that are phis, we need to recompute their types.
- for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
- HInstruction* input = phi->InputAt(i);
- if (input->GetType() != new_type) {
- HInstruction* equivalent = (new_type == Primitive::kPrimNot)
- ? SsaBuilder::GetReferenceTypeEquivalent(input)
- : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
- phi->ReplaceInput(equivalent, i);
- if (equivalent->IsPhi()) {
- AddToWorklist(equivalent->AsPhi());
- } else if (equivalent == input) {
- // The input has changed its type. It can be an input of other phis,
- // so we need to put phi users in the work list.
- AddDependentInstructionsToWorklist(equivalent);
- }
- }
- }
- }
-
- return existing != new_type;
-}
-
-void PrimitiveTypePropagation::Run() {
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- VisitBasicBlock(it.Current());
- }
- ProcessWorklist();
-}
-
-void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) {
- if (block->IsLoopHeader()) {
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- HPhi* phi = it.Current()->AsPhi();
- if (phi->IsLive()) {
- AddToWorklist(phi);
- }
- }
- } else {
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- // Eagerly compute the type of the phi, for quicker convergence. Note
- // that we don't need to add users to the worklist because we are
- // doing a reverse post-order visit, therefore either the phi users are
- // non-loop phi and will be visited later in the visit, or are loop-phis,
- // and they are already in the work list.
- HPhi* phi = it.Current()->AsPhi();
- if (phi->IsLive()) {
- UpdateType(phi);
- }
- }
- }
-}
-
-void PrimitiveTypePropagation::ProcessWorklist() {
- while (!worklist_.empty()) {
- HPhi* instruction = worklist_.back();
- worklist_.pop_back();
- if (UpdateType(instruction)) {
- AddDependentInstructionsToWorklist(instruction);
- }
- }
-}
-
-void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) {
- DCHECK(instruction->IsLive());
- worklist_.push_back(instruction);
-}
-
-void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
- for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
- HPhi* phi = it.Current()->GetUser()->AsPhi();
- if (phi != nullptr && phi->IsLive() && phi->GetType() != instruction->GetType()) {
- AddToWorklist(phi);
- }
- }
-}
-
-} // namespace art
diff --git a/compiler/optimizing/primitive_type_propagation.h b/compiler/optimizing/primitive_type_propagation.h
deleted file mode 100644
index 212fcfc69f..0000000000
--- a/compiler/optimizing/primitive_type_propagation.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
-#define ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
-
-#include "base/arena_containers.h"
-#include "nodes.h"
-
-namespace art {
-
-// Compute and propagate primitive types of phis in the graph.
-class PrimitiveTypePropagation : public ValueObject {
- public:
- explicit PrimitiveTypePropagation(HGraph* graph)
- : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocPrimitiveTypePropagation)) {
- worklist_.reserve(kDefaultWorklistSize);
- }
-
- void Run();
-
- private:
- void VisitBasicBlock(HBasicBlock* block);
- void ProcessWorklist();
- void AddToWorklist(HPhi* phi);
- void AddDependentInstructionsToWorklist(HInstruction* instruction);
- bool UpdateType(HPhi* phi);
-
- HGraph* const graph_;
- ArenaVector<HPhi*> worklist_;
-
- static constexpr size_t kDefaultWorklistSize = 8;
-
- DISALLOW_COPY_AND_ASSIGN(PrimitiveTypePropagation);
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index fea903d9cf..94a297c9e6 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -40,7 +40,6 @@ class RTPVisitor : public HGraphDelegateVisitor {
throwable_class_handle_(throwable_class_handle),
worklist_(worklist) {}
- void VisitNullConstant(HNullConstant* null_constant) OVERRIDE;
void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;
@@ -71,8 +70,6 @@ class RTPVisitor : public HGraphDelegateVisitor {
ReferenceTypeInfo::TypeHandle string_class_handle_;
ReferenceTypeInfo::TypeHandle throwable_class_handle_;
ArenaVector<HInstruction*>* worklist_;
-
- static constexpr size_t kDefaultWorklistSize = 8;
};
ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
@@ -171,9 +168,13 @@ static void ForEachUntypedInstruction(HGraph* graph, Functor fn) {
ScopedObjectAccess soa(Thread::Current());
for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) {
for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) {
- HInstruction* instr = it.Current();
- if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
- fn(instr);
+ HPhi* phi = it.Current()->AsPhi();
+ // Note that the graph may contain dead phis when run from the SsaBuilder.
+ // Skip those as they might have a type conflict and will be removed anyway.
+ if (phi->IsLive() &&
+ phi->GetType() == Primitive::kPrimNot &&
+ !phi->GetReferenceTypeInfo().IsValid()) {
+ fn(phi);
}
}
for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) {
@@ -376,6 +377,75 @@ void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) {
}
}
+// Returns true if one of the patterns below is recognized. If so, the
+// InstanceOf instruction and the true branch of `ifInstruction` are returned
+// via the out parameters.
+// Recognized patterns:
+// (1) patterns equivalent to `if (obj instanceof X)`
+// (a) InstanceOf -> Equal to 1 -> If
+// (b) InstanceOf -> NotEqual to 0 -> If
+// (c) InstanceOf -> If
+// (2) patterns equivalent to `if (!(obj instanceof X))`
+// (a) InstanceOf -> Equal to 0 -> If
+// (b) InstanceOf -> NotEqual to 1 -> If
+// (c) InstanceOf -> BooleanNot -> If
+static bool MatchIfInstanceOf(HIf* ifInstruction,
+ /* out */ HInstanceOf** instanceOf,
+ /* out */ HBasicBlock** trueBranch) {
+ HInstruction* input = ifInstruction->InputAt(0);
+
+ if (input->IsEqual()) {
+ HInstruction* rhs = input->AsEqual()->GetConstantRight();
+ if (rhs != nullptr) {
+ HInstruction* lhs = input->AsEqual()->GetLeastConstantLeft();
+ if (lhs->IsInstanceOf() && rhs->IsIntConstant()) {
+ if (rhs->AsIntConstant()->IsOne()) {
+ // Case (1a)
+ *trueBranch = ifInstruction->IfTrueSuccessor();
+ } else {
+ // Case (2a)
+ DCHECK(rhs->AsIntConstant()->IsZero());
+ *trueBranch = ifInstruction->IfFalseSuccessor();
+ }
+ *instanceOf = lhs->AsInstanceOf();
+ return true;
+ }
+ }
+ } else if (input->IsNotEqual()) {
+ HInstruction* rhs = input->AsNotEqual()->GetConstantRight();
+ if (rhs != nullptr) {
+ HInstruction* lhs = input->AsNotEqual()->GetLeastConstantLeft();
+ if (lhs->IsInstanceOf() && rhs->IsIntConstant()) {
+ if (rhs->AsIntConstant()->IsZero()) {
+ // Case (1b)
+ *trueBranch = ifInstruction->IfTrueSuccessor();
+ } else {
+ // Case (2b)
+ DCHECK(rhs->AsIntConstant()->IsOne());
+ *trueBranch = ifInstruction->IfFalseSuccessor();
+ }
+ *instanceOf = lhs->AsInstanceOf();
+ return true;
+ }
+ }
+ } else if (input->IsInstanceOf()) {
+ // Case (1c)
+ *instanceOf = input->AsInstanceOf();
+ *trueBranch = ifInstruction->IfTrueSuccessor();
+ return true;
+ } else if (input->IsBooleanNot()) {
+ HInstruction* not_input = input->InputAt(0);
+ if (not_input->IsInstanceOf()) {
+ // Case (2c)
+ *instanceOf = not_input->AsInstanceOf();
+ *trueBranch = ifInstruction->IfFalseSuccessor();
+ return true;
+ }
+ }
+
+ return false;
+}
+
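Call sites consume the two out-parameters directly; BoundTypeForIfInstanceOf
below is the one caller in this change. A minimal usage sketch (assumes an
`if_instruction` already in scope):

    HInstanceOf* instance_of = nullptr;
    HBasicBlock* true_branch = nullptr;
    if (MatchIfInstanceOf(if_instruction, &instance_of, &true_branch)) {
      // `true_branch` is the successor taken when `obj instanceof X`
      // holds, whichever of the patterns (1a)-(2c) was matched.
    }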
// Detects if `block` is the True block for the pattern
// `if (x instanceof ClassX) { }`
// If that's the case insert an HBoundType instruction to bound the type of `x`
@@ -385,22 +455,11 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) {
if (ifInstruction == nullptr) {
return;
}
- HInstruction* ifInput = ifInstruction->InputAt(0);
- HInstruction* instanceOf = nullptr;
- HBasicBlock* instanceOfTrueBlock = nullptr;
- // The instruction simplifier has transformed:
- // - `if (a instanceof A)` into an HIf with an HInstanceOf input
- // - `if (!(a instanceof A)` into an HIf with an HBooleanNot input (which in turn
- // has an HInstanceOf input)
- // So we should not see the usual HEqual here.
- if (ifInput->IsInstanceOf()) {
- instanceOf = ifInput;
- instanceOfTrueBlock = ifInstruction->IfTrueSuccessor();
- } else if (ifInput->IsBooleanNot() && ifInput->InputAt(0)->IsInstanceOf()) {
- instanceOf = ifInput->InputAt(0);
- instanceOfTrueBlock = ifInstruction->IfFalseSuccessor();
- } else {
+ // Try to recognize common `if (instanceof)` and `if (!instanceof)` patterns.
+ HInstanceOf* instanceOf = nullptr;
+ HBasicBlock* instanceOfTrueBlock = nullptr;
+ if (!MatchIfInstanceOf(ifInstruction, &instanceOf, &instanceOfTrueBlock)) {
return;
}
@@ -505,13 +564,6 @@ void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr,
SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact);
}
-void RTPVisitor::VisitNullConstant(HNullConstant* instr) {
- // TODO: The null constant could be bound contextually (e.g. based on return statements)
- // to a more precise type.
- instr->SetReferenceTypeInfo(
- ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
-}
-
void RTPVisitor::VisitNewInstance(HNewInstance* instr) {
UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
}
@@ -523,7 +575,11 @@ void RTPVisitor::VisitNewArray(HNewArray* instr) {
static mirror::Class* GetClassFromDexCache(Thread* self, const DexFile& dex_file, uint16_t type_idx)
SHARED_REQUIRES(Locks::mutator_lock_) {
mirror::DexCache* dex_cache =
- Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, false);
+ Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, /* allow_failure */ true);
+ if (dex_cache == nullptr) {
+ // Dex cache could not be found. This should only happen during gtests.
+ return nullptr;
+ }
// Get type from dex cache assuming it was populated by the verifier.
return dex_cache->GetResolvedType(type_idx);
}
@@ -540,17 +596,24 @@ void RTPVisitor::VisitParameterValue(HParameterValue* instr) {
void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr,
const FieldInfo& info) {
- // The field index is unknown only during tests.
- if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) {
+ if (instr->GetType() != Primitive::kPrimNot) {
return;
}
ScopedObjectAccess soa(Thread::Current());
- ClassLinker* cl = Runtime::Current()->GetClassLinker();
- ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get());
- // TODO: There are certain cases where we can't resolve the field.
- // b/21914925 is open to keep track of a repro case for this issue.
- mirror::Class* klass = (field == nullptr) ? nullptr : field->GetType<false>();
+ mirror::Class* klass = nullptr;
+
+ // The field index is unknown only during tests.
+ if (info.GetFieldIndex() != kUnknownFieldIndex) {
+ ClassLinker* cl = Runtime::Current()->GetClassLinker();
+ ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get());
+ // TODO: There are certain cases where we can't resolve the field.
+ // b/21914925 is open to keep track of a repro case for this issue.
+ if (field != nullptr) {
+ klass = field->GetType<false>();
+ }
+ }
+
SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
}
@@ -666,7 +729,7 @@ void RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
}
void ReferenceTypePropagation::VisitPhi(HPhi* phi) {
- if (phi->GetType() != Primitive::kPrimNot) {
+ if (phi->IsDead() || phi->GetType() != Primitive::kPrimNot) {
return;
}
@@ -824,6 +887,8 @@ void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) {
// NullConstant inputs are ignored during merging as they do not provide any useful information.
// If all the inputs are NullConstants then the type of the phi will be set to Object.
void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
+ DCHECK(instr->IsLive());
+
size_t input_count = instr->InputCount();
size_t first_input_index_not_null = 0;
while (first_input_index_not_null < input_count &&
@@ -868,7 +933,7 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
// Re-computes and updates the nullability of the instruction. Returns whether or
// not the nullability was changed.
bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) {
- DCHECK(instr->IsPhi()
+ DCHECK((instr->IsPhi() && instr->AsPhi()->IsLive())
|| instr->IsBoundType()
|| instr->IsNullCheck()
|| instr->IsArrayGet());
@@ -916,7 +981,7 @@ void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) {
void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
HInstruction* user = it.Current()->GetUser();
- if (user->IsPhi()
+ if ((user->IsPhi() && user->AsPhi()->IsLive())
|| user->IsBoundType()
|| user->IsNullCheck()
|| (user->IsArrayGet() && (user->GetType() == Primitive::kPrimNot))) {
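The reshaped UpdateFieldAccessTypeInfo above funnels every failure mode, an
unknown field index in gtests or an unresolved field, into a null klass, and
SetClassAsTypeInfo then falls back to a conservative type. The same
guard-and-fall-through shape as a self-contained sketch; the resolver and the
sentinel value are stand-ins, not ART's:

    #include <cstdint>
    #include <iostream>

    struct Class { const char* descriptor; };            // stand-in type
    constexpr uint32_t kUnknownFieldIndex = UINT32_MAX;  // assumed sentinel

    Class* TryResolveFieldType(uint32_t index) {
      static Class string_class{"Ljava/lang/String;"};
      return (index == 7u) ? &string_class : nullptr;    // toy resolver
    }

    // Every failure path leaves klass null; the caller treats null as
    // "no information" and substitutes an inexact fallback type.
    Class* FieldTypeOrNull(uint32_t field_index) {
      Class* klass = nullptr;
      if (field_index != kUnknownFieldIndex) {     // unknown only in tests
        klass = TryResolveFieldType(field_index);  // may fail (b/21914925)
      }
      return klass;
    }

    int main() {
      std::cout << (FieldTypeOrNull(7u) ? "resolved" : "fallback") << "\n";
      std::cout << (FieldTypeOrNull(kUnknownFieldIndex) ? "resolved" : "fallback") << "\n";
      return 0;
    }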
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 080f970756..b900ed0966 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -28,13 +28,13 @@
#include "ssa_liveness_analysis.h"
#include "ssa_phi_elimination.h"
-#include "gtest/gtest.h"
-
namespace art {
// Note: the register allocator tests rely on the fact that constants have live
// intervals and registers get allocated to them.
+class RegisterAllocatorTest : public CommonCompilerTest {};
+
static bool Check(const uint16_t* data) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
@@ -42,7 +42,7 @@ static bool Check(const uint16_t* data) {
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
- graph->TryBuildingSsa();
+ TransformToSsa(graph);
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
@@ -57,7 +57,7 @@ static bool Check(const uint16_t* data) {
* Unit testing of RegisterAllocator::ValidateIntervals. Register allocator
* tests are based on this validation method.
*/
-TEST(RegisterAllocatorTest, ValidateIntervals) {
+TEST_F(RegisterAllocatorTest, ValidateIntervals) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = CreateGraph(&allocator);
@@ -146,7 +146,7 @@ TEST(RegisterAllocatorTest, ValidateIntervals) {
}
}
-TEST(RegisterAllocatorTest, CFG1) {
+TEST_F(RegisterAllocatorTest, CFG1) {
/*
* Test the following snippet:
* return 0;
@@ -166,7 +166,7 @@ TEST(RegisterAllocatorTest, CFG1) {
ASSERT_TRUE(Check(data));
}
-TEST(RegisterAllocatorTest, Loop1) {
+TEST_F(RegisterAllocatorTest, Loop1) {
/*
* Test the following snippet:
* int a = 0;
@@ -205,7 +205,7 @@ TEST(RegisterAllocatorTest, Loop1) {
ASSERT_TRUE(Check(data));
}
-TEST(RegisterAllocatorTest, Loop2) {
+TEST_F(RegisterAllocatorTest, Loop2) {
/*
* Test the following snippet:
* int a = 0;
@@ -259,11 +259,11 @@ static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) {
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
- graph->TryBuildingSsa();
+ TransformToSsa(graph);
return graph;
}
-TEST(RegisterAllocatorTest, Loop3) {
+TEST_F(RegisterAllocatorTest, Loop3) {
/*
* Test the following snippet:
* int a = 0
@@ -326,7 +326,7 @@ TEST(RegisterAllocatorTest, Loop3) {
ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
}
-TEST(RegisterAllocatorTest, FirstRegisterUse) {
+TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8,
@@ -366,7 +366,7 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) {
ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition());
}
-TEST(RegisterAllocatorTest, DeadPhi) {
+TEST_F(RegisterAllocatorTest, DeadPhi) {
/* Test for a dead loop phi taking as back-edge input a phi that also has
* this loop phi as input. Walking backwards in SsaDeadPhiElimination
* does not solve the problem because the loop phi will be visited last.
@@ -407,7 +407,7 @@ TEST(RegisterAllocatorTest, DeadPhi) {
* that share the same register. It should split the interval it is currently
* allocating for at the minimum lifetime position between the two inactive intervals.
*/
-TEST(RegisterAllocatorTest, FreeUntil) {
+TEST_F(RegisterAllocatorTest, FreeUntil) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN);
@@ -539,7 +539,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
return graph;
}
-TEST(RegisterAllocatorTest, PhiHint) {
+TEST_F(RegisterAllocatorTest, PhiHint) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HPhi *phi;
@@ -658,7 +658,7 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
return graph;
}
-TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
+TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HInstruction *field, *ret;
@@ -726,7 +726,7 @@ static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
return graph;
}
-TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
+TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HInstruction *first_sub, *second_sub;
@@ -795,7 +795,7 @@ static HGraph* BuildDiv(ArenaAllocator* allocator,
return graph;
}
-TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
+TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HInstruction *div;
@@ -819,7 +819,7 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
// Test a bug in the register allocator, where allocating a blocked
// register would lead to spilling an inactive interval at the wrong
// position.
-TEST(RegisterAllocatorTest, SpillInactive) {
+TEST_F(RegisterAllocatorTest, SpillInactive) {
ArenaPool pool;
// Create a synthesized graph to please the register_allocator and
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 9e6cfbe653..9e869e18e9 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -17,214 +17,11 @@
#include "ssa_builder.h"
#include "nodes.h"
-#include "primitive_type_propagation.h"
+#include "reference_type_propagation.h"
#include "ssa_phi_elimination.h"
namespace art {
-// Returns whether this is a loop header phi which was eagerly created but later
-// found inconsistent due to the vreg being undefined in one of its predecessors.
-// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination.
-static bool IsUndefinedLoopHeaderPhi(HPhi* phi) {
- return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size();
-}
-
-/**
- * A debuggable application may require to reviving phis, to ensure their
- * associated DEX register is available to a debugger. This class implements
- * the logic for statement (c) of the SsaBuilder (see ssa_builder.h). It
- * also makes sure that phis with incompatible input types are not revived
- * (statement (b) of the SsaBuilder).
- *
- * This phase must be run after detecting dead phis through the
- * DeadPhiElimination phase, and before deleting the dead phis.
- */
-class DeadPhiHandling : public ValueObject {
- public:
- explicit DeadPhiHandling(HGraph* graph)
- : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
- worklist_.reserve(kDefaultWorklistSize);
- }
-
- void Run();
-
- private:
- void VisitBasicBlock(HBasicBlock* block);
- void ProcessWorklist();
- void AddToWorklist(HPhi* phi);
- void AddDependentInstructionsToWorklist(HPhi* phi);
- bool UpdateType(HPhi* phi);
-
- HGraph* const graph_;
- ArenaVector<HPhi*> worklist_;
-
- static constexpr size_t kDefaultWorklistSize = 8;
-
- DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling);
-};
-
-static bool HasConflictingEquivalent(HPhi* phi) {
- if (phi->GetNext() == nullptr) {
- return false;
- }
- HPhi* next = phi->GetNext()->AsPhi();
- if (next->GetRegNumber() == phi->GetRegNumber()) {
- if (next->GetType() == Primitive::kPrimVoid) {
- // We only get a void type for an equivalent phi we processed and found out
- // it was conflicting.
- return true;
- } else {
- // Go to the next phi, in case it is also an equivalent.
- return HasConflictingEquivalent(next);
- }
- }
- return false;
-}
-
-bool DeadPhiHandling::UpdateType(HPhi* phi) {
- if (phi->IsDead()) {
- // Phi was rendered dead while waiting in the worklist because it was replaced
- // with an equivalent.
- return false;
- }
-
- Primitive::Type existing = phi->GetType();
-
- bool conflict = false;
- Primitive::Type new_type = existing;
- for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
- HInstruction* input = phi->InputAt(i);
- if (input->IsPhi() && input->AsPhi()->IsDead()) {
- // We are doing a reverse post order visit of the graph, reviving
- // phis that have environment uses and updating their types. If an
- // input is a phi, and it is dead (because its input types are
- // conflicting), this phi must be marked dead as well.
- conflict = true;
- break;
- }
- Primitive::Type input_type = HPhi::ToPhiType(input->GetType());
-
- // The only acceptable transitions are:
- // - From void to typed: first time we update the type of this phi.
- // - From int to reference (or reference to int): the phi has to change
- // to reference type. If the integer input cannot be converted to a
- // reference input, the phi will remain dead.
- if (new_type == Primitive::kPrimVoid) {
- new_type = input_type;
- } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) {
- if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) {
- // If we already asked for an equivalent of the input phi, but that equivalent
- // ended up conflicting, make this phi conflicting too.
- conflict = true;
- break;
- }
- HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input);
- if (equivalent == nullptr) {
- conflict = true;
- break;
- }
- phi->ReplaceInput(equivalent, i);
- if (equivalent->IsPhi()) {
- DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot);
- // We created a new phi, but that phi has the same inputs as the old phi. We
- // add it to the worklist to ensure its inputs can also be converted to reference.
- // If not, it will remain dead, and the algorithm will make the current phi dead
- // as well.
- equivalent->AsPhi()->SetLive();
- AddToWorklist(equivalent->AsPhi());
- }
- } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) {
- new_type = Primitive::kPrimNot;
- // Start over, we may request reference equivalents for the inputs of the phi.
- i = -1;
- } else if (new_type != input_type) {
- conflict = true;
- break;
- }
- }
-
- if (conflict) {
- phi->SetType(Primitive::kPrimVoid);
- phi->SetDead();
- return true;
- } else if (existing == new_type) {
- return false;
- }
-
- DCHECK(phi->IsLive());
- phi->SetType(new_type);
-
- // There might exist a `new_type` equivalent of `phi` already. In that case,
- // we replace the equivalent with the, now live, `phi`.
- HPhi* equivalent = phi->GetNextEquivalentPhiWithSameType();
- if (equivalent != nullptr) {
- // There cannot be more than two equivalents with the same type.
- DCHECK(equivalent->GetNextEquivalentPhiWithSameType() == nullptr);
- // If doing fix-point iteration, the equivalent might be in `worklist_`.
- // Setting it dead will make UpdateType skip it.
- equivalent->SetDead();
- equivalent->ReplaceWith(phi);
- }
-
- return true;
-}
-
-void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) {
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- HPhi* phi = it.Current()->AsPhi();
- if (IsUndefinedLoopHeaderPhi(phi)) {
- DCHECK(phi->IsDead());
- continue;
- }
- if (phi->IsDead() && phi->HasEnvironmentUses()) {
- phi->SetLive();
- if (block->IsLoopHeader()) {
- // Loop phis must have a type to guarantee convergence of the algorithm.
- DCHECK_NE(phi->GetType(), Primitive::kPrimVoid);
- AddToWorklist(phi);
- } else {
- // Because we are doing a reverse post order visit, all inputs of
- // this phi have been visited and therefore had their (initial) type set.
- UpdateType(phi);
- }
- }
- }
-}
-
-void DeadPhiHandling::ProcessWorklist() {
- while (!worklist_.empty()) {
- HPhi* instruction = worklist_.back();
- worklist_.pop_back();
- // Note that the same equivalent phi can be added multiple times in the work list, if
- // used by multiple phis. The first call to `UpdateType` will know whether the phi is
- // dead or live.
- if (instruction->IsLive() && UpdateType(instruction)) {
- AddDependentInstructionsToWorklist(instruction);
- }
- }
-}
-
-void DeadPhiHandling::AddToWorklist(HPhi* instruction) {
- DCHECK(instruction->IsLive());
- worklist_.push_back(instruction);
-}
-
-void DeadPhiHandling::AddDependentInstructionsToWorklist(HPhi* instruction) {
- for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
- HPhi* phi = it.Current()->GetUser()->AsPhi();
- if (phi != nullptr && !phi->IsDead()) {
- AddToWorklist(phi);
- }
- }
-}
-
-void DeadPhiHandling::Run() {
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- VisitBasicBlock(it.Current());
- }
- ProcessWorklist();
-}
-
void SsaBuilder::SetLoopHeaderPhiInputs() {
for (size_t i = loop_headers_.size(); i > 0; --i) {
HBasicBlock* block = loop_headers_[i - 1];
@@ -285,10 +82,11 @@ void SsaBuilder::EquivalentPhisCleanup() {
HPhi* phi = it.Current()->AsPhi();
HPhi* next = phi->GetNextEquivalentPhiWithSameType();
if (next != nullptr) {
- // Make sure we do not replace a live phi with a dead phi. A live phi has been
- // handled by the type propagation phase, unlike a dead phi.
+ // Make sure we do not replace a live phi with a dead phi. A live phi
+ // has been handled by the type propagation phase, unlike a dead phi.
if (next->IsLive()) {
phi->ReplaceWith(next);
+ phi->SetDead();
} else {
next->ReplaceWith(phi);
}
@@ -300,64 +98,7 @@ void SsaBuilder::EquivalentPhisCleanup() {
}
}
-void SsaBuilder::BuildSsa() {
- // 1) Visit in reverse post order. We need to have all predecessors of a block visited
- // (with the exception of loops) in order to create the right environment for that
- // block. For loops, we create phis whose inputs will be set in 2).
- for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
- VisitBasicBlock(it.Current());
- }
-
- // 2) Set inputs of loop phis.
- SetLoopHeaderPhiInputs();
-
- // 3) Mark dead phis. This will mark phis that are only used by environments:
- // at the DEX level, the type of these phis does not need to be consistent, but
- // our code generator will complain if the inputs of a phi do not have the same
- // type. The marking allows the type propagation to know which phis it needs
- // to handle. We mark but do not eliminate: the elimination will be done in
- // step 9).
- SsaDeadPhiElimination dead_phis_for_type_propagation(GetGraph());
- dead_phis_for_type_propagation.MarkDeadPhis();
-
- // 4) Propagate types of phis. At this point, phis are typed void in the general
- // case, or float/double/reference when we created an equivalent phi. So we
- // need to propagate the types across phis to give them a correct type.
- PrimitiveTypePropagation type_propagation(GetGraph());
- type_propagation.Run();
-
- // 5) When creating equivalent phis we copy the inputs of the original phi which
- // may be improperly typed. This was fixed during the type propagation in 4) but
- // as a result we may end up with two equivalent phis with the same type for
- // the same dex register. This pass cleans them up.
- EquivalentPhisCleanup();
-
- // 6) Mark dead phis again. Step 4) may have introduced new phis.
- // Step 5) might enable the death of new phis.
- SsaDeadPhiElimination dead_phis(GetGraph());
- dead_phis.MarkDeadPhis();
-
- // 7) Now that the graph is correctly typed, we can get rid of redundant phis.
- // Note that we cannot do this phase before type propagation, otherwise
- // we could get rid of phi equivalents, whose presence is a requirement for the
- // type propagation phase. Note that this is to satisfy statement (a) of the
- // SsaBuilder (see ssa_builder.h).
- SsaRedundantPhiElimination redundant_phi(GetGraph());
- redundant_phi.Run();
-
- // 8) Fix the type for null constants which are part of an equality comparison.
- // We need to do this after redundant phi elimination, to ensure the only cases
- // that we can see are reference comparison against 0. The redundant phi
- // elimination ensures we do not see a phi taking two 0 constants in a HEqual
- // or HNotEqual.
- FixNullConstantType();
-
- // 9) Make sure environments use the right phi "equivalent": a phi marked dead
- // can have a phi equivalent that is not dead. We must therefore update
- // all environment uses of the dead phi to use its equivalent. Note that there
- // can be multiple phis for the same Dex register that are live (for example
- // when merging constants), in which case it is OK for the environments
- // to just reference one.
+void SsaBuilder::FixEnvironmentPhis() {
for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) {
@@ -378,24 +119,345 @@ void SsaBuilder::BuildSsa() {
phi->ReplaceWith(next);
}
}
+}
- // 10) Deal with phis to guarantee liveness of phis in case of a debuggable
- // application. This is for satisfying statement (c) of the SsaBuilder
- // (see ssa_builder.h).
- if (GetGraph()->IsDebuggable()) {
- DeadPhiHandling dead_phi_handler(GetGraph());
- dead_phi_handler.Run();
+static void AddDependentInstructionsToWorklist(HInstruction* instruction,
+ ArenaVector<HPhi*>* worklist) {
+  // If `instruction` is a dead phi, a type conflict was just identified. All of
+  // its live phi users, and transitively the users of those users, need to be
+  // marked dead/conflicting too, so we add them to the worklist. Otherwise we
+  // add only those users whose type does not match and needs to be updated.
+ bool add_all_live_phis = instruction->IsPhi() && instruction->AsPhi()->IsDead();
+ for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
+ HInstruction* user = it.Current()->GetUser();
+ if (user->IsPhi() && user->AsPhi()->IsLive()) {
+ if (add_all_live_phis || user->GetType() != instruction->GetType()) {
+ worklist->push_back(user->AsPhi());
+ }
+ }
}
+}
+
+// Find a candidate primitive type for `phi` by merging the type of its inputs.
+// Return false if conflict is identified.
+static bool TypePhiFromInputs(HPhi* phi) {
+ Primitive::Type common_type = phi->GetType();
- // 11) Now that the right phis are used for the environments, and we
- // have potentially revive dead phis in case of a debuggable application,
- // we can eliminate phis we do not need. Regardless of the debuggable status,
- // this phase is necessary for statement (b) of the SsaBuilder (see ssa_builder.h),
- // as well as for the code generation, which does not deal with phis of conflicting
+ for (HInputIterator it(phi); !it.Done(); it.Advance()) {
+ HInstruction* input = it.Current();
+ if (input->IsPhi() && input->AsPhi()->IsDead()) {
+      // Phis are constructed live, so if an input is a dead phi, it must have
+      // been made dead due to a type conflict. Mark this phi conflicting too.
+ return false;
+ }
+
+ Primitive::Type input_type = HPhi::ToPhiType(input->GetType());
+ if (common_type == input_type) {
+ // No change in type.
+ } else if (Primitive::ComponentSize(common_type) != Primitive::ComponentSize(input_type)) {
+ // Types are of different sizes, e.g. int vs. long. Must be a conflict.
+ return false;
+ } else if (Primitive::IsIntegralType(common_type)) {
+ // Previous inputs were integral, this one is not but is of the same size.
+ // This does not imply conflict since some bytecode instruction types are
+ // ambiguous. TypeInputsOfPhi will either type them or detect a conflict.
+ DCHECK(Primitive::IsFloatingPointType(input_type) || input_type == Primitive::kPrimNot);
+ common_type = input_type;
+ } else if (Primitive::IsIntegralType(input_type)) {
+ // Input is integral, common type is not. Same as in the previous case, if
+ // there is a conflict, it will be detected during TypeInputsOfPhi.
+ DCHECK(Primitive::IsFloatingPointType(common_type) || common_type == Primitive::kPrimNot);
+ } else {
+ // Combining float and reference types. Clearly a conflict.
+ DCHECK((common_type == Primitive::kPrimFloat && input_type == Primitive::kPrimNot) ||
+ (common_type == Primitive::kPrimNot && input_type == Primitive::kPrimFloat));
+ return false;
+ }
+ }
+
+  // We have found a candidate type for the phi. Set it and return true. We may
+  // still discover a conflict while typing the individual inputs in TypeInputsOfPhi.
+ phi->SetType(common_type);
+ return true;
+}
+
+// Replace inputs of `phi` to match its type. Return false if conflict is identified.
+bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) {
+ Primitive::Type common_type = phi->GetType();
+ if (common_type == Primitive::kPrimVoid || Primitive::IsIntegralType(common_type)) {
+    // The phi either has only other untyped phis as inputs (common_type ==
+    // kPrimVoid), or `common_type` is integral and we do not need to retype
+    // ambiguous inputs because they are always constructed with the integral
+    // type candidate.
+ if (kIsDebugBuild) {
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+ HInstruction* input = phi->InputAt(i);
+ if (common_type == Primitive::kPrimVoid) {
+ DCHECK(input->IsPhi() && input->GetType() == Primitive::kPrimVoid);
+ } else {
+ DCHECK((input->IsPhi() && input->GetType() == Primitive::kPrimVoid) ||
+ HPhi::ToPhiType(input->GetType()) == common_type);
+ }
+ }
+ }
+ // Inputs did not need to be replaced, hence no conflict. Report success.
+ return true;
+ } else {
+ DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type));
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+ HInstruction* input = phi->InputAt(i);
+ if (input->GetType() != common_type) {
+ // Input type does not match phi's type. Try to retype the input or
+ // generate a suitably typed equivalent.
+ HInstruction* equivalent = (common_type == Primitive::kPrimNot)
+ ? GetReferenceTypeEquivalent(input)
+ : GetFloatOrDoubleEquivalent(input, common_type);
+ if (equivalent == nullptr) {
+ // Input could not be typed. Report conflict.
+ return false;
+ }
+ // Make sure the input did not change its type and we do not need to
+ // update its users.
+ DCHECK_NE(input, equivalent);
+
+ phi->ReplaceInput(equivalent, i);
+ if (equivalent->IsPhi()) {
+ worklist->push_back(equivalent->AsPhi());
+ }
+ }
+ }
+ // All inputs either matched the type of the phi or we successfully replaced
+ // them with a suitable equivalent. Report success.
+ return true;
+ }
+}
+
+// Attempt to set the primitive type of `phi` to match its inputs. Return
+// whether the phi was changed by the algorithm, either by retyping it or by
+// marking it dead on conflict.
+bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) {
+ DCHECK(phi->IsLive());
+ Primitive::Type original_type = phi->GetType();
+
+ // Try to type the phi in two stages:
+ // (1) find a candidate type for the phi by merging types of all its inputs,
+ // (2) try to type the phi's inputs to that candidate type.
+ // Either of these stages may detect a type conflict and fail, in which case
+ // we immediately abort.
+ if (!TypePhiFromInputs(phi) || !TypeInputsOfPhi(phi, worklist)) {
+ // Conflict detected. Mark the phi dead and return true because it changed.
+ phi->SetDead();
+ return true;
+ }
+
+ // Return true if the type of the phi has changed.
+ return phi->GetType() != original_type;
+}
+
+void SsaBuilder::RunPrimitiveTypePropagation() {
+ ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+
+ for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ if (block->IsLoopHeader()) {
+ for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ HPhi* phi = phi_it.Current()->AsPhi();
+ if (phi->IsLive()) {
+ worklist.push_back(phi);
+ }
+ }
+ } else {
+ for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        // Eagerly compute the type of the phi, for quicker convergence. Note
+        // that we don't need to add users to the worklist because we are
+        // doing a reverse post-order visit: a user is either a non-loop phi
+        // that will be visited later in this pass, or a loop phi that is
+        // already in the worklist.
+ HPhi* phi = phi_it.Current()->AsPhi();
+ if (phi->IsLive()) {
+ UpdatePrimitiveType(phi, &worklist);
+ }
+ }
+ }
+ }
+
+ ProcessPrimitiveTypePropagationWorklist(&worklist);
+ EquivalentPhisCleanup();
+}
+
+void SsaBuilder::ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist) {
+  // Drain the worklist until a fixed point is reached.
+ while (!worklist->empty()) {
+ HPhi* phi = worklist->back();
+ worklist->pop_back();
+ // The phi could have been made dead as a result of conflicts while in the
+ // worklist. If it is now dead, there is no point in updating its type.
+ if (phi->IsLive() && UpdatePrimitiveType(phi, worklist)) {
+ AddDependentInstructionsToWorklist(phi, worklist);
+ }
+ }
+}
+
+static HArrayGet* FindFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+ Primitive::Type type = aget->GetType();
+ DCHECK(Primitive::IsIntOrLongType(type));
+ HArrayGet* next = aget->GetNext()->AsArrayGet();
+ return (next != nullptr && next->IsEquivalentOf(aget)) ? next : nullptr;
+}
+
+static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+ Primitive::Type type = aget->GetType();
+ DCHECK(Primitive::IsIntOrLongType(type));
+ DCHECK(FindFloatOrDoubleEquivalentOfArrayGet(aget) == nullptr);
+
+ HArrayGet* equivalent = new (aget->GetBlock()->GetGraph()->GetArena()) HArrayGet(
+ aget->GetArray(),
+ aget->GetIndex(),
+ type == Primitive::kPrimInt ? Primitive::kPrimFloat : Primitive::kPrimDouble,
+ aget->GetDexPc());
+ aget->GetBlock()->InsertInstructionAfter(equivalent, aget);
+ return equivalent;
+}
+
+// Returns true if the array input of `aget` is either of type int[] or long[].
+// Should only be called on ArrayGets with ambiguous type (int/float, long/double)
+// whose input array was typed to a primitive array class by RTP.
+static bool IsArrayGetOnIntegralArray(HArrayGet* aget) SHARED_REQUIRES(Locks::mutator_lock_) {
+ ReferenceTypeInfo array_type = aget->GetArray()->GetReferenceTypeInfo();
+ DCHECK(array_type.IsPrimitiveArrayClass());
+ ReferenceTypeInfo::TypeHandle array_type_handle = array_type.GetTypeHandle();
+
+ bool is_integral_type;
+ if (Primitive::Is64BitType(aget->GetType())) {
+ is_integral_type = array_type_handle->GetComponentType()->IsPrimitiveLong();
+ DCHECK(is_integral_type || array_type_handle->GetComponentType()->IsPrimitiveDouble());
+ } else {
+ is_integral_type = array_type_handle->GetComponentType()->IsPrimitiveInt();
+ DCHECK(is_integral_type || array_type_handle->GetComponentType()->IsPrimitiveFloat());
+ }
+ return is_integral_type;
+}
+
+bool SsaBuilder::FixAmbiguousArrayGets() {
+ if (ambiguous_agets_.empty()) {
+ return true;
+ }
+
+  // The wrong ArrayGet equivalent may still have phi uses, uses coming from
+  // ArraySets (because those are untyped), and environment uses (if --debuggable).
+  // After resolving all ambiguous ArrayGets, we will re-run primitive type
+  // propagation on the phis which need to be updated.
+ ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+
+ {
+ ScopedObjectAccess soa(Thread::Current());
+
+ for (HArrayGet* aget_int : ambiguous_agets_) {
+ if (!aget_int->GetArray()->GetReferenceTypeInfo().IsPrimitiveArrayClass()) {
+ // RTP did not type the input array. Bail.
+ return false;
+ }
+
+ HArrayGet* aget_float = FindFloatOrDoubleEquivalentOfArrayGet(aget_int);
+ if (IsArrayGetOnIntegralArray(aget_int)) {
+ if (aget_float != nullptr) {
+ // There is a float/double equivalent. We must replace it and re-run
+ // primitive type propagation on all dependent instructions.
+ aget_float->ReplaceWith(aget_int);
+ aget_float->GetBlock()->RemoveInstruction(aget_float);
+ AddDependentInstructionsToWorklist(aget_int, &worklist);
+ }
+ } else {
+ if (aget_float == nullptr) {
+ // This is a float/double ArrayGet but there were no typed uses which
+ // would create the typed equivalent. Create it now.
+ aget_float = CreateFloatOrDoubleEquivalentOfArrayGet(aget_int);
+ }
+ // Replace the original int/long instruction. Note that it may have phi
+ // uses, environment uses, as well as real uses (from untyped ArraySets).
+ // We need to re-run primitive type propagation on its dependent instructions.
+ aget_int->ReplaceWith(aget_float);
+ aget_int->GetBlock()->RemoveInstruction(aget_int);
+ AddDependentInstructionsToWorklist(aget_float, &worklist);
+ }
+ }
+ }
+
+ // Set a flag stating that types of ArrayGets have been resolved. This is used
+ // by GetFloatOrDoubleEquivalentOfArrayGet to report conflict.
+ agets_fixed_ = true;
+
+ if (!worklist.empty()) {
+ ProcessPrimitiveTypePropagationWorklist(&worklist);
+ EquivalentPhisCleanup();
+ }
+
+ return true;
+}
+
+BuildSsaResult SsaBuilder::BuildSsa() {
+ // 1) Visit in reverse post order. We need to have all predecessors of a block
+ // visited (with the exception of loops) in order to create the right environment
+ // for that block. For loops, we create phis whose inputs will be set in 2).
+ for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+ VisitBasicBlock(it.Current());
+ }
+
+ // 2) Set inputs of loop header phis.
+ SetLoopHeaderPhiInputs();
+
+ // 3) Propagate types of phis. At this point, phis are typed void in the general
+ // case, or float/double/reference if we created an equivalent phi. So we need
+ // to propagate the types across phis to give them a correct type. If a type
+ // conflict is detected in this stage, the phi is marked dead.
+ RunPrimitiveTypePropagation();
+
+ // 4) Now that the correct primitive types have been assigned, we can get rid
+ // of redundant phis. Note that we cannot do this phase before type propagation,
+ // otherwise we could get rid of phi equivalents, whose presence is a requirement
+ // for the type propagation phase. Note that this is to satisfy statement (a)
+ // of the SsaBuilder (see ssa_builder.h).
+ SsaRedundantPhiElimination(GetGraph()).Run();
+
+ // 5) Fix the type for null constants which are part of an equality comparison.
+ // We need to do this after redundant phi elimination, to ensure the only cases
+ // that we can see are reference comparison against 0. The redundant phi
+ // elimination ensures we do not see a phi taking two 0 constants in a HEqual
+ // or HNotEqual.
+ FixNullConstantType();
+
+ // 6) Compute type of reference type instructions. The pass assumes that
+ // NullConstant has been fixed up.
+ ReferenceTypePropagation(GetGraph(), handles_).Run();
+
+ // 7) Step 1) duplicated ArrayGet instructions with ambiguous type (int/float
+ // or long/double). Now that RTP computed the type of the array input, the
+ // ambiguity can be resolved and the correct equivalent kept.
+ if (!FixAmbiguousArrayGets()) {
+ return kBuildSsaFailAmbiguousArrayGet;
+ }
+
+ // 8) Mark dead phis. This will mark phis which are not used by instructions
+ // or other live phis. If compiling as debuggable code, phis will also be kept
+ // live if they have an environment use.
+  SsaDeadPhiElimination dead_phi_elimination(GetGraph());
+  dead_phi_elimination.MarkDeadPhis();
+
+ // 9) Make sure environments use the right phi equivalent: a phi marked dead
+ // can have a phi equivalent that is not dead. In that case we have to replace
+ // it with the live equivalent because deoptimization and try/catch rely on
+ // environments containing values of all live vregs at that point. Note that
+ // there can be multiple phis for the same Dex register that are live
+ // (for example when merging constants), in which case it is okay for the
+ // environments to just reference one.
+ FixEnvironmentPhis();
+
+  // 10) Now that the right phis are used for the environments, we can eliminate
+  // phis we do not need. Regardless of the debuggable status, this phase is
+  // necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well
+  // as for the code generation, which does not deal with phis of conflicting
// input types.
- dead_phis.EliminateDeadPhis();
+  dead_phi_elimination.EliminateDeadPhis();
- // 12) Clear locals.
+ // 11) Clear locals.
for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
!it.Done();
it.Advance()) {
@@ -404,6 +466,8 @@ void SsaBuilder::BuildSsa() {
current->GetBlock()->RemoveInstruction(current);
}
}
+
+ return kBuildSsaSuccess;
}
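
The new return value gives BuildSsa a failure path for unresolvable ArrayGets. A call site would consume it roughly as follows; this is a sketch, the wrapper name and bail-out policy are assumptions, and only SsaBuilder, BuildSsaResult and the enum values come from this change:

    // Hypothetical wrapper around the new API (illustration only).
    static BuildSsaResult RunSsaBuilder(HGraph* graph, StackHandleScopeCollection* handles) {
      SsaBuilder builder(graph, handles);
      BuildSsaResult result = builder.BuildSsa();
      if (result == kBuildSsaFailAmbiguousArrayGet) {
        // Reference type propagation could not type an array input, so an
        // ambiguous aget stayed unresolved; the caller is expected to bail
        // out and leave the method to a different backend.
      }
      return result;
    }
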
ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
@@ -591,6 +655,8 @@ HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) {
* phi with a floating point / reference type.
*/
HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
+ DCHECK(phi->IsLive()) << "Cannot get equivalent of a dead phi since it would create a live one.";
+
// We place the floating point /reference phi next to this phi.
HInstruction* next = phi->GetNext();
if (next != nullptr
@@ -606,35 +672,50 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive:
ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
- // Copy the inputs. Note that the graph may not be correctly typed by doing this copy,
- // but the type propagation phase will fix it.
+ // Copy the inputs. Note that the graph may not be correctly typed
+ // by doing this copy, but the type propagation phase will fix it.
new_phi->SetRawInputAt(i, phi->InputAt(i));
}
phi->GetBlock()->InsertPhiAfter(new_phi, phi);
+ DCHECK(new_phi->IsLive());
return new_phi;
} else {
+    // An existing equivalent was found. If it is dead, a conflict was previously
+    // identified and we return nullptr instead.
HPhi* next_phi = next->AsPhi();
DCHECK_EQ(next_phi->GetType(), type);
- if (next_phi->IsDead()) {
- // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis)
- // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This
- // cannot revive undefined loop header phis because they cannot have uses.
- DCHECK(!IsUndefinedLoopHeaderPhi(next_phi));
- next_phi->SetLive();
+ return next_phi->IsLive() ? next_phi : nullptr;
+ }
+}
+
+HArrayGet* SsaBuilder::GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
+ DCHECK(Primitive::IsIntegralType(aget->GetType()));
+
+ if (!Primitive::IsIntOrLongType(aget->GetType())) {
+ // Cannot type boolean, char, byte, short to float/double.
+ return nullptr;
+ }
+
+ DCHECK(ContainsElement(ambiguous_agets_, aget));
+ if (agets_fixed_) {
+ // This used to be an ambiguous ArrayGet but its type has been resolved to
+ // int/long. Requesting a float/double equivalent should lead to a conflict.
+ if (kIsDebugBuild) {
+ ScopedObjectAccess soa(Thread::Current());
+ DCHECK(IsArrayGetOnIntegralArray(aget));
}
- return next_phi;
+ return nullptr;
+ } else {
+ // This is an ambiguous ArrayGet which has not been resolved yet. Return an
+ // equivalent float/double instruction to use until it is resolved.
+ HArrayGet* equivalent = FindFloatOrDoubleEquivalentOfArrayGet(aget);
+ return (equivalent == nullptr) ? CreateFloatOrDoubleEquivalentOfArrayGet(aget) : equivalent;
}
}
-HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
- HInstruction* value,
- Primitive::Type type) {
+HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* value, Primitive::Type type) {
if (value->IsArrayGet()) {
- // The verifier has checked that values in arrays cannot be used for both
- // floating point and non-floating point operations. It is therefore safe to just
- // change the type of the operation.
- value->AsArrayGet()->SetType(type);
- return value;
+ return GetFloatOrDoubleEquivalentOfArrayGet(value->AsArrayGet());
} else if (value->IsLongConstant()) {
return GetDoubleEquivalent(value->AsLongConstant());
} else if (value->IsIntConstant()) {
@@ -642,12 +723,7 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
} else if (value->IsPhi()) {
return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type);
} else {
- // For other instructions, we assume the verifier has checked that the dex format is correctly
- // typed and the value in a dex register will not be used for both floating point and
- // non-floating point operations. So the only reason an instruction would want a floating
- // point equivalent is for an unused phi that will be removed by the dead phi elimination phase.
- DCHECK(user->IsPhi()) << "is actually " << user->DebugName() << " (" << user->GetId() << ")";
- return value;
+ return nullptr;
}
}
@@ -662,15 +738,17 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) {
}
void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
+ Primitive::Type load_type = load->GetType();
HInstruction* value = (*current_locals_)[load->GetLocal()->GetRegNumber()];
// If the operation requests a specific type, we make sure its input is of that type.
- if (load->GetType() != value->GetType()) {
- if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) {
- value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
- } else if (load->GetType() == Primitive::kPrimNot) {
+ if (load_type != value->GetType()) {
+ if (load_type == Primitive::kPrimFloat || load_type == Primitive::kPrimDouble) {
+ value = GetFloatOrDoubleEquivalent(value, load_type);
+ } else if (load_type == Primitive::kPrimNot) {
value = GetReferenceTypeEquivalent(value);
}
}
+
load->ReplaceWith(value);
load->GetBlock()->RemoveInstruction(load);
}
@@ -760,4 +838,13 @@ void SsaBuilder::VisitTemporary(HTemporary* temp) {
temp->GetBlock()->RemoveInstruction(temp);
}
+void SsaBuilder::VisitArrayGet(HArrayGet* aget) {
+ Primitive::Type type = aget->GetType();
+ DCHECK(!Primitive::IsFloatingPointType(type));
+ if (Primitive::IsIntOrLongType(type)) {
+ ambiguous_agets_.push_back(aget);
+ }
+ VisitInstruction(aget);
+}
+
} // namespace art
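
To make the ambiguity concrete: VisitArrayGet above records every int/long aget, and a floating-point equivalent is only materialized when such a use appears. In IR-building terms this amounts to the following sketch, reusing the HArrayGet constructor and InsertInstructionAfter call shown in this file; `arena`, `array`, `index`, `dex_pc` and `block` are placeholders, not the CL's code:

    // The builder first creates the integral typing of "v0 = array[index]".
    HArrayGet* aget_int =
        new (arena) HArrayGet(array, index, Primitive::kPrimInt, dex_pc);
    block->AddInstruction(aget_int);  // recorded in ambiguous_agets_
    // A float use of v0 later calls GetFloatOrDoubleEquivalentOfArrayGet,
    // which places the equivalent right next to the original:
    HArrayGet* aget_float =
        new (arena) HArrayGet(array, index, Primitive::kPrimFloat, dex_pc);
    block->InsertInstructionAfter(aget_float, aget_int);
    // Once RTP types `array`, FixAmbiguousArrayGets keeps exactly one of them.
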
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index dcce5e4c2c..ed6f5cab51 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -49,17 +49,20 @@ static constexpr int kDefaultNumberOfLoops = 2;
*/
class SsaBuilder : public HGraphVisitor {
public:
- explicit SsaBuilder(HGraph* graph)
+ explicit SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles)
: HGraphVisitor(graph),
+ handles_(handles),
+ agets_fixed_(false),
current_locals_(nullptr),
loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
+ ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
locals_for_(graph->GetBlocks().size(),
ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
loop_headers_.reserve(kDefaultNumberOfLoops);
}
- void BuildSsa();
+ BuildSsaResult BuildSsa();
// Returns locals vector for `block`. If it is a catch block, the vector will be
// prepopulated with catch phis for vregs which are defined in `current_locals_`.
@@ -71,23 +74,38 @@ class SsaBuilder : public HGraphVisitor {
void VisitStoreLocal(HStoreLocal* store);
void VisitInstruction(HInstruction* instruction);
void VisitTemporary(HTemporary* instruction);
-
- static HInstruction* GetFloatOrDoubleEquivalent(HInstruction* user,
- HInstruction* instruction,
- Primitive::Type type);
-
- static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
+ void VisitArrayGet(HArrayGet* aget);
static constexpr const char* kSsaBuilderPassName = "ssa_builder";
private:
void SetLoopHeaderPhiInputs();
+ void FixEnvironmentPhis();
void FixNullConstantType();
void EquivalentPhisCleanup();
+ void RunPrimitiveTypePropagation();
+
+ // Attempts to resolve types of aget and aget-wide instructions from reference
+ // type information on the input array. Returns false if the type of the array
+ // is unknown.
+ bool FixAmbiguousArrayGets();
+
+ bool TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist);
+ bool UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist);
+ void ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist);
- static HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
- static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
- static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
+ HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type);
+ HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
+
+ HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
+ HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
+ HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
+ HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
+
+ StackHandleScopeCollection* const handles_;
+
+ // True if types of ambiguous ArrayGets have been resolved.
+ bool agets_fixed_;
// Locals for the current block being visited.
ArenaVector<HInstruction*>* current_locals_;
@@ -96,6 +114,8 @@ class SsaBuilder : public HGraphVisitor {
// over these blocks to set the inputs of their phis.
ArenaVector<HBasicBlock*> loop_headers_;
+ ArenaVector<HArrayGet*> ambiguous_agets_;
+
// HEnvironment for each block.
ArenaVector<ArenaVector<HInstruction*>> locals_for_;
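
The worklist methods declared above implement a small fixed-point iteration. Stripped of the IR, the scheme looks like the self-contained toy below; the stand-in types are illustrative and none of this is code from the change:

    #include <vector>

    enum class ToyType { kVoid, kInt, kFloat };

    struct ToyPhi {
      ToyType type = ToyType::kVoid;
      bool dead = false;
      std::vector<ToyPhi*> inputs;
      std::vector<ToyPhi*> users;
    };

    // Merge input types into a candidate; false signals a type conflict.
    static bool TypeFromInputs(ToyPhi* phi) {
      ToyType common = phi->type;
      for (ToyPhi* input : phi->inputs) {
        if (input->dead) {
          return false;  // conflicts propagate from inputs
        }
        if (common == ToyType::kVoid) {
          common = input->type;  // first typed input sets the candidate
        } else if (input->type != ToyType::kVoid && input->type != common) {
          return false;  // e.g. int meets float
        }
      }
      phi->type = common;
      return true;
    }

    // Revisit phis until nothing changes: the analogue of
    // ProcessPrimitiveTypePropagationWorklist above.
    static void Propagate(std::vector<ToyPhi*>* worklist) {
      while (!worklist->empty()) {
        ToyPhi* phi = worklist->back();
        worklist->pop_back();
        if (phi->dead) continue;  // made dead while queued
        ToyType old_type = phi->type;
        bool changed = false;
        if (!TypeFromInputs(phi)) {
          phi->dead = true;  // conflict: kill the phi and notify users
          changed = true;
        } else {
          changed = (phi->type != old_type);
        }
        if (changed) {
          for (ToyPhi* user : phi->users) {
            if (!user->dead) worklist->push_back(user);
          }
        }
      }
    }

Termination follows because a phi can only move from void to a concrete type, and from there only to dead, so each phi re-enters the worklist a bounded number of times.
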
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index a3219dcc38..63aba88c2b 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -40,15 +40,17 @@ void SsaDeadPhiElimination::MarkDeadPhis() {
continue;
}
- bool has_non_phi_use = false;
- for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
- if (!use_it.Current()->GetUser()->IsPhi()) {
- has_non_phi_use = true;
- break;
+ bool keep_alive = (graph_->IsDebuggable() && phi->HasEnvironmentUses());
+ if (!keep_alive) {
+ for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
+ if (!use_it.Current()->GetUser()->IsPhi()) {
+ keep_alive = true;
+ break;
+ }
}
}
- if (has_non_phi_use) {
+ if (keep_alive) {
worklist_.push_back(phi);
} else {
phi->SetDead();
@@ -94,8 +96,8 @@ void SsaDeadPhiElimination::EliminateDeadPhis() {
for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done();
use_it.Advance()) {
HInstruction* user = use_it.Current()->GetUser();
- DCHECK(user->IsLoopHeaderPhi()) << user->GetId();
- DCHECK(user->AsPhi()->IsDead()) << user->GetId();
+ DCHECK(user->IsLoopHeaderPhi());
+ DCHECK(user->AsPhi()->IsDead());
}
}
// Remove the phi from use lists of its inputs.
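
The first hunk changes when a phi is initially considered live. Restated as a standalone predicate, the rule now reads as follows; this is a sketch built from the calls visible in this hunk, while the pass itself keeps the logic inline in MarkDeadPhis():

    static bool ShouldKeepAlive(HGraph* graph, HPhi* phi) {
      // In debuggable code, a phi observable through an environment must stay
      // live so a debugger can read the DEX register it represents.
      if (graph->IsDebuggable() && phi->HasEnvironmentUses()) {
        return true;
      }
      // Otherwise a phi is kept only if it has a real (non-phi) use.
      for (HUseIterator<HInstruction*> it(phi->GetUses()); !it.Done(); it.Advance()) {
        if (!it.Current()->GetUser()->IsPhi()) {
          return true;
        }
      }
      return false;
    }
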
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 024278f4b2..d2885a8fd7 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -28,6 +28,8 @@
namespace art {
+class SsaTest : public CommonCompilerTest {};
+
class SsaPrettyPrinter : public HPrettyPrinter {
public:
explicit SsaPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {}
@@ -83,11 +85,10 @@ static void TestCode(const uint16_t* data, const char* expected) {
bool graph_built = builder.BuildGraph(*item);
ASSERT_TRUE(graph_built);
- graph->BuildDominatorTree();
+ TransformToSsa(graph);
// Suspend checks implementation may change in the future, and this test relies
// on how instructions are ordered.
RemoveSuspendChecks(graph);
- graph->TransformToSsa();
ReNumberInstructions(graph);
// Test that phis had their type set.
@@ -103,7 +104,7 @@ static void TestCode(const uint16_t* data, const char* expected) {
ASSERT_STREQ(expected, printer.str().c_str());
}
-TEST(SsaTest, CFG1) {
+TEST_F(SsaTest, CFG1) {
// Test that we get rid of loads and stores.
const char* expected =
"BasicBlock 0, succ: 1\n"
@@ -131,7 +132,7 @@ TEST(SsaTest, CFG1) {
TestCode(data, expected);
}
-TEST(SsaTest, CFG2) {
+TEST_F(SsaTest, CFG2) {
// Test that we create a phi for the join block of an if control flow instruction
// when there is only code in the else branch.
const char* expected =
@@ -162,7 +163,7 @@ TEST(SsaTest, CFG2) {
TestCode(data, expected);
}
-TEST(SsaTest, CFG3) {
+TEST_F(SsaTest, CFG3) {
// Test that we create a phi for the join block of an if control flow instruction
// when both branches update a local.
const char* expected =
@@ -195,7 +196,7 @@ TEST(SsaTest, CFG3) {
TestCode(data, expected);
}
-TEST(SsaTest, Loop1) {
+TEST_F(SsaTest, Loop1) {
// Test that we create a phi for an initialized local at entry of a loop.
const char* expected =
"BasicBlock 0, succ: 1\n"
@@ -228,7 +229,7 @@ TEST(SsaTest, Loop1) {
TestCode(data, expected);
}
-TEST(SsaTest, Loop2) {
+TEST_F(SsaTest, Loop2) {
// Simple loop with one preheader and one back edge.
const char* expected =
"BasicBlock 0, succ: 1\n"
@@ -258,7 +259,7 @@ TEST(SsaTest, Loop2) {
TestCode(data, expected);
}
-TEST(SsaTest, Loop3) {
+TEST_F(SsaTest, Loop3) {
// Test that a local not yet defined at the entry of a loop is handled properly.
const char* expected =
"BasicBlock 0, succ: 1\n"
@@ -290,7 +291,7 @@ TEST(SsaTest, Loop3) {
TestCode(data, expected);
}
-TEST(SsaTest, Loop4) {
+TEST_F(SsaTest, Loop4) {
// Make sure we support a preheader of a loop not being the first predecessor
// in the predecessor list of the header.
const char* expected =
@@ -325,7 +326,7 @@ TEST(SsaTest, Loop4) {
TestCode(data, expected);
}
-TEST(SsaTest, Loop5) {
+TEST_F(SsaTest, Loop5) {
// Make sure we create a preheader of a loop when a header originally has two
// incoming blocks and one back edge.
const char* expected =
@@ -367,7 +368,7 @@ TEST(SsaTest, Loop5) {
TestCode(data, expected);
}
-TEST(SsaTest, Loop6) {
+TEST_F(SsaTest, Loop6) {
// Test a loop with one preheader and two back edges (e.g. continue).
const char* expected =
"BasicBlock 0, succ: 1\n"
@@ -406,7 +407,7 @@ TEST(SsaTest, Loop6) {
TestCode(data, expected);
}
-TEST(SsaTest, Loop7) {
+TEST_F(SsaTest, Loop7) {
// Test a loop with one preheader, one back edge, and two exit edges (e.g. break).
const char* expected =
"BasicBlock 0, succ: 1\n"
@@ -448,7 +449,7 @@ TEST(SsaTest, Loop7) {
TestCode(data, expected);
}
-TEST(SsaTest, DeadLocal) {
+TEST_F(SsaTest, DeadLocal) {
// Test that we correctly handle a local not being used.
const char* expected =
"BasicBlock 0, succ: 1\n"
@@ -466,7 +467,7 @@ TEST(SsaTest, DeadLocal) {
TestCode(data, expected);
}
-TEST(SsaTest, LocalInIf) {
+TEST_F(SsaTest, LocalInIf) {
// Test that we do not create a phi in the join block when one predecessor
// does not update the local.
const char* expected =
@@ -496,7 +497,7 @@ TEST(SsaTest, LocalInIf) {
TestCode(data, expected);
}
-TEST(SsaTest, MultiplePredecessors) {
+TEST_F(SsaTest, MultiplePredecessors) {
// Test that we do not create a phi when one predecessor
// does not update the local.
const char* expected =