Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc        |  65
-rw-r--r--  compiler/optimizing/code_generator.cc                  |  12
-rw-r--r--  compiler/optimizing/code_generator.h                   | 120
-rw-r--r--  compiler/optimizing/code_generator_arm.cc              | 807
-rw-r--r--  compiler/optimizing/code_generator_arm.h               | 110
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc            |  30
-rw-r--r--  compiler/optimizing/code_generator_arm64.h             |   2
-rw-r--r--  compiler/optimizing/code_generator_mips.cc             | 391
-rw-r--r--  compiler/optimizing/code_generator_mips.h              |   2
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc           | 216
-rw-r--r--  compiler/optimizing/code_generator_mips64.h            |   2
-rw-r--r--  compiler/optimizing/code_generator_x86.cc              |  22
-rw-r--r--  compiler/optimizing/code_generator_x86.h               |   2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc           |  30
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h            |   2
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc          |  26
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc                  |  96
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc                |   6
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc                 |  12
-rw-r--r--  compiler/optimizing/intrinsics_mips.h                  |   5
-rw-r--r--  compiler/optimizing/nodes.cc                           |  28
-rw-r--r--  compiler/optimizing/nodes.h                            |  26
-rw-r--r--  compiler/optimizing/nodes_arm64.h                      |   1
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc             |  38
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc          |   5
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc |   2
-rw-r--r--  compiler/optimizing/register_allocator.cc              |   6
-rw-r--r--  compiler/optimizing/stack_map_stream.cc                |   2
-rw-r--r--  compiler/optimizing/stack_map_test.cc                  |  16
29 files changed, 1454 insertions(+), 628 deletions(-)
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index da2f9cbed5..eee6116098 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1142,7 +1142,7 @@ class BCEVisitor : public HGraphVisitor {
loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) {
SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
- HoistToPreheaderOrDeoptBlock(loop, array_get);
+ HoistToPreHeaderOrDeoptBlock(loop, array_get);
}
}
}
@@ -1280,7 +1280,8 @@ class BCEVisitor : public HGraphVisitor {
// as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
// iterations) and by not combining accesses (e.g. a[i], a[i-3], a[i+5] etc.), these tests
// correctly guard against any possible OOB (including arithmetic wrap-around cases).
- HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ HBasicBlock* block = GetPreHeader(loop, instruction);
induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
if (lower != nullptr) {
InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
@@ -1358,7 +1359,7 @@ class BCEVisitor : public HGraphVisitor {
return true;
} else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) {
if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) {
- HoistToPreheaderOrDeoptBlock(loop, length);
+ HoistToPreHeaderOrDeoptBlock(loop, length);
return true;
}
}
@@ -1376,7 +1377,8 @@ class BCEVisitor : public HGraphVisitor {
HInstruction* array = check->InputAt(0);
if (loop->IsDefinedOutOfTheLoop(array)) {
// Generate: if (array == null) deoptimize;
- HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ HBasicBlock* block = GetPreHeader(loop, check);
HInstruction* cond =
new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
InsertDeopt(loop, block, cond);
@@ -1423,6 +1425,28 @@ class BCEVisitor : public HGraphVisitor {
return true;
}
+ /**
+ * Returns the appropriate preheader for the loop, depending on whether the
+ * instruction appears in the loop header or in the loop body proper.
+ */
+ HBasicBlock* GetPreHeader(HLoopInformation* loop, HInstruction* instruction) {
+ // Use the preheader unless there is an earlier generated deoptimization block, since
+ // hoisted expressions may depend on and/or be used by the deoptimization tests.
+ HBasicBlock* header = loop->GetHeader();
+ const uint32_t loop_id = header->GetBlockId();
+ auto it = taken_test_loop_.find(loop_id);
+ if (it != taken_test_loop_.end()) {
+ HBasicBlock* block = it->second;
+ // If always taken, keep it that way by returning the original preheader, which
+ // can be found by following the single predecessor of the true-block twice
+ // (true_block <- if_block <- old_preheader; see the taken-test diagram below).
+ if (instruction->GetBlock() == header) {
+ return block->GetSinglePredecessor()->GetSinglePredecessor();
+ }
+ return block;
+ }
+ return loop->GetPreHeader();
+ }
+
/** Inserts a deoptimization test. */
void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
HInstruction* suspend = loop->GetSuspendCheck();
@@ -1437,28 +1461,17 @@ class BCEVisitor : public HGraphVisitor {
}
/** Hoists instruction out of the loop to preheader or deoptimization block. */
- void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
- // Use preheader unless there is an earlier generated deoptimization block since
- // hoisted expressions may depend on and/or used by the deoptimization tests.
- const uint32_t loop_id = loop->GetHeader()->GetBlockId();
- HBasicBlock* preheader = loop->GetPreHeader();
- HBasicBlock* block = preheader;
- auto it = taken_test_loop_.find(loop_id);
- if (it != taken_test_loop_.end()) {
- block = it->second;
- }
- // Hoist the instruction.
+ void HoistToPreHeaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
+ HBasicBlock* block = GetPreHeader(loop, instruction);
DCHECK(!instruction->HasEnvironment());
instruction->MoveBefore(block->GetLastInstruction());
}
/**
- * Adds a new taken-test structure to a loop if needed (and not already done).
+ * Adds a new taken-test structure to a loop if needed and not already done.
* The taken-test protects range analysis evaluation code to avoid any
* deoptimization caused by incorrect trip-count evaluation in non-taken loops.
*
- * Returns block in which deoptimizations/invariants can be put.
- *
* old_preheader
* |
* if_block <- taken-test protects deoptimization block
@@ -1490,16 +1503,11 @@ class BCEVisitor : public HGraphVisitor {
* array[i] = 0;
* }
*/
- HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
- // Not needed (can use preheader), or already done (can reuse)?
+ void TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
+ // Not needed (can use preheader) or already done (can reuse)?
const uint32_t loop_id = loop->GetHeader()->GetBlockId();
- if (!needs_taken_test) {
- return loop->GetPreHeader();
- } else {
- auto it = taken_test_loop_.find(loop_id);
- if (it != taken_test_loop_.end()) {
- return it->second;
- }
+ if (!needs_taken_test || taken_test_loop_.find(loop_id) != taken_test_loop_.end()) {
+ return;
}
// Generate top test structure.
@@ -1528,7 +1536,6 @@ class BCEVisitor : public HGraphVisitor {
if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
taken_test_loop_.Put(loop_id, true_block);
- return true_block;
}
/**
@@ -1543,7 +1550,7 @@ class BCEVisitor : public HGraphVisitor {
* \ /
* x_1 = phi(x_0, null) <- synthetic phi
* |
- * header
+ * new_preheader
*/
void InsertPhiNodes() {
// Scan all new deoptimization blocks.
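Taken together, the bounds_check_elimination.cc hunks split the old transform-and-return API in two: TransformLoopForDeoptimizationIfNeeded() now only builds the taken-test structure, and GetPreHeader() separately picks the insertion block per instruction. A condensed sketch of the resulting caller pattern, assembled from the null-check hunk above (`check` and `array` as in that hunk):

    // Ensure the taken-test structure exists (no-op when not needed or already built).
    TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
    // Pick the insertion block: the deoptimization (true) block if one was
    // generated, except for loop-header instructions, which keep using the
    // original preheader; otherwise the plain preheader.
    HBasicBlock* block = GetPreHeader(loop, check);
    // Generate: if (array == null) deoptimize;
    HInstruction* cond =
        new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
    InsertDeopt(loop, block, cond);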
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 53d3615a41..ea0b9eca9a 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -997,6 +997,12 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
stack_map_stream_.EndStackMapEntry();
}
+bool CodeGenerator::HasStackMapAtCurrentPc() {
+ uint32_t pc = GetAssembler()->CodeSize();
+ size_t count = stack_map_stream_.GetNumberOfStackMaps();
+ return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc;
+}
+
void CodeGenerator::RecordCatchBlockInfo() {
ArenaAllocator* arena = graph_->GetArena();
@@ -1320,12 +1326,6 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod
<< "instruction->DebugName()=" << instruction->DebugName()
<< " slow_path->GetDescription()=" << slow_path->GetDescription();
DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
- // Control flow would not come back into the code if a fatal slow
- // path is taken, so we do not care if it triggers GC.
- slow_path->IsFatal() ||
- // HDeoptimize is a special case: we know we are not coming back from
- // it into the code.
- instruction->IsDeoptimize() ||
// When read barriers are enabled, some instructions use a
// slow path to emit a read barrier, which does not trigger
// GC, is not fatal, nor is emitted by HDeoptimize
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index eade05d7b6..5958cd89bc 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -269,6 +269,8 @@ class CodeGenerator {
// Record native to dex mapping for a suspend point. Required by runtime.
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
+ // Check whether we have already recorded a mapping at this PC.
+ bool HasStackMapAtCurrentPc();
bool CanMoveNullCheckToUser(HNullCheck* null_check);
void MaybeRecordImplicitNullCheck(HInstruction* instruction);
@@ -611,7 +613,7 @@ class CodeGenerator {
ArenaVector<SlowPathCode*> slow_paths_;
- // The current slow path that we're generating code for.
+ // The current slow-path that we're generating code for.
SlowPathCode* current_slow_path_;
// The current block index in `block_order_` of the block
@@ -672,6 +674,122 @@ class CallingConvention {
DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
+/**
+ * A templated class SlowPathGenerator with a templated method NewSlowPath()
+ * that can be used by any code generator to share equivalent slow-paths with
+ * the objective of reducing generated code size.
+ *
+ * InstructionType: instruction that requires SlowPathCodeType
+ * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
+ */
+template <typename InstructionType>
+class SlowPathGenerator {
+ static_assert(std::is_base_of<HInstruction, InstructionType>::value,
+ "InstructionType is not a subclass of art::HInstruction");
+
+ public:
+ SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
+ : graph_(graph),
+ codegen_(codegen),
+ slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
+
+ // Creates and adds a new slow-path, if needed, or returns an existing one otherwise.
+ // Templating the method (rather than the whole class) on the slow-path type enables
+ // keeping this code in a generic, non-architecture-specific place.
+ //
+ // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
+ // To relax this requirement, we would need some RTTI on the stored slow-paths,
+ // or template the class as a whole on SlowPathType.
+ template <typename SlowPathCodeType>
+ SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
+ static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
+ "SlowPathCodeType is not a subclass of art::SlowPathCode");
+ static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
+ "SlowPathCodeType is not constructible from InstructionType*");
+ // Iterate over potential candidates for sharing. Currently, only same-typed
+ // slow-paths with exactly the same dex-pc are viable candidates.
+ // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
+ const uint32_t dex_pc = instruction->GetDexPc();
+ auto iter = slow_path_map_.find(dex_pc);
+ if (iter != slow_path_map_.end()) {
+ auto candidates = iter->second;
+ for (const auto& it : candidates) {
+ InstructionType* other_instruction = it.first;
+ SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
+ // Determine if the instructions allow for slow-path sharing.
+ if (HaveSameLiveRegisters(instruction, other_instruction) &&
+ HaveSameStackMap(instruction, other_instruction)) {
+ // Can share: reuse existing one.
+ return other_slow_path;
+ }
+ }
+ } else {
+ // First time this dex-pc is seen.
+ iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
+ }
+ // Cannot share: create and add new slow-path for this particular dex-pc.
+ SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
+ iter->second.emplace_back(std::make_pair(instruction, slow_path));
+ codegen_->AddSlowPath(slow_path);
+ return slow_path;
+ }
+
+ private:
+ // Tests if both instructions have the same set of live physical registers. This ensures
+ // the slow-path has exactly the same preamble for saving these registers to the stack.
+ bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
+ const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
+ const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
+ RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
+ RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
+ return (((live1->GetCoreRegisters() & core_spill) ==
+ (live2->GetCoreRegisters() & core_spill)) &&
+ ((live1->GetFloatingPointRegisters() & fpu_spill) ==
+ (live2->GetFloatingPointRegisters() & fpu_spill)));
+ }
+
+ // Tests if both instructions have the same stack map. This ensures the interpreter
+ // will find exactly the same dex-registers at the same entries.
+ bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
+ DCHECK(i1->HasEnvironment());
+ DCHECK(i2->HasEnvironment());
+ // We conservatively test if the two instructions find exactly the same instruction
+ // and location in each dex-register. This guarantees they will have the same stack map.
+ HEnvironment* e1 = i1->GetEnvironment();
+ HEnvironment* e2 = i2->GetEnvironment();
+ if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
+ return false;
+ }
+ for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
+ if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
+ !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ HGraph* const graph_;
+ CodeGenerator* const codegen_;
+
+ // Map from dex-pc to vector of already existing instruction/slow-path pairs.
+ ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
+
+ DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
+};
+
+class InstructionCodeGenerator : public HGraphVisitor {
+ public:
+ InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
+ : HGraphVisitor(graph),
+ deopt_slow_paths_(graph, codegen) {}
+
+ protected:
+ // Add a slow-path generator for each instruction/slow-path combination that desires sharing.
+ // TODO: under the current regime, only deopt sharing makes sense; extend later.
+ SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
+};
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
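For scale, a usage sketch of the new sharing machinery, mirroring the ARM change further down in this diff: NewSlowPath() returns an already-registered slow path when an earlier HDeoptimize at the same dex-pc has matching live registers and stack map, and otherwise allocates, registers, and returns a fresh one.

    // Condensed from code_generator_arm.cc below. The visitor inherits
    // deopt_slow_paths_ from InstructionCodeGenerator; note there is no
    // explicit codegen_->AddSlowPath() call anymore, NewSlowPath() does it.
    void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
      SlowPathCode* slow_path =
          deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
      GenerateTestAndBranch(deoptimize,
                            /* condition_input_index */ 0,
                            slow_path->GetEntryLabel(),
                            /* false_target */ nullptr);
    }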
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 9a1f2b8717..d64b8784e1 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -350,24 +350,24 @@ class TypeCheckSlowPathARM : public SlowPathCode {
class DeoptimizationSlowPathARM : public SlowPathCode {
public:
- explicit DeoptimizationSlowPathARM(HInstruction* instruction)
+ explicit DeoptimizationSlowPathARM(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
- DCHECK(instruction_->IsDeoptimize());
- HDeoptimize* deoptimize = instruction_->AsDeoptimize();
- uint32_t dex_pc = deoptimize->GetDexPc();
- CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM);
};
@@ -417,6 +417,56 @@ class ArraySetSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
};
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathARM : public SlowPathCode {
+ public:
+ ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj)
+ : instruction_(instruction), out_(out), obj_(obj) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+ arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+ arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ b(GetExitLabel());
+ }
+
+ private:
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location obj_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+};
+
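For orientation, a hedged sketch of the fast path this slow path backs, assuming Baker-style read barriers (the actual emission lives in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier and friends, used later in this diff):

    // Conceptual shape of a reference load under a Baker read barrier:
    //   ref = *(obj + offset);            // fast path: a plain load
    //   if (/* obj's lock word marks it as needing a read barrier */) {
    //     ref = Mark(ref);                // slow path: ReadBarrierMarkSlowPathARM
    //   }
    // The slow path implements Mark(): it saves the live registers, passes the
    // reference in the first argument register, calls the pReadBarrierMark
    // entrypoint (mirror::Object* -> mirror::Object*, as the
    // CheckEntrypointTypes assertion above states), and copies the possibly
    // forwarded result from R0 back into `out_`.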
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
public:
@@ -438,7 +488,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
// to be instrumented, e.g.:
//
// __ LoadFromOffset(kLoadWord, out, out, offset);
- // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+ // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
//
// In that case, we have lost the information about the original
// object, and the emitted read barrier cannot work properly.
@@ -454,7 +504,9 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
DCHECK(!instruction_->IsInvoke() ||
(instruction_->IsInvokeStaticOrDirect() &&
- instruction_->GetLocations()->Intrinsified()));
+ instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for heap reference slow path: "
+ << instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -596,14 +648,18 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
class ReadBarrierForRootSlowPathARM : public SlowPathCode {
public:
ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root)
- : instruction_(instruction), out_(out), root_(root) {}
+ : instruction_(instruction), out_(out), root_(root) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
Register reg_out = out_.AsRegister<Register>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -857,7 +913,7 @@ void CodeGeneratorARM::UpdateBlockedPairRegisters() const {
}
InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -1358,17 +1414,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
-void InstructionCodeGeneratorARM::GenerateCompareWithImmediate(Register left, int32_t right) {
- ShifterOperand operand;
- if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, right, &operand)) {
- __ cmp(left, operand);
- } else {
- Register temp = IP;
- __ LoadImmediate(temp, right);
- __ cmp(left, ShifterOperand(temp));
- }
-}
-
void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
Label* true_label,
Label* false_label) {
@@ -1434,7 +1479,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
int32_t val_low = Low32Bits(value);
int32_t val_high = High32Bits(value);
- GenerateCompareWithImmediate(left_high, val_high);
+ __ CmpConstant(left_high, val_high);
if (if_cond == kCondNE) {
__ b(true_label, ARMCondition(true_high_cond));
} else if (if_cond == kCondEQ) {
@@ -1444,7 +1489,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
__ b(false_label, ARMCondition(false_high_cond));
}
// Must be equal high, so compare the lows.
- GenerateCompareWithImmediate(left_low, val_low);
+ __ CmpConstant(left_low, val_low);
} else {
Register right_high = right.AsRegisterPairHigh<Register>();
Register right_low = right.AsRegisterPairLow<Register>();
@@ -1568,7 +1613,7 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio
__ cmp(left, ShifterOperand(right.AsRegister<Register>()));
} else {
DCHECK(right.IsConstant());
- GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+ __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
}
if (true_target == nullptr) {
__ b(false_target, ARMCondition(condition->GetOppositeCondition()));
@@ -1610,8 +1655,7 @@ void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -1623,6 +1667,10 @@ void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
@@ -1675,8 +1723,8 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
__ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>()));
} else {
DCHECK(right.IsConstant());
- GenerateCompareWithImmediate(left.AsRegister<Register>(),
- CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+ __ CmpConstant(left.AsRegister<Register>(),
+ CodeGenerator::GetInt32ValueOf(right.GetConstant()));
}
__ it(ARMCondition(cond->GetCondition()), kItElse);
__ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1),
@@ -1891,7 +1939,7 @@ void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
}
void InstructionCodeGeneratorARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
- GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+ codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
}
void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) {
@@ -2846,8 +2894,7 @@ void InstructionCodeGeneratorARM::DivRemByPowerOfTwo(HBinaryOperation* instructi
Register dividend = locations->InAt(0).AsRegister<Register>();
Register temp = locations->GetTemp(0).AsRegister<Register>();
int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
- uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
- DCHECK(IsPowerOfTwo(abs_imm));
+ uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
int ctz_imm = CTZ(abs_imm);
if (ctz_imm == 1) {
@@ -2923,7 +2970,7 @@ void InstructionCodeGeneratorARM::GenerateDivRemConstantIntegral(HBinaryOperatio
// Do not generate anything. DivZeroCheck would prevent any code to be executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (IsPowerOfTwo(std::abs(imm))) {
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
DivRemByPowerOfTwo(instruction);
} else {
DCHECK(imm <= -2 || imm >= 2);
@@ -2952,12 +2999,12 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue());
- if (abs_imm <= 1) {
+ int32_t value = div->InputAt(1)->AsIntConstant()->GetValue();
+ if (value == 1 || value == 0 || value == -1) {
// No temp register required.
} else {
locations->AddTemp(Location::RequiresRegister());
- if (!IsPowerOfTwo(abs_imm)) {
+ if (!IsPowerOfTwo(AbsOrMin(value))) {
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -3078,12 +3125,12 @@ void LocationsBuilderARM::VisitRem(HRem* rem) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue());
- if (abs_imm <= 1) {
+ int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue();
+ if (value == 1 || value == 0 || value == -1) {
// No temp register required.
} else {
locations->AddTemp(Location::RequiresRegister());
- if (!IsPowerOfTwo(abs_imm)) {
+ if (!IsPowerOfTwo(AbsOrMin(value))) {
locations->AddTemp(Location::RequiresRegister());
}
}
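The std::abs() to AbsOrMin() swaps above are about imm == INT32_MIN: std::abs() is undefined for the most negative value, and the old signed `abs_imm <= 1` test mishandled it even though -2^31 is (minus) a power of two. A sketch of the assumed AbsOrMin() contract (the real helper lives in ART's base/bit_utils.h; this illustration is not a copy of it):

    // Assumed contract: absolute value, except that the most negative value
    // maps to itself, since |INT32_MIN| is not representable in int32_t.
    // (Needs <cstdlib> and <limits>.)
    static int32_t AbsOrMinSketch(int32_t value) {
      return value == std::numeric_limits<int32_t>::min() ? value : std::abs(value);
    }
    // static_cast<uint32_t>(AbsOrMinSketch(INT32_MIN)) == 0x80000000u, which
    // IsPowerOfTwo() and CTZ() handle correctly, so dividing by INT32_MIN now
    // takes the power-of-two path with ctz_imm == 31.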
@@ -3437,7 +3484,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) {
Register first_reg = first.AsRegister<Register>();
if (second.IsRegister()) {
Register second_reg = second.AsRegister<Register>();
- // Arm doesn't mask the shift count so we need to do it ourselves.
+ // ARM doesn't mask the shift count so we need to do it ourselves.
__ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
if (op->IsShl()) {
__ Lsl(out_reg, first_reg, out_reg);
@@ -3449,7 +3496,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) {
} else {
int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue);
- if (shift_value == 0) { // arm does not support shifting with 0 immediate.
+ if (shift_value == 0) { // ARM does not support shifting with 0 immediate.
__ Mov(out_reg, first_reg);
} else if (op->IsShl()) {
__ Lsl(out_reg, first_reg, shift_value);
@@ -3796,9 +3843,9 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) {
- // TODO (ported from quick): revisit Arm barrier kinds
- DmbOptions flavor = DmbOptions::ISH; // quiet c++ warnings
+void CodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) {
+ // TODO (ported from quick): revisit ARM barrier kinds.
+ DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings.
switch (kind) {
case MemBarrierKind::kAnyStore:
case MemBarrierKind::kLoadAny:
@@ -3879,11 +3926,11 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI
locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
locations->AddTemp(Location::RequiresRegister());
} else if (generate_volatile) {
- // Arm encoding have some additional constraints for ldrexd/strexd:
+ // ARM encoding has some additional constraints for ldrexd/strexd:
// - registers need to be consecutive
// - the first register should be even but not R14.
- // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever
- // enable Arm encoding.
+ // We don't test for ARM yet, and the assertion makes sure that we
+ // revisit this if we ever enable ARM encoding.
DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
locations->AddTemp(Location::RequiresRegister());
@@ -3913,7 +3960,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction,
CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
}
switch (field_type) {
@@ -4005,7 +4052,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction,
}
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
}
}
@@ -4039,14 +4086,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI
(overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}
if (volatile_for_double) {
- // Arm encoding have some additional constraints for ldrexd/strexd:
+ // ARM encoding has some additional constraints for ldrexd/strexd:
// - registers need to be consecutive
// - the first register should be even but not R14.
- // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever
- // enable Arm encoding.
+ // We don't test for ARM yet, and the assertion makes sure that we
+ // revisit this if we ever enable ARM encoding.
DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
}
@@ -4105,33 +4156,52 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
switch (field_type) {
- case Primitive::kPrimBoolean: {
+ case Primitive::kPrimBoolean:
__ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset);
break;
- }
- case Primitive::kPrimByte: {
+ case Primitive::kPrimByte:
__ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset);
break;
- }
- case Primitive::kPrimShort: {
+ case Primitive::kPrimShort:
__ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset);
break;
- }
- case Primitive::kPrimChar: {
+ case Primitive::kPrimChar:
__ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset);
break;
- }
case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
__ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset);
break;
+
+ case Primitive::kPrimNot: {
+ // /* HeapReference<Object> */ out = *(base + offset)
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp_loc = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ } else {
+ __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+ }
+ break;
}
- case Primitive::kPrimLong: {
+ case Primitive::kPrimLong:
if (is_volatile && !atomic_ldrd_strd) {
GenerateWideAtomicLoad(base, offset,
out.AsRegisterPairLow<Register>(),
@@ -4140,12 +4210,10 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
__ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset);
}
break;
- }
- case Primitive::kPrimFloat: {
+ case Primitive::kPrimFloat:
__ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset);
break;
- }
case Primitive::kPrimDouble: {
DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>());
@@ -4167,17 +4235,20 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
UNREACHABLE();
}
- // Doubles are handled in the switch.
- if (field_type != Primitive::kPrimDouble) {
+ if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) {
+ // Potential implicit null checks, in the case of reference or
+ // double fields, are handled in the previous switch statement.
+ } else {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
- }
-
- if (field_type == Primitive::kPrimNot) {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+ if (field_type == Primitive::kPrimNot) {
+ // Memory barriers, in the case of references, are also handled
+ // in the previous switch statement.
+ } else {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
}
}
@@ -4340,6 +4411,11 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
@@ -4347,12 +4423,13 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
Location index = locations->InAt(1);
- Primitive::Type type = instruction->GetType();
+ Location out_loc = locations->Out();
+ Primitive::Type type = instruction->GetType();
switch (type) {
case Primitive::kPrimBoolean: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -4366,7 +4443,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimByte: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -4380,7 +4457,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimShort: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -4394,7 +4471,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimChar: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -4406,13 +4483,9 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
break;
}
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
+ case Primitive::kPrimInt: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -4424,44 +4497,79 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case Primitive::kPrimNot: {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ // /* HeapReference<Object> */ out =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call.
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ } else {
+ Register out = out_loc.AsRegister<Register>();
+ if (index.IsConstant()) {
+ size_t offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ LoadFromOffset(kLoadWord, out, obj, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+ __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(
+ instruction, out_loc, out_loc, obj_loc, data_offset, index);
+ }
+ }
+ break;
+ }
+
case Primitive::kPrimLong: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
- Location out = locations->Out();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset);
+ __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), obj, offset);
} else {
__ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
- __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), IP, data_offset);
+ __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), IP, data_offset);
}
break;
}
case Primitive::kPrimFloat: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
- Location out = locations->Out();
- DCHECK(out.IsFpuRegister());
+ SRegister out = out_loc.AsFpuRegister<SRegister>();
if (index.IsConstant()) {
size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset);
+ __ LoadSFromOffset(out, obj, offset);
} else {
__ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset);
+ __ LoadSFromOffset(out, IP, data_offset);
}
break;
}
case Primitive::kPrimDouble: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
- Location out = locations->Out();
- DCHECK(out.IsFpuRegisterPair());
+ SRegister out = out_loc.AsFpuRegisterPairLow<SRegister>();
if (index.IsConstant()) {
size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+ __ LoadDFromOffset(FromLowSToD(out), obj, offset);
} else {
__ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
- __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
+ __ LoadDFromOffset(FromLowSToD(out), IP, data_offset);
}
break;
}
@@ -4470,20 +4578,12 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
if (type == Primitive::kPrimNot) {
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Location out = locations->Out();
- if (index.IsConstant()) {
- uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
- } else {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
- }
+ // Potential implicit null checks, in the case of reference
+ // arrays, are handled in the previous switch statement.
+ } else {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
}
@@ -4574,6 +4674,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
__ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
__ StoreToOffset(kStoreWord, source, IP, data_offset);
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
DCHECK(!needs_write_barrier);
DCHECK(!may_need_runtime_call_for_type_check);
break;
@@ -4615,12 +4716,12 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
// __ Mov(temp2, temp1);
// // /* HeapReference<Class> */ temp1 = temp1->component_type_
// __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp1_loc, temp1_loc, temp2_loc, component_offset);
//
// // /* HeapReference<Class> */ temp2 = value->klass_
// __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc);
//
// __ cmp(temp1, ShifterOperand(temp2));
@@ -4717,8 +4818,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
__ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
__ StoreToOffset(kStoreWord, value, IP, data_offset);
}
-
- codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -4770,8 +4869,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
UNREACHABLE();
}
- // Ints and objects are handled in the switch.
- if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
+ // Objects are handled in the switch.
+ if (value_type != Primitive::kPrimNot) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
}
@@ -5140,16 +5239,9 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
if (cls->IsReferrersClass()) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ AddConstant(out, current_method, declaring_class_offset);
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
- }
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
} else {
// /* GcRoot<mirror::Class>[] */ out =
// current_method.ptr_sized_fields_->dex_cache_resolved_types_
@@ -5157,17 +5249,8 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
out,
current_method,
ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
-
- size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &out[type_index]
- __ AddConstant(out, out, cache_offset);
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = out[type_index]
- __ LoadFromOffset(kLoadWord, out, out, cache_offset);
- }
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
@@ -5230,30 +5313,14 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
Register out = out_loc.AsRegister<Register>();
Register current_method = locations->InAt(0).AsRegister<Register>();
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ AddConstant(out, current_method, declaring_class_offset);
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
- }
-
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
// /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
__ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-
- size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::String>* */ out = &out[string_index]
- __ AddConstant(out, out, cache_offset);
- // /* mirror::String* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::String> */ out = out[string_index]
- __ LoadFromOffset(kLoadWord, out, out, cache_offset);
- }
+ // /* GcRoot<mirror::String> */ out = out[string_index]
+ GenerateGcRootFieldLoad(
+ load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
if (!load->IsInDexCache()) {
SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
@@ -5300,6 +5367,14 @@ void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) {
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+ return kEmitCompilerReadBarrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5326,21 +5401,22 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
// When read barriers are enabled, we need a temporary register for
// some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
Register cls = locations->InAt(1).AsRegister<Register>();
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
+ Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(0) :
+ Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5355,10 +5431,9 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
}
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadWord, out, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
- switch (instruction->GetTypeCheckKind()) {
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
__ cmp(out, ShifterOperand(cls));
// Classes must be equal for the instanceof to succeed.
@@ -5373,17 +5448,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
// object to avoid doing a comparison we know will fail.
Label loop;
__ Bind(&loop);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = temp_loc.AsRegister<Register>();
- __ Mov(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ LoadFromOffset(kLoadWord, out, out, super_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
// If `out` is null, we use it for the result, and jump to `done`.
__ CompareAndBranchIfZero(out, &done);
__ cmp(out, ShifterOperand(cls));
@@ -5401,17 +5467,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
__ Bind(&loop);
__ cmp(out, ShifterOperand(cls));
__ b(&success, EQ);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = temp_loc.AsRegister<Register>();
- __ Mov(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ LoadFromOffset(kLoadWord, out, out, super_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
__ CompareAndBranchIfNonZero(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
__ b(&done);
@@ -5429,17 +5486,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
__ cmp(out, ShifterOperand(cls));
__ b(&exact_check, EQ);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = temp_loc.AsRegister<Register>();
- __ Mov(temp, out);
- }
// /* HeapReference<Class> */ out = out->component_type_
- __ LoadFromOffset(kLoadWord, out, out, component_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
// If `out` is null, we use it for the result, and jump to `done`.
__ CompareAndBranchIfZero(out, &done);
__ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -5478,6 +5526,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
// HInstanceOf instruction (following the runtime calling
// convention), which might be cluttered by the potential first
// read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
/* is_fatal */ false);
@@ -5532,27 +5587,27 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) {
locations->AddTemp(Location::RequiresRegister());
// When read barriers are enabled, we need an additional temporary
// register for some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
Register cls = locations->InAt(1).AsRegister<Register>();
Location temp_loc = locations->GetTemp(0);
Register temp = temp_loc.AsRegister<Register>();
+ Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(1) :
+ Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
bool is_type_check_slow_path_fatal =
(type_check_kind == TypeCheckKind::kExactCheck ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5571,8 +5626,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
@@ -5589,18 +5643,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// object to avoid doing a comparison we know will fail.
Label loop, compare_classes;
__ Bind(&loop);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = temp2_loc.AsRegister<Register>();
- __ Mov(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
// If the class reference currently in `temp` is not null, jump
// to the `compare_classes` label to compare it with the checked
@@ -5612,8 +5656,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ b(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -5629,18 +5672,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
__ cmp(temp, ShifterOperand(cls));
__ b(&done, EQ);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = temp2_loc.AsRegister<Register>();
- __ Mov(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
// If the class reference currently in `temp` is not null, jump
// back at the beginning of the loop.
@@ -5651,8 +5684,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ b(type_check_slow_path->GetEntryLabel());
break;
}
@@ -5664,19 +5696,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
__ b(&done, EQ);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = temp2_loc.AsRegister<Register>();
- __ Mov(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->component_type_
- __ LoadFromOffset(kLoadWord, temp, temp, component_offset);
- codegen_->MaybeGenerateReadBarrier(
- instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
// If the component type is not null (i.e. the object is indeed
// an array), jump to label `check_non_primitive_component_type`
@@ -5689,8 +5710,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ b(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -5699,8 +5719,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
__ CompareAndBranchIfZero(temp, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ b(type_check_slow_path->GetEntryLabel());
break;
}
@@ -5717,6 +5736,13 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// instruction (following the runtime calling convention), which
// might be cluttered by the potential first read barrier
// emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
__ b(type_check_slow_path->GetEntryLabel());
break;
}
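The TypeCheckNeedsATemporary() predicate called in the hunks above is defined elsewhere in code_generator_arm.cc, outside this excerpt. Judging from the condition this patch deletes in the VisitCheckCast locations builder, it is presumably equivalent to the following sketch (reconstructed, not copied from the source):

// Sketch reconstructed from the deleted condition above; the actual helper
// lives outside this excerpt.
static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
  return kEmitCompilerReadBarrier &&
      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
       type_check_kind == TypeCheckKind::kArrayObjectCheck);
}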
@@ -5901,14 +5927,249 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr
}
}
-void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location temp) {
+ Register out_reg = out.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ __ Mov(temp.AsRegister<Register>(), out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location temp) {
+ Register out_reg = out.AsRegister<Register>();
+ Register obj_reg = obj.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ Register obj,
+ uint32_t offset) {
+ Register root_reg = root.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+ //
+ // root = obj.field;
+ // if (Thread::Current()->GetIsGcMarking()) {
+ // root = ReadBarrier::Mark(root)
+ // }
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path used to mark the GC root `root`.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
+ codegen_->AddSlowPath(slow_path);
+
+ __ LoadFromOffset(
+ kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
+ __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ __ AddConstant(root_reg, obj, offset);
+ // /* mirror::Object* */ root = root->Read()
+ codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
+ } else {
+ // Plain GC root load with no read barrier.
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ // Note that GC roots are not affected by heap poisoning, thus we
+ // do not have to unpoison `root_reg` here.
+ }
+}
+
+void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ Location no_index = Location::NoLocation();
+ GenerateReferenceLoadWithBakerReadBarrier(
+ instruction, ref, obj, offset, no_index, temp, needs_null_check);
+}
+
+void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ GenerateReferenceLoadWithBakerReadBarrier(
+ instruction, ref, obj, data_offset, index, temp, needs_null_check);
+}
+
+void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // In slow path based read barriers, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as:
+ // - it implements the load-load fence using a data dependency on
+ // the high-bits of rb_state, which are expected to be all zeroes;
+ // - it performs additional checks that we do not do here for
+ // performance reasons.
+
+ Register ref_reg = ref.AsRegister<Register>();
+ Register temp_reg = temp.AsRegister<Register>();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ // /* int32_t */ monitor = obj->monitor_
+ __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+ // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+ __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift);
+ __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
+ static_assert(
+ LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+ "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+ // Introduce a dependency on the high bits of rb_state, which shall
+ // be all zeroes, to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0
+ __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
+ // obj is unchanged by this operation, but its value now depends on
+ // IP, which depends on temp_reg.
+ __ add(obj, obj, ShifterOperand(IP));
+
+ // The actual reference load.
+ if (index.IsValid()) {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ ref =
+ // *(obj + offset + index * sizeof(HeapReference<Object>))
+ if (index.IsConstant()) {
+ size_t computed_offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset;
+ __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+ __ LoadFromOffset(kLoadWord, ref_reg, IP, offset);
+ }
+ } else {
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+ }
+
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+
+ // Slow path used to mark the object `ref` when it is gray.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref, ref);
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_));
+ __ b(slow_path->GetEntryLabel(), EQ);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
DCHECK(kEmitCompilerReadBarrier);
+ // Insert a slow path based read barrier *after* the reference load.
+ //
// If heap poisoning is enabled, the unpoisoning of the loaded
// reference will be carried out by the runtime within the slow
// path.
@@ -5922,57 +6183,41 @@ void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction,
ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index);
AddSlowPath(slow_path);
- // TODO: When read barrier has a fast path, add it here.
- /* Currently the read barrier call is inserted after the original load.
- * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
- * original load. This load-load ordering is required by the read barrier.
- * The fast path/slow path (for Baker's algorithm) should look like:
- *
- * bool isGray = obj.LockWord & kReadBarrierMask;
- * lfence; // load fence or artificial data dependence to prevent load-load reordering
- * ref = obj.field; // this is the original load
- * if (isGray) {
- * ref = Mark(ref); // ideally the slow path just does Mark(ref)
- * }
- */
-
__ b(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
-void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void CodeGeneratorARM::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
if (kEmitCompilerReadBarrier) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
// If heap poisoning is enabled, unpoisoning will be taken care of
// by the runtime within the slow path.
- GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
} else if (kPoisonHeapReferences) {
__ UnpoisonHeapReference(out.AsRegister<Register>());
}
}
-void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction,
- Location out,
- Location root) {
+void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
DCHECK(kEmitCompilerReadBarrier);
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
// Note that GC roots are not affected by heap poisoning, so we do
// not need to do anything special for this here.
SlowPathCode* slow_path =
new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root);
AddSlowPath(slow_path);
- // TODO: Implement a fast path for ReadBarrierForRoot, performing
- // the following operation (for Baker's algorithm):
- //
- // if (thread.tls32_.is_gc_marking) {
- // root = Mark(root);
- // }
-
__ b(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
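Taken together, the Baker fast paths added above boil down to the gray-bit check sketched below. This is a self-contained toy model, not ART's API: Object, Mark() and the lock-word constants are stand-ins chosen for illustration.

#include <atomic>
#include <cstdint>

struct Object {
  std::atomic<uint32_t> monitor;  // lock word; two bits hold the read barrier state.
  Object* field;
};

constexpr uint32_t kRbStateShift = 28;  // stand-in for LockWord::kReadBarrierStateShift.
constexpr uint32_t kRbStateMask = 0x3;  // stand-in for LockWord::kReadBarrierStateMask.
constexpr uint32_t kGray = 1;           // stand-in for ReadBarrier::gray_ptr_.

Object* Mark(Object* ref) { return ref; }  // models the runtime Mark entrypoint.

Object* BakerLoadField(Object* obj) {
  // Load the lock word *before* the reference; the acquire ordering here
  // stands in for the bic/add data-dependency trick the ARM code emits
  // instead of a memory barrier.
  uint32_t rb_state =
      (obj->monitor.load(std::memory_order_acquire) >> kRbStateShift) & kRbStateMask;
  Object* ref = obj->field;  // the original reference load.
  if (rb_state == kGray) {
    ref = Mark(ref);  // slow path: mark/forward the reference.
  }
  return ref;
}

The GC-root variant (GenerateGcRootFieldLoad) differs only in its trigger: it loads the root unconditionally and calls Mark() when Thread::Current()->GetIsGcMarking() is set.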
@@ -6304,7 +6549,7 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr)
}
if (num_entries - last_index == 2) {
// The last missing case_value.
- GenerateCompareWithImmediate(temp_reg, 1);
+ __ CmpConstant(temp_reg, 1);
__ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
}
@@ -6364,7 +6609,7 @@ void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysB
void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
if (!trg.IsValid()) {
- DCHECK(type == Primitive::kPrimVoid);
+ DCHECK_EQ(type, Primitive::kPrimVoid);
return;
}
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index b7c58e1248..26d6d63b31 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -188,7 +188,7 @@ class LocationsBuilderARM : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
};
-class InstructionCodeGeneratorARM : public HGraphVisitor {
+class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen);
@@ -222,24 +222,57 @@ class InstructionCodeGeneratorARM : public HGraphVisitor {
void HandleLongRotate(LocationSummary* locations);
void HandleRotate(HRor* ror);
void HandleShift(HBinaryOperation* operation);
- void GenerateMemoryBarrier(MemBarrierKind kind);
+
void GenerateWideAtomicStore(Register addr, uint32_t offset,
Register value_lo, Register value_hi,
Register temp1, Register temp2,
HInstruction* instruction);
void GenerateWideAtomicLoad(Register addr, uint32_t offset,
Register out_lo, Register out_hi);
+
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ // Register `temp` is used when generating a read barrier.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location temp);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ // Register `temp` is used when generating a Baker's read barrier.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location temp);
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers (if any).
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ Register obj,
+ uint32_t offset);
+
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
Label* true_target,
Label* false_target);
- void GenerateCompareWithImmediate(Register left, int32_t right);
void GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target,
Label* false_target);
@@ -346,6 +379,8 @@ class CodeGeneratorARM : public CodeGenerator {
// Emit a write barrier.
void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
Label* GetLabelOf(HBasicBlock* block) const {
return CommonGetLabelOf<Label>(block_labels_, block);
}
@@ -406,7 +441,26 @@ class CodeGeneratorARM : public CodeGenerator {
return &it->second;
}
- // Generate a read barrier for a heap reference within `instruction`.
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location out,
+ Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location out,
+ Register obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
//
// A read barrier for an object reference read from the heap is
// implemented as a call to the artReadBarrierSlow runtime entry
@@ -423,23 +477,25 @@ class CodeGeneratorARM : public CodeGenerator {
// When `index` is provided (i.e. for array accesses), the offset
// value passed to artReadBarrierSlow is adjusted to take `index`
// into account.
- void GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
-
- // If read barriers are enabled, generate a read barrier for a heap reference.
- // If heap poisoning is enabled, also unpoison the reference in `out`.
- void MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
-
- // Generate a read barrier for a GC root within `instruction`.
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
//
// A read barrier for an object reference GC root is implemented as
// a call to the artReadBarrierForRootSlow runtime entry point,
@@ -449,9 +505,19 @@ class CodeGeneratorARM : public CodeGenerator {
//
// The `out` location contains the value returned by
// artReadBarrierForRootSlow.
- void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
private:
+ // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+ // and GenerateArrayLoadWithBakerReadBarrier.
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b49f42b6c8..a3150d3d22 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -477,24 +477,24 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
public:
- explicit DeoptimizationSlowPathARM64(HInstruction* instruction)
+ explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
- DCHECK(instruction_->IsDeoptimize());
- HDeoptimize* deoptimize = instruction_->AsDeoptimize();
- uint32_t dex_pc = deoptimize->GetDexPc();
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
};
@@ -1605,7 +1605,7 @@ void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruct
InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
CodeGeneratorARM64* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -2534,8 +2534,7 @@ void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruc
Register out = OutputRegister(instruction);
Register dividend = InputRegisterAt(instruction, 0);
int64_t imm = Int64FromConstant(second.GetConstant());
- uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
- DCHECK(IsPowerOfTwo(abs_imm));
+ uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
int ctz_imm = CTZ(abs_imm);
UseScratchRegisterScope temps(GetVIXLAssembler());
@@ -2627,7 +2626,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* ins
// Do not generate anything. DivZeroCheck would prevent any code to be executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (IsPowerOfTwo(std::abs(imm))) {
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
DivRemByPowerOfTwo(instruction);
} else {
DCHECK(imm <= -2 || imm >= 2);
@@ -2940,9 +2939,8 @@ void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
- DeoptimizationSlowPathARM64(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCodeARM64* slow_path =
+ deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -2954,6 +2952,10 @@ void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ Nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
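The std::abs(imm) -> AbsOrMin(imm) changes in this and the MIPS hunks are correctness fixes, not style: std::abs overflows (undefined behavior) on the most negative value, whereas the minimum itself casts to an unsigned power of two. A minimal sketch of the idea, with AbsOrMin written out locally for illustration:

#include <cstdint>
#include <limits>

// Return |x|, except for the minimum value, whose magnitude is not
// representable in the signed type; return it unchanged instead.
int64_t AbsOrMin(int64_t x) {
  return (x == std::numeric_limits<int64_t>::min()) ? x : (x < 0 ? -x : x);
}

// static_cast<uint64_t>(AbsOrMin(INT64_MIN)) == 1ULL << 63 is a power of
// two, so DivRemByPowerOfTwo now also handles a divisor of -2^63.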
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 0e90ac6345..f2ff89488e 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -186,7 +186,7 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64);
};
-class InstructionCodeGeneratorARM64 : public HGraphVisitor {
+class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 4648606da8..322912976e 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -444,19 +444,16 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
public:
- explicit DeoptimizationSlowPathMIPS(HInstruction* instruction)
+ explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
- DCHECK(instruction_->IsDeoptimize());
- HDeoptimize* deoptimize = instruction_->AsDeoptimize();
- uint32_t dex_pc = deoptimize->GetDexPc();
- CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
instruction_,
- dex_pc,
+ instruction_->GetDexPc(),
this,
IsDirectEntrypoint(kQuickDeoptimize));
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
@@ -465,7 +462,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
};
@@ -608,9 +605,9 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) {
// then swap the high 32 bits of the same FPR. mtc1 makes the high 32 bits of an FPR
// unpredictable and the following mfch1 will fail.
__ Mfc1(TMP, f1);
- __ Mfhc1(AT, f1);
+ __ MoveFromFpuHigh(AT, f1);
__ Mtc1(r2_l, f1);
- __ Mthc1(r2_h, f1);
+ __ MoveToFpuHigh(r2_h, f1);
__ Move(r2_l, TMP);
__ Move(r2_h, AT);
} else if (loc1.IsStackSlot() && loc2.IsStackSlot()) {
@@ -862,7 +859,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) {
Register dst_low = destination.AsRegisterPairLow<Register>();
FRegister src = source.AsFpuRegister<FRegister>();
__ Mfc1(dst_low, src);
- __ Mfhc1(dst_high, src);
+ __ MoveFromFpuHigh(dst_high, src);
} else {
DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
int32_t off = source.GetStackIndex();
@@ -875,7 +872,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) {
Register src_high = source.AsRegisterPairHigh<Register>();
Register src_low = source.AsRegisterPairLow<Register>();
__ Mtc1(src_low, dst);
- __ Mthc1(src_high, dst);
+ __ MoveToFpuHigh(src_high, dst);
} else if (source.IsFpuRegister()) {
__ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
} else {
@@ -1241,7 +1238,7 @@ void InstructionCodeGeneratorMIPS::GenerateSuspendCheck(HSuspendCheck* instructi
InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph,
CodeGeneratorMIPS* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -1511,7 +1508,7 @@ void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction)
}
void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) {
- DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+ DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
Primitive::Type type = instr->GetResultType();
@@ -1534,7 +1531,7 @@ void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) {
static constexpr size_t kMipsBitsPerWord = kMipsWordSize * kBitsPerByte;
void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
- DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+ DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
LocationSummary* locations = instr->GetLocations();
Primitive::Type type = instr->GetType();
@@ -1542,30 +1539,58 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
bool use_imm = rhs_location.IsConstant();
Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>();
int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0;
- uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue;
- uint32_t shift_value = rhs_imm & shift_mask;
- // Is the INS (Insert Bit Field) instruction supported?
- bool has_ins = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+ const uint32_t shift_mask = (type == Primitive::kPrimInt)
+ ? kMaxIntShiftValue
+ : kMaxLongShiftValue;
+ const uint32_t shift_value = rhs_imm & shift_mask;
+ // Are the INS (Insert Bit Field) and ROTR instructions supported?
+ bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
switch (type) {
case Primitive::kPrimInt: {
Register dst = locations->Out().AsRegister<Register>();
Register lhs = locations->InAt(0).AsRegister<Register>();
if (use_imm) {
- if (instr->IsShl()) {
+ if (shift_value == 0) {
+ if (dst != lhs) {
+ __ Move(dst, lhs);
+ }
+ } else if (instr->IsShl()) {
__ Sll(dst, lhs, shift_value);
} else if (instr->IsShr()) {
__ Sra(dst, lhs, shift_value);
- } else {
+ } else if (instr->IsUShr()) {
__ Srl(dst, lhs, shift_value);
+ } else {
+ if (has_ins_rotr) {
+ __ Rotr(dst, lhs, shift_value);
+ } else {
+ __ Sll(TMP, lhs, (kMipsBitsPerWord - shift_value) & shift_mask);
+ __ Srl(dst, lhs, shift_value);
+ __ Or(dst, dst, TMP);
+ }
}
} else {
if (instr->IsShl()) {
__ Sllv(dst, lhs, rhs_reg);
} else if (instr->IsShr()) {
__ Srav(dst, lhs, rhs_reg);
- } else {
+ } else if (instr->IsUShr()) {
__ Srlv(dst, lhs, rhs_reg);
+ } else {
+ if (has_ins_rotr) {
+ __ Rotrv(dst, lhs, rhs_reg);
+ } else {
+ __ Subu(TMP, ZERO, rhs_reg);
+ // 32-bit shift instructions use the 5 least significant bits of the shift count, so
+ // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case
+ // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out
+ // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`,
+ // IOW, the OR'd values are equal.
+ __ Sllv(TMP, lhs, TMP);
+ __ Srlv(dst, lhs, rhs_reg);
+ __ Or(dst, dst, TMP);
+ }
}
}
break;
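The pre-R2 fallback above builds a 32-bit rotate from two shifts and an OR. The same idea in C++ (a sketch, not ART code):

#include <cstdint>

// Rotate right without a ROTR instruction, mirroring the Sll/Srl/Or
// sequence above. MIPS32 shifts use only the low 5 bits of the count,
// hence the & 31 masks; when n % 32 == 0 the expression degenerates to
// (x >> 0) | (x << 0) == x, which is the correct result.
uint32_t Rotr32(uint32_t x, uint32_t n) {
  return (x >> (n & 31)) | (x << ((32 - n) & 31));
}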
@@ -1580,7 +1605,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
if (shift_value == 0) {
codegen_->Move64(locations->Out(), locations->InAt(0));
} else if (shift_value < kMipsBitsPerWord) {
- if (has_ins) {
+ if (has_ins_rotr) {
if (instr->IsShl()) {
__ Srl(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
__ Ins(dst_high, lhs_high, shift_value, kMipsBitsPerWord - shift_value);
@@ -1589,10 +1614,15 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Srl(dst_low, lhs_low, shift_value);
__ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
__ Sra(dst_high, lhs_high, shift_value);
+ } else if (instr->IsUShr()) {
+ __ Srl(dst_low, lhs_low, shift_value);
+ __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+ __ Srl(dst_high, lhs_high, shift_value);
} else {
__ Srl(dst_low, lhs_low, shift_value);
__ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
__ Srl(dst_high, lhs_high, shift_value);
+ __ Ins(dst_high, lhs_low, kMipsBitsPerWord - shift_value, shift_value);
}
} else {
if (instr->IsShl()) {
@@ -1605,24 +1635,51 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
__ Srl(dst_low, lhs_low, shift_value);
__ Or(dst_low, dst_low, TMP);
- } else {
+ } else if (instr->IsUShr()) {
__ Srl(dst_high, lhs_high, shift_value);
__ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
__ Srl(dst_low, lhs_low, shift_value);
__ Or(dst_low, dst_low, TMP);
+ } else {
+ __ Srl(TMP, lhs_low, shift_value);
+ __ Sll(dst_low, lhs_high, kMipsBitsPerWord - shift_value);
+ __ Or(dst_low, dst_low, TMP);
+ __ Srl(TMP, lhs_high, shift_value);
+ __ Sll(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
+ __ Or(dst_high, dst_high, TMP);
}
}
} else {
- shift_value -= kMipsBitsPerWord;
+ const uint32_t shift_value_high = shift_value - kMipsBitsPerWord;
if (instr->IsShl()) {
- __ Sll(dst_high, lhs_low, shift_value);
+ __ Sll(dst_high, lhs_low, shift_value_high);
__ Move(dst_low, ZERO);
} else if (instr->IsShr()) {
- __ Sra(dst_low, lhs_high, shift_value);
+ __ Sra(dst_low, lhs_high, shift_value_high);
__ Sra(dst_high, dst_low, kMipsBitsPerWord - 1);
- } else {
- __ Srl(dst_low, lhs_high, shift_value);
+ } else if (instr->IsUShr()) {
+ __ Srl(dst_low, lhs_high, shift_value_high);
__ Move(dst_high, ZERO);
+ } else {
+ if (shift_value == kMipsBitsPerWord) {
+ // 64-bit rotation by 32 is just a swap.
+ __ Move(dst_low, lhs_high);
+ __ Move(dst_high, lhs_low);
+ } else {
+ if (has_ins_rotr) {
+ __ Srl(dst_low, lhs_high, shift_value_high);
+ __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high);
+ __ Srl(dst_high, lhs_low, shift_value_high);
+ __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high);
+ } else {
+ __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high);
+ __ Srl(dst_low, lhs_high, shift_value_high);
+ __ Or(dst_low, dst_low, TMP);
+ __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high);
+ __ Srl(dst_high, lhs_low, shift_value_high);
+ __ Or(dst_high, dst_high, TMP);
+ }
+ }
}
}
} else {
@@ -1649,7 +1706,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Beqz(TMP, &done);
__ Move(dst_low, dst_high);
__ Sra(dst_high, dst_high, 31);
- } else {
+ } else if (instr->IsUShr()) {
__ Srlv(dst_high, lhs_high, rhs_reg);
__ Nor(AT, ZERO, rhs_reg);
__ Sll(TMP, lhs_high, 1);
@@ -1660,6 +1717,21 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Beqz(TMP, &done);
__ Move(dst_low, dst_high);
__ Move(dst_high, ZERO);
+ } else {
+ __ Nor(AT, ZERO, rhs_reg);
+ __ Srlv(TMP, lhs_low, rhs_reg);
+ __ Sll(dst_low, lhs_high, 1);
+ __ Sllv(dst_low, dst_low, AT);
+ __ Or(dst_low, dst_low, TMP);
+ __ Srlv(TMP, lhs_high, rhs_reg);
+ __ Sll(dst_high, lhs_low, 1);
+ __ Sllv(dst_high, dst_high, AT);
+ __ Or(dst_high, dst_high, TMP);
+ __ Andi(TMP, rhs_reg, kMipsBitsPerWord);
+ __ Beqz(TMP, &done);
+ __ Move(TMP, dst_high);
+ __ Move(dst_high, dst_low);
+ __ Move(dst_low, TMP);
}
__ Bind(&done);
}
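For variable counts on a 64-bit value held in a register pair, the sequence above rotates each half by n % 32, pulls in the bits that cross the half boundary, and finally swaps the halves when bit 5 of the count is set. A compact C++ model of the same computation (sketch only):

#include <cstdint>
#include <utility>

// 64-bit rotate right composed from 32-bit halves. The << 1 followed by
// << (31 - s) mirrors the Sll-by-1 + Sllv(AT) trick above: it shifts by
// a total of 32 - s without ever shifting by 32, so s == 0 contributes 0.
uint64_t Rotr64ViaPairs(uint32_t lo, uint32_t hi, uint32_t n) {
  uint32_t s = n & 31;
  uint32_t inv = 31 - s;  // models Nor(AT, ZERO, rhs_reg) & 31.
  uint32_t new_lo = (lo >> s) | ((hi << 1) << inv);
  uint32_t new_hi = (hi >> s) | ((lo << 1) << inv);
  if (n & 32) {  // counts 32..63: the halves trade places.
    std::swap(new_lo, new_hi);
  }
  return (static_cast<uint64_t>(new_hi) << 32) | new_lo;
}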
@@ -2314,8 +2386,7 @@ void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruct
Register out = locations->Out().AsRegister<Register>();
Register dividend = locations->InAt(0).AsRegister<Register>();
int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
- uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
- DCHECK(IsPowerOfTwo(abs_imm));
+ uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
int ctz_imm = CTZ(abs_imm);
if (instruction->IsDiv()) {
@@ -2418,7 +2489,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* inst
// Do not generate anything. DivZeroCheck would prevent any code to be executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (IsPowerOfTwo(std::abs(imm))) {
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
DivRemByPowerOfTwo(instruction);
} else {
DCHECK(imm <= -2 || imm >= 2);
@@ -3358,8 +3429,8 @@ void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCodeMIPS* slow_path =
+ deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -3371,6 +3442,10 @@ void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ Nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
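The deopt_slow_paths_.NewSlowPath<...>(deoptimize) calls introduced here and in the other back ends replace per-call arena allocation; from the call sites, the generator appears to hand back one shared slow path per deoptimization point rather than a fresh object each time. A toy model of that caching pattern (all names are stand-ins; the real SlowPathGenerator lives in code_generator.h):

#include <cstdint>
#include <map>

struct SlowPath {
  explicit SlowPath(uint32_t pc) : dex_pc(pc) {}
  uint32_t dex_pc;
};

class SlowPathCache {
 public:
  // Return the existing slow path for this dex PC, or create one.
  SlowPath* NewSlowPath(uint32_t dex_pc) {
    auto it = cache_.find(dex_pc);
    if (it != cache_.end()) return it->second;
    SlowPath* path = new SlowPath(dex_pc);  // arena-allocated in ART.
    cache_.emplace(dex_pc, path);
    return path;
  }
 private:
  std::map<uint32_t, SlowPath*> cache_;
};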
@@ -3457,8 +3532,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
// Need to move to FP regs since FP results are returned in core registers.
__ Mtc1(locations->GetTemp(1).AsRegister<Register>(),
locations->Out().AsFpuRegister<FRegister>());
- __ Mthc1(locations->GetTemp(2).AsRegister<Register>(),
- locations->Out().AsFpuRegister<FRegister>());
+ __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+ locations->Out().AsFpuRegister<FRegister>());
}
} else {
if (!Primitive::IsFloatingPointType(type)) {
@@ -3578,8 +3653,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
// Pass FP parameters in core registers.
__ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
locations->InAt(1).AsFpuRegister<FRegister>());
- __ Mfhc1(locations->GetTemp(2).AsRegister<Register>(),
- locations->InAt(1).AsFpuRegister<FRegister>());
+ __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+ locations->InAt(1).AsFpuRegister<FRegister>());
}
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
instruction,
@@ -4536,14 +4611,12 @@ void InstructionCodeGeneratorMIPS::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UN
codegen_->GenerateFrameExit();
}
-void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unreachable";
- UNREACHABLE();
+void LocationsBuilderMIPS::VisitRor(HRor* ror) {
+ HandleShift(ror);
}
-void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unreachable";
- UNREACHABLE();
+void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror) {
+ HandleShift(ror);
}
void LocationsBuilderMIPS::VisitShl(HShl* shl) {
@@ -4731,6 +4804,7 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) {
Primitive::Type input_type = conversion->GetInputType();
Primitive::Type result_type = conversion->GetResultType();
DCHECK_NE(input_type, result_type);
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
(result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
@@ -4738,8 +4812,9 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) {
}
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
- if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
- (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
+ if (!isR6 &&
+ ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
+ (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) {
call_kind = LocationSummary::kCall;
}
@@ -4777,6 +4852,8 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
Primitive::Type result_type = conversion->GetResultType();
Primitive::Type input_type = conversion->GetInputType();
bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
DCHECK_NE(input_type, result_type);
@@ -4822,7 +4899,37 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
<< " to " << result_type;
}
} else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
- if (input_type != Primitive::kPrimLong) {
+ if (input_type == Primitive::kPrimLong) {
+ if (isR6) {
+ // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+ // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+ Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register src_low = locations->InAt(0).AsRegisterPairLow<Register>();
+ FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+ __ Mtc1(src_low, FTMP);
+ __ Mthc1(src_high, FTMP);
+ if (result_type == Primitive::kPrimFloat) {
+ __ Cvtsl(dst, FTMP);
+ } else {
+ __ Cvtdl(dst, FTMP);
+ }
+ } else {
+ int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
+ : QUICK_ENTRY_POINT(pL2d);
+ bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
+ : IsDirectEntrypoint(kQuickL2d);
+ codegen_->InvokeRuntime(entry_offset,
+ conversion,
+ conversion->GetDexPc(),
+ nullptr,
+ direct);
+ if (result_type == Primitive::kPrimFloat) {
+ CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+ } else {
+ CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+ }
+ }
+ } else {
Register src = locations->InAt(0).AsRegister<Register>();
FRegister dst = locations->Out().AsFpuRegister<FRegister>();
__ Mtc1(src, FTMP);
@@ -4831,54 +4938,168 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
} else {
__ Cvtdw(dst, FTMP);
}
- } else {
- int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
- : QUICK_ENTRY_POINT(pL2d);
- bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
- : IsDirectEntrypoint(kQuickL2d);
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr,
- direct);
- if (result_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickL2f, float, int64_t>();
- } else {
- CheckEntrypointTypes<kQuickL2d, double, int64_t>();
- }
}
} else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
- int32_t entry_offset;
- bool direct;
- if (result_type != Primitive::kPrimLong) {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
- : QUICK_ENTRY_POINT(pD2iz);
- direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz)
- : IsDirectEntrypoint(kQuickD2iz);
+ if (result_type == Primitive::kPrimLong) {
+ if (isR6) {
+ // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+ // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+ FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+ Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+ Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+ MipsLabel truncate;
+ MipsLabel done;
+
+ // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+      // value when the input is either a NaN or outside of the range of the output type
+ // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+ // the same result.
+ //
+ // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+ // value of the output type if the input is outside of the range after the truncation or
+ // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+ // results. This matches the desired float/double-to-int/long conversion exactly.
+ //
+ // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+ //
+ // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+ // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+ // even though it must be NAN2008=1 on R6.
+ //
+ // The code takes care of the different behaviors by first comparing the input to the
+      // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+      // If the input is greater than or equal to the minimum, it proceeds to the truncate
+ // instruction, which will handle such an input the same way irrespective of NAN2008.
+ // Otherwise the input is compared to itself to determine whether it is a NaN or not
+ // in order to return either zero or the minimum value.
+ //
+ // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+ // truncate instruction for MIPS64R6.
+ if (input_type == Primitive::kPrimFloat) {
+ uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min());
+ __ LoadConst32(TMP, min_val);
+ __ Mtc1(TMP, FTMP);
+ __ CmpLeS(FTMP, FTMP, src);
+ } else {
+ uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min());
+ __ LoadConst32(TMP, High32Bits(min_val));
+ __ Mtc1(ZERO, FTMP);
+ __ Mthc1(TMP, FTMP);
+ __ CmpLeD(FTMP, FTMP, src);
+ }
+
+ __ Bc1nez(FTMP, &truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ CmpEqS(FTMP, src, src);
+ } else {
+ __ CmpEqD(FTMP, src, src);
+ }
+ __ Move(dst_low, ZERO);
+ __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min());
+ __ Mfc1(TMP, FTMP);
+ __ And(dst_high, dst_high, TMP);
+
+ __ B(&done);
+
+ __ Bind(&truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ TruncLS(FTMP, src);
+ } else {
+ __ TruncLD(FTMP, src);
+ }
+ __ Mfc1(dst_low, FTMP);
+ __ Mfhc1(dst_high, FTMP);
+
+ __ Bind(&done);
+ } else {
+ int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
+ : QUICK_ENTRY_POINT(pD2l);
+          bool direct = (input_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
+ : IsDirectEntrypoint(kQuickD2l);
+ codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct);
+ if (input_type == Primitive::kPrimFloat) {
+ CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+ } else {
+ CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+ }
+ }
} else {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
- : QUICK_ENTRY_POINT(pD2l);
- direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
- : IsDirectEntrypoint(kQuickD2l);
- }
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr,
- direct);
- if (result_type != Primitive::kPrimLong) {
+ FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+ Register dst = locations->Out().AsRegister<Register>();
+ MipsLabel truncate;
+ MipsLabel done;
+
+ // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+ // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+ // even though it must be NAN2008=1 on R6.
+ //
+ // For details see the large comment above for the truncation of float/double to long on R6.
+ //
+ // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+ // truncate instruction for MIPS64R6.
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+ uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, min_val);
+ __ Mtc1(TMP, FTMP);
} else {
- CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+ uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, High32Bits(min_val));
+ __ Mtc1(ZERO, FTMP);
+ if (fpu_32bit) {
+ __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1));
+ } else {
+ __ Mthc1(TMP, FTMP);
+ }
}
- } else {
+
+ if (isR6) {
+ if (input_type == Primitive::kPrimFloat) {
+ __ CmpLeS(FTMP, FTMP, src);
+ } else {
+ __ CmpLeD(FTMP, FTMP, src);
+ }
+ __ Bc1nez(FTMP, &truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ CmpEqS(FTMP, src, src);
+ } else {
+ __ CmpEqD(FTMP, src, src);
+ }
+ __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+ __ Mfc1(TMP, FTMP);
+ __ And(dst, dst, TMP);
+ } else {
+ if (input_type == Primitive::kPrimFloat) {
+ __ ColeS(0, FTMP, src);
+ } else {
+ __ ColeD(0, FTMP, src);
+ }
+ __ Bc1t(0, &truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ CeqS(0, src, src);
+ } else {
+ __ CeqD(0, src, src);
+ }
+ __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+ __ Movf(dst, ZERO, 0);
+ }
+
+ __ B(&done);
+
+ __ Bind(&truncate);
+
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+ __ TruncWS(FTMP, src);
} else {
- CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+ __ TruncWD(FTMP, src);
}
+ __ Mfc1(dst, FTMP);
+
+ __ Bind(&done);
}
} else if (Primitive::IsFloatingPointType(result_type) &&
Primitive::IsFloatingPointType(input_type)) {
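The NAN2008 handling above reduces to the following semantics for float -> int32 (the long and double cases are analogous). A self-contained C++ model of the emitted sequence (a sketch; the real code keeps everything in FPU registers):

#include <cstdint>
#include <limits>

int32_t TruncFloatToInt32(float src) {
  constexpr int32_t kMin = std::numeric_limits<int32_t>::min();
  constexpr int32_t kMax = std::numeric_limits<int32_t>::max();
  // CmpLeS/ColeS + branch to &truncate: at or above the minimum, the
  // hardware truncate gives the right answer under either NAN2008 setting
  // (inputs past the top of the range come back as INT32_MAX either way).
  if (static_cast<float>(kMin) <= src) {
    return (src >= static_cast<float>(kMax)) ? kMax : static_cast<int32_t>(src);
  }
  // Below the minimum, or NaN (NaN fails every ordered comparison):
  // CmpEqS + And (R6) or CeqS + Movf (R2) select between kMin and 0.
  return (src == src) ? kMin : 0;
}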
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 38302ad315..c3d4851ee9 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -197,7 +197,7 @@ class LocationsBuilderMIPS : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS);
};
-class InstructionCodeGeneratorMIPS : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 05834ff063..38c32cad06 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -391,24 +391,24 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
public:
- explicit DeoptimizationSlowPathMIPS64(HInstruction* instruction)
+ explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
- DCHECK(instruction_->IsDeoptimize());
- HDeoptimize* deoptimize = instruction_->AsDeoptimize();
- uint32_t dex_pc = deoptimize->GetDexPc();
- CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
- mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64);
};
@@ -1113,7 +1113,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc
InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph,
CodeGeneratorMIPS64* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -1247,7 +1247,7 @@ void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instructio
}
void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) {
- DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+ DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
Primitive::Type type = instr->GetResultType();
@@ -1265,7 +1265,7 @@ void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) {
}
void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
- DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+ DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
LocationSummary* locations = instr->GetLocations();
Primitive::Type type = instr->GetType();
@@ -1290,13 +1290,19 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
? static_cast<uint32_t>(rhs_imm & kMaxIntShiftValue)
: static_cast<uint32_t>(rhs_imm & kMaxLongShiftValue);
- if (type == Primitive::kPrimInt) {
+ if (shift_value == 0) {
+ if (dst != lhs) {
+ __ Move(dst, lhs);
+ }
+ } else if (type == Primitive::kPrimInt) {
if (instr->IsShl()) {
__ Sll(dst, lhs, shift_value);
} else if (instr->IsShr()) {
__ Sra(dst, lhs, shift_value);
- } else {
+ } else if (instr->IsUShr()) {
__ Srl(dst, lhs, shift_value);
+ } else {
+ __ Rotr(dst, lhs, shift_value);
}
} else {
if (shift_value < 32) {
@@ -1304,8 +1310,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
__ Dsll(dst, lhs, shift_value);
} else if (instr->IsShr()) {
__ Dsra(dst, lhs, shift_value);
- } else {
+ } else if (instr->IsUShr()) {
__ Dsrl(dst, lhs, shift_value);
+ } else {
+ __ Drotr(dst, lhs, shift_value);
}
} else {
shift_value -= 32;
@@ -1313,8 +1321,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
__ Dsll32(dst, lhs, shift_value);
} else if (instr->IsShr()) {
__ Dsra32(dst, lhs, shift_value);
- } else {
+ } else if (instr->IsUShr()) {
__ Dsrl32(dst, lhs, shift_value);
+ } else {
+ __ Drotr32(dst, lhs, shift_value);
}
}
}
@@ -1324,16 +1334,20 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
__ Sllv(dst, lhs, rhs_reg);
} else if (instr->IsShr()) {
__ Srav(dst, lhs, rhs_reg);
- } else {
+ } else if (instr->IsUShr()) {
__ Srlv(dst, lhs, rhs_reg);
+ } else {
+ __ Rotrv(dst, lhs, rhs_reg);
}
} else {
if (instr->IsShl()) {
__ Dsllv(dst, lhs, rhs_reg);
} else if (instr->IsShr()) {
__ Dsrav(dst, lhs, rhs_reg);
- } else {
+ } else if (instr->IsUShr()) {
__ Dsrlv(dst, lhs, rhs_reg);
+ } else {
+ __ Drotrv(dst, lhs, rhs_reg);
}
}
}
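With HRor routed through HandleShift, the register and immediate forms above lower to rotr/drotr/drotr32 and rotrv/drotrv. A sketch of the 32-bit rotate-right being emitted, assuming the shift amount was already masked to the type width as in the surrounding code (the zero case is the separate plain-move branch, since a 32-bit shift would be undefined in C++):

    #include <cstdint>

    // Rotate right by `shift` bits, 0 <= shift < 32.
    uint32_t Rotr32(uint32_t x, uint32_t shift) {
      return (shift == 0u) ? x : ((x >> shift) | (x << (32u - shift)));
    }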
@@ -1955,8 +1969,7 @@ void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instru
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
int64_t imm = Int64FromConstant(second.GetConstant());
- uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
- DCHECK(IsPowerOfTwo(abs_imm));
+ uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
int ctz_imm = CTZ(abs_imm);
if (instruction->IsDiv()) {
@@ -2138,7 +2151,7 @@ void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* in
// Do not generate anything. DivZeroCheck would prevent any code from being executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (IsPowerOfTwo(std::abs(imm))) {
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
DivRemByPowerOfTwo(instruction);
} else {
DCHECK(imm <= -2 || imm >= 2);
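AbsOrMin earns its name here: std::abs applied to the most negative value overflows (undefined behavior), while returning the minimum itself keeps the power-of-two path valid, because the unsigned reinterpretation 0x8000000000000000 is still a power of two with CTZ of 63. A sketch of the intended behavior for the 64-bit case (ART's actual AbsOrMin is a template utility; this is only the shape of it):

    #include <cstdint>
    #include <cstdlib>
    #include <limits>

    // std::abs(INT64_MIN) is undefined, so return the minimum unchanged;
    // static_cast<uint64_t>(result) is then 1 << 63, a power of two.
    int64_t AbsOrMin(int64_t v) {
      return (v == std::numeric_limits<int64_t>::min()) ? v : std::llabs(v);
    }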
@@ -2736,9 +2749,8 @@ void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
- DeoptimizationSlowPathMIPS64(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCodeMIPS64* slow_path =
+ deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -2750,6 +2762,10 @@ void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ Nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
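The nop matters because stack maps are keyed by native PC: if the previous instruction already recorded a map at the current offset, a second map at the same PC would be indistinguishable from it. A hypothetical model of the keying (RecordPcInfo and HasStackMapAtCurrentPc are the real entry points; the map below only illustrates the collision):

    #include <cstdint>
    #include <map>

    std::map<uint32_t, uint32_t> stack_maps;  // native PC offset -> dex PC

    void Record(uint32_t* native_pc, uint32_t dex_pc) {
      if (stack_maps.count(*native_pc) != 0) {
        *native_pc += 4;  // emit one 4-byte MIPS nop first, as above
      }
      stack_maps[*native_pc] = dex_pc;
    }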
@@ -3722,14 +3738,12 @@ void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_
codegen_->GenerateFrameExit();
}
-void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unreachable";
- UNREACHABLE();
+void LocationsBuilderMIPS64::VisitRor(HRor* ror) {
+ HandleShift(ror);
}
-void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unreachable";
- UNREACHABLE();
+void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror) {
+ HandleShift(ror);
}
void LocationsBuilderMIPS64::VisitShl(HShl* shl) {
@@ -3918,36 +3932,18 @@ void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) {
LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
}
- LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
- if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
- (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
- call_kind = LocationSummary::kCall;
- }
-
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion);
- if (call_kind == LocationSummary::kNoCall) {
- if (Primitive::IsFloatingPointType(input_type)) {
- locations->SetInAt(0, Location::RequiresFpuRegister());
- } else {
- locations->SetInAt(0, Location::RequiresRegister());
- }
-
- if (Primitive::IsFloatingPointType(result_type)) {
- locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
- } else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- }
+ if (Primitive::IsFloatingPointType(input_type)) {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
} else {
- InvokeRuntimeCallingConvention calling_convention;
-
- if (Primitive::IsFloatingPointType(input_type)) {
- locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
- } else {
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
- locations->SetOut(calling_convention.GetReturnLocation(result_type));
+ if (Primitive::IsFloatingPointType(result_type)) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
}
@@ -3992,55 +3988,107 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver
<< " to " << result_type;
}
} else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
- if (input_type != Primitive::kPrimLong) {
- FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
- GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
- __ Mtc1(src, FTMP);
+ FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+ GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+ if (input_type == Primitive::kPrimLong) {
+ __ Dmtc1(src, FTMP);
if (result_type == Primitive::kPrimFloat) {
- __ Cvtsw(dst, FTMP);
+ __ Cvtsl(dst, FTMP);
} else {
- __ Cvtdw(dst, FTMP);
+ __ Cvtdl(dst, FTMP);
}
} else {
- int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
- : QUICK_ENTRY_POINT(pL2d);
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr);
+ __ Mtc1(src, FTMP);
if (result_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+ __ Cvtsw(dst, FTMP);
} else {
- CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+ __ Cvtdw(dst, FTMP);
}
}
} else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
- int32_t entry_offset;
- if (result_type != Primitive::kPrimLong) {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
- : QUICK_ENTRY_POINT(pD2iz);
+ GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+ FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ Mips64Label truncate;
+ Mips64Label done;
+
+ // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+ // value when the input is either a NaN or is outside of the range of the output type
+ // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+ // the same result.
+ //
+ // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+ // value of the output type if the input is outside of the range after the truncation or
+ // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+ // results. This matches the desired float/double-to-int/long conversion exactly.
+ //
+ // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+ //
+ // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+ // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+ // even though it must be NAN2008=1 on R6.
+ //
+ // The code takes care of the different behaviors by first comparing the input to the
+ // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+ // If the input is greater than or equal to the minimum, it proceeds to the truncate
+ // instruction, which will handle such an input the same way irrespective of NAN2008.
+ // Otherwise the input is compared to itself to determine whether it is a NaN or not
+ // in order to return either zero or the minimum value.
+ //
+ // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+ // truncate instruction for MIPS64R6.
+ if (input_type == Primitive::kPrimFloat) {
+ uint32_t min_val = (result_type == Primitive::kPrimLong)
+ ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min())
+ : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, min_val);
+ __ Mtc1(TMP, FTMP);
+ __ CmpLeS(FTMP, FTMP, src);
} else {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
- : QUICK_ENTRY_POINT(pD2l);
+ uint64_t min_val = (result_type == Primitive::kPrimLong)
+ ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min())
+ : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+ __ LoadConst64(TMP, min_val);
+ __ Dmtc1(TMP, FTMP);
+ __ CmpLeD(FTMP, FTMP, src);
}
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr);
- if (result_type != Primitive::kPrimLong) {
+
+ __ Bc1nez(FTMP, &truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ CmpEqS(FTMP, src, src);
+ } else {
+ __ CmpEqD(FTMP, src, src);
+ }
+ if (result_type == Primitive::kPrimLong) {
+ __ LoadConst64(dst, std::numeric_limits<int64_t>::min());
+ } else {
+ __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+ }
+ __ Mfc1(TMP, FTMP);
+ __ And(dst, dst, TMP);
+
+ __ Bc(&done);
+
+ __ Bind(&truncate);
+
+ if (result_type == Primitive::kPrimLong) {
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+ __ TruncLS(FTMP, src);
} else {
- CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+ __ TruncLD(FTMP, src);
}
+ __ Dmfc1(dst, FTMP);
} else {
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+ __ TruncWS(FTMP, src);
} else {
- CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+ __ TruncWD(FTMP, src);
}
+ __ Mfc1(dst, FTMP);
}
+
+ __ Bind(&done);
} else if (Primitive::IsFloatingPointType(result_type) &&
Primitive::IsFloatingPointType(input_type)) {
FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
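The 64-bit path uses the same three-way split the comment block above describes: CmpLeS/CmpLeD selects the truncate fast path for inputs at or above the type minimum, CmpEqS/CmpEqD separates NaN from a too-small input, and the And masks the preloaded minimum to zero for NaN. The double-to-long semantics being matched, as an illustrative sketch:

    #include <cstdint>
    #include <limits>

    // Java double->long: NaN converts to 0, out-of-range values saturate.
    int64_t JavaDoubleToLong(double src) {
      constexpr int64_t kMin = std::numeric_limits<int64_t>::min();
      constexpr int64_t kMax = std::numeric_limits<int64_t>::max();
      if (src != src) return 0;                           // NaN
      if (src <= static_cast<double>(kMin)) return kMin;  // saturate low
      if (src >= static_cast<double>(kMax)) return kMax;  // saturate high
      return static_cast<int64_t>(src);                   // trunc.l.d case
    }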
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 60ff96dc43..7182e8e987 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -201,7 +201,7 @@ class LocationsBuilderMIPS64 : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64);
};
-class InstructionCodeGeneratorMIPS64 : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 86327fb741..6ab3aaff4b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -365,11 +365,10 @@ class TypeCheckSlowPathX86 : public SlowPathCode {
class DeoptimizationSlowPathX86 : public SlowPathCode {
public:
- explicit DeoptimizationSlowPathX86(HInstruction* instruction)
+ explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- DCHECK(instruction_->IsDeoptimize());
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
@@ -383,7 +382,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode {
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
};
@@ -892,7 +891,7 @@ void CodeGeneratorX86::UpdateBlockedPairRegisters() const {
}
InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -1611,9 +1610,7 @@ void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena())
- DeoptimizationSlowPathX86(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -1625,6 +1622,10 @@ void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
@@ -3223,11 +3224,12 @@ void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
Register out_register = locations->Out().AsRegister<Register>();
Register input_register = locations->InAt(0).AsRegister<Register>();
int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+ uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
- DCHECK(IsPowerOfTwo(std::abs(imm)));
Register num = locations->GetTemp(0).AsRegister<Register>();
- __ leal(num, Address(input_register, std::abs(imm) - 1));
+ __ leal(num, Address(input_register, abs_imm - 1));
__ testl(input_register, input_register);
__ cmovl(kGreaterEqual, num, input_register);
int shift = CTZ(imm);
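The leal/testl/cmovl/sarl sequence is the standard branch-free signed division by a power of two: an arithmetic shift alone rounds toward negative infinity, so negative dividends are biased by 2^k - 1 first to round toward zero instead. A sketch for a positive divisor 2^k with 0 < k < 31, assuming arithmetic right shift of signed values (guaranteed since C++20 and universal in practice):

    #include <cstdint>

    int32_t DivByPowerOfTwo(int32_t dividend, int k) {
      // Bias negative dividends so the shift truncates toward zero.
      int32_t n = (dividend >= 0) ? dividend                    // cmovl
                                  : dividend + ((1 << k) - 1);  // leal
      return n >> k;                                            // sarl
    }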
@@ -3340,7 +3342,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
// Do not generate anything for 0. DivZeroCheck would forbid any generated code.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
+ } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
DivByPowerOfTwo(instruction->AsDiv());
} else {
DCHECK(imm <= -2 || imm >= 2);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index df7347658b..c65c423eae 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -178,7 +178,7 @@ class LocationsBuilderX86 : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
};
-class InstructionCodeGeneratorX86 : public HGraphVisitor {
+class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 76a4ce2e93..294b40e3d4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -387,18 +387,16 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode {
class DeoptimizationSlowPathX86_64 : public SlowPathCode {
public:
- explicit DeoptimizationSlowPathX86_64(HInstruction* instruction)
+ explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
- DCHECK(instruction_->IsDeoptimize());
- HDeoptimize* deoptimize = instruction_->AsDeoptimize();
x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
- deoptimize,
- deoptimize->GetDexPc(),
+ instruction_,
+ instruction_->GetDexPc(),
this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -406,7 +404,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode {
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
};
@@ -1000,7 +998,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
CodeGeneratorX86_64* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -1594,9 +1592,7 @@ void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena())
- DeoptimizationSlowPathX86_64(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -1608,6 +1604,10 @@ void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
@@ -3350,13 +3350,13 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
int64_t imm = Int64FromConstant(second.GetConstant());
-
- DCHECK(IsPowerOfTwo(std::abs(imm)));
+ DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+ uint64_t abs_imm = AbsOrMin(imm);
CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
if (instruction->GetResultType() == Primitive::kPrimInt) {
- __ leal(tmp, Address(numerator, std::abs(imm) - 1));
+ __ leal(tmp, Address(numerator, abs_imm - 1));
__ testl(numerator, numerator);
__ cmov(kGreaterEqual, tmp, numerator);
int shift = CTZ(imm);
@@ -3371,7 +3371,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
- codegen_->Load64BitValue(rdx, std::abs(imm) - 1);
+ codegen_->Load64BitValue(rdx, abs_imm - 1);
__ addq(rdx, numerator);
__ testq(numerator, numerator);
__ cmov(kGreaterEqual, rdx, numerator);
@@ -3529,7 +3529,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in
// Do not generate anything. DivZeroCheck would prevent any code from being executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) {
+ } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
DivByPowerOfTwo(instruction->AsDiv());
} else {
DCHECK(imm <= -2 || imm >= 2);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index c5e8a04da6..505c9dcdad 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -183,7 +183,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
};
-class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
+class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index c504ded54c..b90afb1d73 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -211,19 +211,6 @@ bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op,
// Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation.
bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) {
- // This simplification is currently supported on x86, x86_64, ARM and ARM64.
- // TODO: Implement it for MIPS/64.
- const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
- switch (instruction_set) {
- case kArm:
- case kArm64:
- case kThumb2:
- case kX86:
- case kX86_64:
- break;
- default:
- return false;
- }
DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
HInstruction* left = op->GetLeft();
HInstruction* right = op->GetRight();
@@ -1261,19 +1248,6 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) {
DCHECK(invoke->IsInvokeStaticOrDirect());
DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic);
- // This simplification is currently supported on x86, x86_64, ARM and ARM64.
- // TODO: Implement it for MIPS/64.
- const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
- switch (instruction_set) {
- case kArm:
- case kArm64:
- case kThumb2:
- case kX86:
- case kX86_64:
- break;
- default:
- return;
- }
HInstruction* value = invoke->InputAt(0);
HInstruction* distance = invoke->InputAt(1);
// Replace the invoke with an HRor.
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 4683aee603..b1fbf28204 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -502,9 +502,6 @@ static void GenUnsafeGet(HInvoke* invoke,
bool is_volatile,
CodeGeneratorARM* codegen) {
LocationSummary* locations = invoke->GetLocations();
- DCHECK((type == Primitive::kPrimInt) ||
- (type == Primitive::kPrimLong) ||
- (type == Primitive::kPrimNot));
ArmAssembler* assembler = codegen->GetAssembler();
Location base_loc = locations->InAt(1);
Register base = base_loc.AsRegister<Register>(); // Object pointer.
@@ -512,30 +509,67 @@ static void GenUnsafeGet(HInvoke* invoke,
Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only.
Location trg_loc = locations->Out();
- if (type == Primitive::kPrimLong) {
- Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
- __ add(IP, base, ShifterOperand(offset));
- if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
- Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
- __ ldrexd(trg_lo, trg_hi, IP);
- } else {
- __ ldrd(trg_lo, Address(IP));
+ switch (type) {
+ case Primitive::kPrimInt: {
+ Register trg = trg_loc.AsRegister<Register>();
+ __ ldr(trg, Address(base, offset));
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+ break;
}
- } else {
- Register trg = trg_loc.AsRegister<Register>();
- __ ldr(trg, Address(base, offset));
- }
- if (is_volatile) {
- __ dmb(ISH);
- }
+ case Primitive::kPrimNot: {
+ Register trg = trg_loc.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ codegen->GenerateArrayLoadWithBakerReadBarrier(
+ invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+ } else {
+ __ ldr(trg, Address(base, offset));
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+ codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+ }
+ } else {
+ __ ldr(trg, Address(base, offset));
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+ __ MaybeUnpoisonHeapReference(trg);
+ }
+ break;
+ }
- if (type == Primitive::kPrimNot) {
- codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+ case Primitive::kPrimLong: {
+ Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
+ __ add(IP, base, ShifterOperand(offset));
+ if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+ Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
+ __ ldrexd(trg_lo, trg_hi, IP);
+ } else {
+ __ ldrd(trg_lo, Address(IP));
+ }
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected type " << type;
+ UNREACHABLE();
}
}
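For the volatile variants, every branch of the switch above pairs the load with a dmb ISH, which yields at least load-acquire ordering. A sketch of the equivalent fence placement in portable terms (std::atomic is only an analogue for the emitted barrier, not the codegen API):

    #include <atomic>

    int VolatileGetInt(const std::atomic<int>* addr) {
      int value = addr->load(std::memory_order_relaxed);    // ldr
      std::atomic_thread_fence(std::memory_order_seq_cst);  // dmb ISH
      return value;
    }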
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ Primitive::Type type) {
bool can_call = kEmitCompilerReadBarrier &&
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -548,25 +582,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in InstructionCodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
@@ -808,6 +847,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
}
// Prevent reordering with prior memory operations.
+ // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
+ // latter allows a preceding load to be delayed past the STXR
+ // instruction below.
__ dmb(ISH);
__ add(tmp_ptr, base, ShifterOperand(offset));
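The comment added above carries the correctness argument: DMB ISHST orders only store-store, so a load feeding the compare-and-swap could sink past the STXR, while DMB ISH orders prior loads against the later store as well. A sketch of the stronger ordering in portable terms (a seq_cst fence stands in for DMB ISH; ISHST has no direct C++ counterpart):

    #include <atomic>

    std::atomic<int> word;

    int ReadThenStore(int* src) {
      int expected = *src;                                  // preceding load
      std::atomic_thread_fence(std::memory_order_seq_cst);  // DMB ISH analogue
      word.store(expected, std::memory_order_relaxed);      // STXR analogue
      return expected;
    }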
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index f723940444..81cab86c83 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1035,7 +1035,11 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
__ Cbnz(tmp_32, &loop_head);
} else {
- __ Dmb(InnerShareable, BarrierWrites);
+ // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction
+ // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST)
+ // one, as the latter allows a preceding load to be delayed past
+ // the STXR instruction below.
+ __ Dmb(InnerShareable, BarrierAll);
__ Bind(&loop_head);
// TODO: When `type == Primitive::kPrimNot`, add a read barrier for
// the reference stored in the object before attempting the CAS,
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 06fab616ad..bc126a2716 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -43,14 +43,18 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS::GetAllocator() {
return codegen_->GetGraph()->GetArena();
}
-inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() {
+inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() const {
return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
}
-inline bool IntrinsicCodeGeneratorMIPS::IsR6() {
+inline bool IntrinsicCodeGeneratorMIPS::IsR6() const {
return codegen_->GetInstructionSetFeatures().IsR6();
}
+inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const {
+ return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
+}
+
#define __ codegen->GetAssembler()->
static void MoveFromReturnRegister(Location trg,
@@ -162,7 +166,7 @@ static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler*
Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
__ Mfc1(out_lo, in);
- __ Mfhc1(out_hi, in);
+ __ MoveFromFpuHigh(out_hi, in);
} else {
Register out = locations->Out().AsRegister<Register>();
@@ -204,7 +208,7 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler*
Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
__ Mtc1(in_lo, out);
- __ Mthc1(in_hi, out);
+ __ MoveToFpuHigh(in_hi, out);
} else {
Register in = locations->InAt(0).AsRegister<Register>();
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index f86b0efe4a..575a7d0a23 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -67,8 +67,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
#undef OPTIMIZING_INTRINSICS
- bool IsR2OrNewer(void);
- bool IsR6(void);
+ bool IsR2OrNewer() const;
+ bool IsR6() const;
+ bool Is32BitFPU() const;
private:
MipsAssembler* GetAssembler();
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index f5a7048b01..b80c6bde82 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2183,10 +2183,7 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic,
IntrinsicExceptions exceptions) {
intrinsic_ = intrinsic;
IntrinsicOptimizations opt(this);
- if (needs_env_or_cache == kNoEnvironmentOrCache) {
- opt.SetDoesNotNeedDexCache();
- opt.SetDoesNotNeedEnvironment();
- }
+
// Adjust method's side effects from intrinsic table.
switch (side_effects) {
case kNoSideEffects: SetSideEffects(SideEffects::None()); break;
@@ -2194,6 +2191,14 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic,
case kWriteSideEffects: SetSideEffects(SideEffects::AllWrites()); break;
case kAllSideEffects: SetSideEffects(SideEffects::AllExceptGCDependency()); break;
}
+
+ if (needs_env_or_cache == kNoEnvironmentOrCache) {
+ opt.SetDoesNotNeedDexCache();
+ opt.SetDoesNotNeedEnvironment();
+ } else {
+ // If we need an environment, that means there will be a call, which can trigger GC.
+ SetSideEffects(GetSideEffects().Union(SideEffects::CanTriggerGC()));
+ }
// Adjust method's exception status from intrinsic table.
switch (exceptions) {
case kNoThrow: SetCanThrow(false); break;
@@ -2325,4 +2330,19 @@ HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction*
}
}
+std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs) {
+ os << "["
+ << " source=" << rhs.GetSource()
+ << " destination=" << rhs.GetDestination()
+ << " type=" << rhs.GetType()
+ << " instruction=";
+ if (rhs.GetInstruction() != nullptr) {
+ os << rhs.GetInstruction()->DebugName() << ' ' << rhs.GetInstruction()->GetId();
+ } else {
+ os << "null";
+ }
+ os << " ]";
+ return os;
+}
+
} // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 59c07690b1..23132308f0 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1881,6 +1881,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
return false;
}
+ virtual bool IsActualObject() const {
+ return GetType() == Primitive::kPrimNot;
+ }
+
void SetReferenceTypeInfo(ReferenceTypeInfo rti);
ReferenceTypeInfo GetReferenceTypeInfo() const {
@@ -2500,8 +2504,10 @@ class HTryBoundary : public HTemplateInstruction<0> {
// Deoptimize to interpreter, upon checking a condition.
class HDeoptimize : public HTemplateInstruction<1> {
public:
+ // We set CanTriggerGC to prevent any intermediate address from being live
+ // at the point of the `HDeoptimize`.
HDeoptimize(HInstruction* cond, uint32_t dex_pc)
- : HTemplateInstruction(SideEffects::None(), dex_pc) {
+ : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
SetRawInputAt(0, cond);
}
@@ -4017,8 +4023,10 @@ class HRem : public HBinaryOperation {
class HDivZeroCheck : public HExpression<1> {
public:
+ // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException`
+ // constructor.
HDivZeroCheck(HInstruction* value, uint32_t dex_pc)
- : HExpression(value->GetType(), SideEffects::None(), dex_pc) {
+ : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
SetRawInputAt(0, value);
}
@@ -4539,8 +4547,10 @@ class HPhi : public HInstruction {
class HNullCheck : public HExpression<1> {
public:
+ // `HNullCheck` can trigger GC, as it may call the `NullPointerException`
+ // constructor.
HNullCheck(HInstruction* value, uint32_t dex_pc)
- : HExpression(value->GetType(), SideEffects::None(), dex_pc) {
+ : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
SetRawInputAt(0, value);
}
@@ -4861,8 +4871,10 @@ class HArrayLength : public HExpression<1> {
class HBoundsCheck : public HExpression<2> {
public:
+ // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException`
+ // constructor.
HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc)
- : HExpression(index->GetType(), SideEffects::None(), dex_pc) {
+ : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
DCHECK(index->GetType() == Primitive::kPrimInt);
SetRawInputAt(0, index);
SetRawInputAt(1, length);
@@ -5626,8 +5638,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> {
}
bool IsPending() const {
- DCHECK(!source_.IsInvalid() || destination_.IsInvalid());
- return destination_.IsInvalid() && !source_.IsInvalid();
+ DCHECK(source_.IsValid() || destination_.IsInvalid());
+ return destination_.IsInvalid() && source_.IsValid();
}
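The rewritten predicate reads directly off the legal state space: a move becomes pending when cycle resolution clears its destination while the source is still set, both-invalid means the move was eliminated, and a valid destination with an invalid source is the state the DCHECK forbids. As a standalone sketch:

    struct MoveState { bool source_valid; bool destination_valid; };

    bool IsPending(const MoveState& m) {
      // (valid, valid): normal move; (valid, invalid): pending;
      // (invalid, invalid): eliminated; (invalid, valid): forbidden.
      return !m.destination_valid && m.source_valid;
    }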
// True if this blocks a move from the given location.
@@ -5671,6 +5683,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> {
HInstruction* instruction_;
};
+std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs);
+
static constexpr size_t kDefaultNumberOfMoves = 4;
class HParallelMove : public HTemplateInstruction<0> {
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 18405f2623..445cdab191 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -107,6 +107,7 @@ class HArm64IntermediateAddress : public HExpression<2> {
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+ bool IsActualObject() const OVERRIDE { return false; }
HInstruction* GetBaseAddress() const { return InputAt(0); }
HInstruction* GetOffset() const { return InputAt(1); }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index cafc6c5440..bb840eabdd 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -17,6 +17,7 @@
#include "optimizing_compiler.h"
#include <fstream>
+#include <memory>
#include <stdint.h>
#ifdef ART_ENABLE_CODEGEN_arm64
@@ -52,6 +53,8 @@
#include "driver/compiler_driver-inl.h"
#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
+#include "dwarf/method_debug_info.h"
+#include "elf_writer_debug.h"
#include "elf_writer_quick.h"
#include "graph_checker.h"
#include "graph_visualizer.h"
@@ -60,6 +63,7 @@
#include "inliner.h"
#include "instruction_simplifier.h"
#include "intrinsics.h"
+#include "jit/debugger_interface.h"
#include "jit/jit_code_cache.h"
#include "licm.h"
#include "jni/quick/jni_compiler.h"
@@ -68,6 +72,7 @@
#include "prepare_for_register_allocation.h"
#include "reference_type_propagation.h"
#include "register_allocator.h"
+#include "oat_quick_method_header.h"
#include "sharpening.h"
#include "side_effects_analysis.h"
#include "ssa_builder.h"
@@ -965,6 +970,39 @@ bool OptimizingCompiler::JitCompile(Thread* self,
return false;
}
+ if (GetCompilerDriver()->GetCompilerOptions().GetGenerateDebugInfo()) {
+ const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
+ const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
+ CompiledMethod compiled_method(
+ GetCompilerDriver(),
+ codegen->GetInstructionSet(),
+ ArrayRef<const uint8_t>(code_allocator.GetMemory()),
+ codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
+ codegen->GetCoreSpillMask(),
+ codegen->GetFpuSpillMask(),
+ ArrayRef<const SrcMapElem>(),
+ ArrayRef<const uint8_t>(), // mapping_table.
+ ArrayRef<const uint8_t>(stack_map_data, stack_map_size),
+ ArrayRef<const uint8_t>(), // native_gc_map.
+ ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+ ArrayRef<const LinkerPatch>());
+ dwarf::MethodDebugInfo method_debug_info {
+ dex_file,
+ class_def_idx,
+ method_idx,
+ access_flags,
+ code_item,
+ false, // deduped.
+ code_address,
+ code_address + code_allocator.GetSize(),
+ &compiled_method
+ };
+ ArrayRef<const uint8_t> elf_file = dwarf::WriteDebugElfFileForMethod(method_debug_info);
+ CreateJITCodeEntryForAddress(code_address,
+ std::unique_ptr<const uint8_t[]>(elf_file.data()),
+ elf_file.size());
+ }
+
return true;
}
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 176c50ce21..9d136f3ae6 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include <iostream>
#include "parallel_move_resolver.h"
@@ -172,7 +171,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
i = -1;
} else if (required_swap != nullptr) {
// A move is required to swap. We walk back the cycle to find the
- // move by just returning from this `PerforrmMove`.
+ // move by just returning from this `PerformMove`.
moves_[index]->ClearPending(destination);
return required_swap;
}
@@ -201,7 +200,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
} else {
for (MoveOperands* other_move : moves_) {
if (other_move->Blocks(destination)) {
- DCHECK(other_move->IsPending());
+ DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move;
if (!move->Is64BitMove() && other_move->Is64BitMove()) {
// We swap 64bits moves before swapping 32bits moves. Go back from the
// cycle by returning the move that must be swapped.
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index d1770b75ab..63ef600756 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -96,7 +96,7 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
if (can_merge_with_load_class && !load_class->HasUses()) {
load_class->GetBlock()->RemoveInstruction(load_class);
}
- } else if (can_merge_with_load_class) {
+ } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) {
// Pass the initialization duty to the `HLoadClass` instruction,
// and remove the instruction from the graph.
load_class->SetMustGenerateClinitCheck(true);
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 5ab4547e22..2bae4bc5c8 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1679,6 +1679,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
LocationSummary* locations = safepoint_position->GetLocations();
if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
+ DCHECK(interval->GetDefinedBy()->IsActualObject())
+ << interval->GetDefinedBy()->DebugName()
+ << "@" << safepoint_position->GetInstruction()->DebugName();
locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
}
@@ -1691,6 +1694,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
maximum_number_of_live_fp_registers_);
}
if (current->GetType() == Primitive::kPrimNot) {
+ DCHECK(interval->GetDefinedBy()->IsActualObject())
+ << interval->GetDefinedBy()->DebugName()
+ << "@" << safepoint_position->GetInstruction()->DebugName();
locations->SetRegisterBit(source.reg());
}
break;
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index c60a4eacaa..4784de1380 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -270,7 +270,7 @@ void StackMapStream::FillIn(MemoryRegion region) {
stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask);
}
- if (entry.num_dex_registers == 0) {
+ if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
// No dex map available.
stack_map.SetDexRegisterMapOffset(stack_map_encoding_, StackMap::kNoDexRegisterMap);
} else {
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 560502fde6..604787fd92 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -614,6 +614,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
stream.EndStackMapEntry();
+ number_of_dex_registers = 1;
+ stream.BeginStackMapEntry(1, 67, 0x4, &sp_mask, number_of_dex_registers, 0);
+ stream.EndStackMapEntry();
+
size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
@@ -622,7 +626,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
CodeInfo code_info(region);
StackMapEncoding encoding = code_info.ExtractEncoding();
ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
- ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+ ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
ASSERT_EQ(0u, number_of_location_catalog_entries);
@@ -638,6 +642,16 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding));
ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+
+ stack_map = code_info.GetStackMapAt(1, encoding);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding)));
+ ASSERT_EQ(1u, stack_map.GetDexPc(encoding));
+ ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding));
+ ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding));
+
+ ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding));
+ ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
}
TEST(StackMapTest, InlineTest) {