Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc  |   11
-rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc  |    2
-rw-r--r--  compiler/optimizing/bytecode_utils.h  |  180
-rw-r--r--  compiler/optimizing/cha_guard_optimization.cc  |    7
-rw-r--r--  compiler/optimizing/code_generator.cc  |   17
-rw-r--r--  compiler/optimizing/code_generator.h  |    6
-rw-r--r--  compiler/optimizing/code_generator_arm.cc  |  937
-rw-r--r--  compiler/optimizing/code_generator_arm.h  |    8
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  |   63
-rw-r--r--  compiler/optimizing/code_generator_arm64.h  |    7
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc  |  995
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h  |    8
-rw-r--r--  compiler/optimizing/code_generator_mips.cc  | 1803
-rw-r--r--  compiler/optimizing/code_generator_mips.h  |  133
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc  | 2065
-rw-r--r--  compiler/optimizing/code_generator_mips64.h  |  133
-rw-r--r--  compiler/optimizing/code_generator_vector_arm.cc  |  243
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc  |  672
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc  |  243
-rw-r--r--  compiler/optimizing/code_generator_vector_mips.cc  |  243
-rw-r--r--  compiler/optimizing/code_generator_vector_mips64.cc  |  243
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc  |  807
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc  |  800
-rw-r--r--  compiler/optimizing/code_generator_x86.cc  |   47
-rw-r--r--  compiler/optimizing/code_generator_x86.h  |    8
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  |   61
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h  |    7
-rw-r--r--  compiler/optimizing/code_sinking.cc  |  403
-rw-r--r--  compiler/optimizing/code_sinking.h  |   48
-rw-r--r--  compiler/optimizing/codegen_test_utils.h  |    1
-rw-r--r--  compiler/optimizing/common_arm.h  |    5
-rw-r--r--  compiler/optimizing/common_dominator.h  |    6
-rw-r--r--  compiler/optimizing/graph_visualizer.cc  |   10
-rw-r--r--  compiler/optimizing/induction_var_analysis_test.cc  |   16
-rw-r--r--  compiler/optimizing/induction_var_range.cc  |  108
-rw-r--r--  compiler/optimizing/induction_var_range.h  |   16
-rw-r--r--  compiler/optimizing/induction_var_range_test.cc  |  112
-rw-r--r--  compiler/optimizing/inliner.cc  |  840
-rw-r--r--  compiler/optimizing/inliner.h  |   94
-rw-r--r--  compiler/optimizing/instruction_builder.cc  |   62
-rw-r--r--  compiler/optimizing/instruction_builder.h  |    4
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc  |   61
-rw-r--r--  compiler/optimizing/instruction_simplifier.h  |    8
-rw-r--r--  compiler/optimizing/intrinsics.cc  |  109
-rw-r--r--  compiler/optimizing/intrinsics.h  |   33
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc  |  408
-rw-r--r--  compiler/optimizing/intrinsics_arm.h  |    1
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc  |  336
-rw-r--r--  compiler/optimizing/intrinsics_arm64.h  |    4
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc  |  507
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.h  |    1
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc  |  336
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc  |  284
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc  |  170
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc  |  144
-rw-r--r--  compiler/optimizing/licm_test.cc  |   13
-rw-r--r--  compiler/optimizing/load_store_elimination.cc  |  157
-rw-r--r--  compiler/optimizing/locations.h  |   22
-rw-r--r--  compiler/optimizing/loop_optimization.cc  |  836
-rw-r--r--  compiler/optimizing/loop_optimization.h  |  125
-rw-r--r--  compiler/optimizing/loop_optimization_test.cc  |    2
-rw-r--r--  compiler/optimizing/nodes.cc  |   93
-rw-r--r--  compiler/optimizing/nodes.h  |  152
-rw-r--r--  compiler/optimizing/nodes_vector.h  |  604
-rw-r--r--  compiler/optimizing/optimizing_cfi_test_expected.inc  |  148
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc  |  101
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h  |   36
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc  |    8
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h  |    1
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc  |    4
-rw-r--r--  compiler/optimizing/reference_type_propagation_test.cc  |    2
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc  |   28
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc  |   41
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc  |   71
-rw-r--r--  compiler/optimizing/register_allocator_test.cc  |   12
-rw-r--r--  compiler/optimizing/scheduler.h  |    6
-rw-r--r--  compiler/optimizing/sharpening.cc  |   38
-rw-r--r--  compiler/optimizing/sharpening.h  |   12
-rw-r--r--  compiler/optimizing/side_effects_analysis.h  |    6
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc  |   20
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h  |  114
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis_test.cc  |  233
-rw-r--r--  compiler/optimizing/stack_map_stream.cc  |   72
-rw-r--r--  compiler/optimizing/stack_map_stream.h  |   13
-rw-r--r--  compiler/optimizing/stack_map_test.cc  |   37
85 files changed, 14263 insertions(+), 2620 deletions(-)
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 2ee4db923a..476906a768 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -528,7 +528,8 @@ class BCEVisitor : public HGraphVisitor {
has_dom_based_dynamic_bce_(false),
initial_block_size_(graph->GetBlocks().size()),
side_effects_(side_effects),
- induction_range_(induction_analysis) {}
+ induction_range_(induction_analysis),
+ next_(nullptr) {}
void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
DCHECK(!IsAddedBlock(block));
@@ -1618,8 +1619,8 @@ class BCEVisitor : public HGraphVisitor {
void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
HInstruction* suspend = loop->GetSuspendCheck();
block->InsertInstructionBefore(condition, block->GetLastInstruction());
- HDeoptimize* deoptimize =
- new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+ HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+ GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc());
block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
if (suspend->HasEnvironment()) {
deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
@@ -1631,8 +1632,8 @@ class BCEVisitor : public HGraphVisitor {
void InsertDeoptInBlock(HBoundsCheck* bounds_check, HInstruction* condition) {
HBasicBlock* block = bounds_check->GetBlock();
block->InsertInstructionBefore(condition, bounds_check);
- HDeoptimize* deoptimize =
- new (GetGraph()->GetArena()) HDeoptimize(condition, bounds_check->GetDexPc());
+ HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+ GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc());
block->InsertInstructionBefore(deoptimize, bounds_check);
deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
}
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 5d58207511..cb6e14b2bd 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -43,7 +43,7 @@ class BoundsCheckEliminationTest : public testing::Test {
void RunBCE() {
graph_->BuildDominatorTree();
- InstructionSimplifier(graph_).Run();
+ InstructionSimplifier(graph_, /* codegen */ nullptr).Run();
SideEffectsAnalysis side_effects(graph_);
side_effects.Run();
diff --git a/compiler/optimizing/bytecode_utils.h b/compiler/optimizing/bytecode_utils.h
deleted file mode 100644
index 133afa47fe..0000000000
--- a/compiler/optimizing/bytecode_utils.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
-#define ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
-
-#include "base/arena_object.h"
-#include "dex_file.h"
-#include "dex_file-inl.h"
-#include "dex_instruction-inl.h"
-
-namespace art {
-
-class CodeItemIterator : public ValueObject {
- public:
- explicit CodeItemIterator(const DexFile::CodeItem& code_item) : CodeItemIterator(code_item, 0u) {}
- CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc)
- : code_ptr_(code_item.insns_ + start_dex_pc),
- code_end_(code_item.insns_ + code_item.insns_size_in_code_units_),
- dex_pc_(start_dex_pc) {}
-
- bool Done() const { return code_ptr_ >= code_end_; }
- bool IsLast() const { return code_ptr_ + CurrentInstruction().SizeInCodeUnits() >= code_end_; }
-
- const Instruction& CurrentInstruction() const { return *Instruction::At(code_ptr_); }
- uint32_t CurrentDexPc() const { return dex_pc_; }
-
- void Advance() {
- DCHECK(!Done());
- size_t instruction_size = CurrentInstruction().SizeInCodeUnits();
- code_ptr_ += instruction_size;
- dex_pc_ += instruction_size;
- }
-
- private:
- const uint16_t* code_ptr_;
- const uint16_t* const code_end_;
- uint32_t dex_pc_;
-
- DISALLOW_COPY_AND_ASSIGN(CodeItemIterator);
-};
-
-class DexSwitchTable : public ValueObject {
- public:
- DexSwitchTable(const Instruction& instruction, uint32_t dex_pc)
- : instruction_(instruction),
- dex_pc_(dex_pc),
- sparse_(instruction.Opcode() == Instruction::SPARSE_SWITCH) {
- int32_t table_offset = instruction.VRegB_31t();
- const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
- DCHECK_EQ(table[0], sparse_ ? static_cast<uint16_t>(Instruction::kSparseSwitchSignature)
- : static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
- num_entries_ = table[1];
- values_ = reinterpret_cast<const int32_t*>(&table[2]);
- }
-
- uint16_t GetNumEntries() const {
- return num_entries_;
- }
-
- void CheckIndex(size_t index) const {
- if (sparse_) {
- // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
- DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
- } else {
- // In a packed table, we have the starting key and num_entries_ values.
- DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
- }
- }
-
- int32_t GetEntryAt(size_t index) const {
- CheckIndex(index);
- return values_[index];
- }
-
- uint32_t GetDexPcForIndex(size_t index) const {
- CheckIndex(index);
- return dex_pc_ +
- (reinterpret_cast<const int16_t*>(values_ + index) -
- reinterpret_cast<const int16_t*>(&instruction_));
- }
-
- // Index of the first value in the table.
- size_t GetFirstValueIndex() const {
- if (sparse_) {
- // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
- return num_entries_;
- } else {
- // In a packed table, we have the starting key and num_entries_ values.
- return 1;
- }
- }
-
- bool IsSparse() const { return sparse_; }
-
- bool ShouldBuildDecisionTree() {
- return IsSparse() || GetNumEntries() <= kSmallSwitchThreshold;
- }
-
- private:
- const Instruction& instruction_;
- const uint32_t dex_pc_;
-
- // Whether this is a sparse-switch table (or a packed-switch one).
- const bool sparse_;
-
- // This can't be const as it needs to be computed off of the given instruction, and complicated
- // expressions in the initializer list seemed very ugly.
- uint16_t num_entries_;
-
- const int32_t* values_;
-
- // The number of entries in a packed switch before we use a jump table or specified
- // compare/jump series.
- static constexpr uint16_t kSmallSwitchThreshold = 3;
-
- DISALLOW_COPY_AND_ASSIGN(DexSwitchTable);
-};
-
-class DexSwitchTableIterator {
- public:
- explicit DexSwitchTableIterator(const DexSwitchTable& table)
- : table_(table),
- num_entries_(static_cast<size_t>(table_.GetNumEntries())),
- first_target_offset_(table_.GetFirstValueIndex()),
- index_(0u) {}
-
- bool Done() const { return index_ >= num_entries_; }
- bool IsLast() const { return index_ == num_entries_ - 1; }
-
- void Advance() {
- DCHECK(!Done());
- index_++;
- }
-
- int32_t CurrentKey() const {
- return table_.IsSparse() ? table_.GetEntryAt(index_) : table_.GetEntryAt(0) + index_;
- }
-
- int32_t CurrentTargetOffset() const {
- return table_.GetEntryAt(index_ + first_target_offset_);
- }
-
- uint32_t GetDexPcForCurrentIndex() const { return table_.GetDexPcForIndex(index_); }
-
- private:
- const DexSwitchTable& table_;
- const size_t num_entries_;
- const size_t first_target_offset_;
-
- size_t index_;
-};
-
-inline const Instruction& GetDexInstructionAt(const DexFile::CodeItem& code_item, uint32_t dex_pc) {
- return CodeItemIterator(code_item, dex_pc).CurrentInstruction();
-}
-
-inline bool IsThrowingDexInstruction(const Instruction& instruction) {
- // Special-case MONITOR_EXIT which is a throwing instruction but the verifier
- // guarantees that it will never throw. This is necessary to avoid rejecting
- // 'synchronized' blocks/methods.
- return instruction.IsThrow() && instruction.Opcode() != Instruction::MONITOR_EXIT;
-}
-
-} // namespace art
-
-#endif // ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
index fe423012ca..048073e37a 100644
--- a/compiler/optimizing/cha_guard_optimization.cc
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -36,7 +36,8 @@ class CHAGuardVisitor : HGraphVisitor {
: HGraphVisitor(graph),
block_has_cha_guard_(GetGraph()->GetBlocks().size(),
0,
- graph->GetArena()->Adapter(kArenaAllocCHA)) {
+ graph->GetArena()->Adapter(kArenaAllocCHA)),
+ instruction_iterator_(nullptr) {
number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards();
DCHECK_NE(number_of_guards_to_visit_, 0u);
// Will recount number of guards during guard optimization.
@@ -201,8 +202,8 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag,
HInstruction* suspend = loop_info->GetSuspendCheck();
// Need a new deoptimize instruction that copies the environment
// of the suspend instruction for the loop.
- HDeoptimize* deoptimize =
- new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc());
+ HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+ GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc());
pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
suspend->GetEnvironment(), loop_info->GetHeader());
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 424b8507fb..b7c80756b0 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -654,8 +654,12 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
}
}
-size_t CodeGenerator::ComputeStackMapsSize() {
- return stack_map_stream_.PrepareForFillIn();
+void CodeGenerator::ComputeStackMapAndMethodInfoSize(size_t* stack_map_size,
+ size_t* method_info_size) {
+ DCHECK(stack_map_size != nullptr);
+ DCHECK(method_info_size != nullptr);
+ *stack_map_size = stack_map_stream_.PrepareForFillIn();
+ *method_info_size = stack_map_stream_.ComputeMethodInfoSize();
}
static void CheckCovers(uint32_t dex_pc,
@@ -723,10 +727,13 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
}
}
-void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) {
- stack_map_stream_.FillIn(region);
+void CodeGenerator::BuildStackMaps(MemoryRegion stack_map_region,
+ MemoryRegion method_info_region,
+ const DexFile::CodeItem& code_item) {
+ stack_map_stream_.FillInCodeInfo(stack_map_region);
+ stack_map_stream_.FillInMethodInfo(method_info_region);
if (kIsDebugBuild) {
- CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item);
+ CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map_region), code_item);
}
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index b912672792..ea463eeb62 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -341,8 +341,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
}
- void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
- size_t ComputeStackMapsSize();
+ void BuildStackMaps(MemoryRegion stack_map_region,
+ MemoryRegion method_info_region,
+ const DexFile::CodeItem& code_item);
+ void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
size_t GetNumberOfJitRoots() const {
return jit_string_roots_.size() + jit_class_roots_.size();
}
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 2560c9fedd..d7cc577580 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1134,7 +1134,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
// The read barrier instrumentation of object ArrayGet
@@ -1564,10 +1564,346 @@ static void GenerateLongDataProc(HDataProcWithShifterOp* instruction, CodeGenera
}
}
+static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) {
+ Primitive::Type type = instruction->InputAt(0)->GetType();
+ Location lhs_loc = instruction->GetLocations()->InAt(0);
+ Location rhs_loc = instruction->GetLocations()->InAt(1);
+ if (rhs_loc.IsConstant()) {
+ // 0.0 is the only immediate that can be encoded directly in
+ // a VCMP instruction.
+ //
+ // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+ // specify that in a floating-point comparison, positive zero
+ // and negative zero are considered equal, so we can use the
+ // literal 0.0 for both cases here.
+ //
+ // Note however that some methods (Float.equals, Float.compare,
+ // Float.compareTo, Double.equals, Double.compare,
+ // Double.compareTo, Math.max, Math.min, StrictMath.max,
+ // StrictMath.min) consider 0.0 to be (strictly) greater than
+ // -0.0. So if we ever translate calls to these methods into a
+ // HCompare instruction, we must handle the -0.0 case with
+ // care here.
+ DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+ if (type == Primitive::kPrimFloat) {
+ __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>());
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()));
+ }
+ } else {
+ if (type == Primitive::kPrimFloat) {
+ __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>());
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()),
+ FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>()));
+ }
+ }
+}
+
+static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition,
+ bool invert,
+ CodeGeneratorARM* codegen) {
+ DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ const LocationSummary* const locations = condition->GetLocations();
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+
+ if (invert) {
+ std::swap(cond, opposite);
+ }
+
+ std::pair<Condition, Condition> ret;
+ const Location left = locations->InAt(0);
+ const Location right = locations->InAt(1);
+
+ DCHECK(right.IsConstant());
+
+ const Register left_high = left.AsRegisterPairHigh<Register>();
+ const Register left_low = left.AsRegisterPairLow<Register>();
+ int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+
+ switch (cond) {
+ case kCondEQ:
+ case kCondNE:
+ case kCondB:
+ case kCondBE:
+ case kCondA:
+ case kCondAE:
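+ // Compare the high words first; the IT-guarded compare of the low words only
+ // updates the flags when the high words are equal. The unsigned condition
+ // codes then yield the correct result for ==, != and the unsigned orderings.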
+ __ CmpConstant(left_high, High32Bits(value));
+ __ it(EQ);
+ __ cmp(left_low, ShifterOperand(Low32Bits(value)), EQ);
+ ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
+ break;
+ case kCondLE:
+ case kCondGT:
+ // Trivially true or false.
+ if (value == std::numeric_limits<int64_t>::max()) {
+ __ cmp(left_low, ShifterOperand(left_low));
+ ret = cond == kCondLE ? std::make_pair(EQ, NE) : std::make_pair(NE, EQ);
+ break;
+ }
+
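+ // Rewrite 'a <= value' as 'a < value + 1' and 'a > value' as 'a >= value + 1'.
+ // The value == INT64_MAX case was handled above, so the increment cannot overflow.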
+ if (cond == kCondLE) {
+ DCHECK_EQ(opposite, kCondGT);
+ cond = kCondLT;
+ opposite = kCondGE;
+ } else {
+ DCHECK_EQ(cond, kCondGT);
+ DCHECK_EQ(opposite, kCondLE);
+ cond = kCondGE;
+ opposite = kCondLT;
+ }
+
+ value++;
+ FALLTHROUGH_INTENDED;
+ case kCondGE:
+ case kCondLT:
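+ // CMP on the low words followed by SBCS on the high words sets the flags as
+ // if a full 64-bit subtraction had been performed, so the signed conditions
+ // LT/GE can be read directly from the APSR.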
+ __ CmpConstant(left_low, Low32Bits(value));
+ __ sbcs(IP, left_high, ShifterOperand(High32Bits(value)));
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+ break;
+ default:
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+ }
+
+ return ret;
+}
+
+static std::pair<Condition, Condition> GenerateLongTest(HCondition* condition,
+ bool invert,
+ CodeGeneratorARM* codegen) {
+ DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ const LocationSummary* const locations = condition->GetLocations();
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+
+ if (invert) {
+ std::swap(cond, opposite);
+ }
+
+ std::pair<Condition, Condition> ret;
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+
+ DCHECK(right.IsRegisterPair());
+
+ switch (cond) {
+ case kCondEQ:
+ case kCondNE:
+ case kCondB:
+ case kCondBE:
+ case kCondA:
+ case kCondAE:
+ __ cmp(left.AsRegisterPairHigh<Register>(),
+ ShifterOperand(right.AsRegisterPairHigh<Register>()));
+ __ it(EQ);
+ __ cmp(left.AsRegisterPairLow<Register>(),
+ ShifterOperand(right.AsRegisterPairLow<Register>()),
+ EQ);
+ ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
+ break;
+ case kCondLE:
+ case kCondGT:
+ if (cond == kCondLE) {
+ DCHECK_EQ(opposite, kCondGT);
+ cond = kCondGE;
+ opposite = kCondLT;
+ } else {
+ DCHECK_EQ(cond, kCondGT);
+ DCHECK_EQ(opposite, kCondLE);
+ cond = kCondLT;
+ opposite = kCondGE;
+ }
+
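+ // Swapping the operands turns 'a <= b' into 'b >= a' and 'a > b' into 'b < a',
+ // so the GE/LT code below can be reused.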
+ std::swap(left, right);
+ FALLTHROUGH_INTENDED;
+ case kCondGE:
+ case kCondLT:
+ __ cmp(left.AsRegisterPairLow<Register>(),
+ ShifterOperand(right.AsRegisterPairLow<Register>()));
+ __ sbcs(IP,
+ left.AsRegisterPairHigh<Register>(),
+ ShifterOperand(right.AsRegisterPairHigh<Register>()));
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+ break;
+ default:
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+ }
+
+ return ret;
+}
+
+static std::pair<Condition, Condition> GenerateTest(HCondition* condition,
+ bool invert,
+ CodeGeneratorARM* codegen) {
+ const LocationSummary* const locations = condition->GetLocations();
+ const Primitive::Type type = condition->GetLeft()->GetType();
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+ std::pair<Condition, Condition> ret;
+ const Location right = locations->InAt(1);
+
+ if (invert) {
+ std::swap(cond, opposite);
+ }
+
+ if (type == Primitive::kPrimLong) {
+ ret = locations->InAt(1).IsConstant()
+ ? GenerateLongTestConstant(condition, invert, codegen)
+ : GenerateLongTest(condition, invert, codegen);
+ } else if (Primitive::IsFloatingPointType(type)) {
+ GenerateVcmp(condition, codegen);
+ __ vmstat();
+ ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
+ ARMFPCondition(opposite, condition->IsGtBias()));
+ } else {
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+ const Register left = locations->InAt(0).AsRegister<Register>();
+
+ if (right.IsRegister()) {
+ __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
+ } else {
+ DCHECK(right.IsConstant());
+ __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+ }
+
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+ }
+
+ return ret;
+}
+
+static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) {
+ if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
+ const LocationSummary* const locations = condition->GetLocations();
+ const IfCondition c = condition->GetCondition();
+
+ if (locations->InAt(1).IsConstant()) {
+ const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue();
+ ShifterOperand so;
+
+ if (c < kCondLT || c > kCondGE) {
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the least significant half of the first input to be compared
+ // is in a low register (the other half is read outside an IT block), and
+ // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
+ // encoding can be used.
+ if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
+ !IsUint<8>(Low32Bits(value))) {
+ return false;
+ }
+ } else if (c == kCondLE || c == kCondGT) {
+ if (value < std::numeric_limits<int64_t>::max() &&
+ !assembler->ShifterOperandCanHold(kNoRegister,
+ kNoRegister,
+ SBC,
+ High32Bits(value + 1),
+ kCcSet,
+ &so)) {
+ return false;
+ }
+ } else if (!assembler->ShifterOperandCanHold(kNoRegister,
+ kNoRegister,
+ SBC,
+ High32Bits(value),
+ kCcSet,
+ &so)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
+ const Primitive::Type type = constant->GetType();
+ bool ret = false;
+
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+ if (type == Primitive::kPrimLong) {
+ const uint64_t value = constant->AsLongConstant()->GetValueAsUint64();
+
+ ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
+ } else {
+ ret = IsUint<8>(CodeGenerator::GetInt32ValueOf(constant));
+ }
+
+ return ret;
+}
+
+static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
+ DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
+
+ if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
+ return Location::ConstantLocation(constant->AsConstant());
+ }
+
+ return Location::RequiresRegister();
+}
+
+static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that we are not dealing with floating-point output (there is no
+ // 16-bit VMOV encoding).
+ if (!out.IsRegister() && !out.IsRegisterPair()) {
+ return false;
+ }
+
+ // For constants, we also check that the output is in one or two low registers,
+ // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
+ // MOV encoding can be used.
+ if (src.IsConstant()) {
+ if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
+ return false;
+ }
+
+ if (out.IsRegister()) {
+ if (!ArmAssembler::IsLowRegister(out.AsRegister<Register>())) {
+ return false;
+ }
+ } else {
+ DCHECK(out.IsRegisterPair());
+
+ if (!ArmAssembler::IsLowRegister(out.AsRegisterPairHigh<Register>())) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT
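+// Returns `final_label` unless `instruction` is immediately followed by an unconditional
+// HGoto whose branch can be taken eagerly (i.e. no back-edge suspend check is pending),
+// in which case the label of the goto's successor is returned so that callers can branch
+// there directly instead of branching to a branch.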
+Label* CodeGeneratorARM::GetFinalLabel(HInstruction* instruction, Label* final_label) {
+ DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
+ DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall());
+
+ const HBasicBlock* const block = instruction->GetBlock();
+ const HLoopInformation* const info = block->GetLoopInformation();
+ HInstruction* const next = instruction->GetNext();
+
+ // Avoid a branch to a branch.
+ if (next->IsGoto() && (info == nullptr ||
+ !info->IsBackEdge(*block) ||
+ !info->HasSuspendCheck())) {
+ final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
+ }
+
+ return final_label;
+}
+
void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
stream << Register(reg);
}
@@ -1626,8 +1962,6 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_address_patches_(std::less<uint32_t>(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -2094,51 +2428,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
-void InstructionCodeGeneratorARM::GenerateVcmp(HInstruction* instruction) {
- Primitive::Type type = instruction->InputAt(0)->GetType();
- Location lhs_loc = instruction->GetLocations()->InAt(0);
- Location rhs_loc = instruction->GetLocations()->InAt(1);
- if (rhs_loc.IsConstant()) {
- // 0.0 is the only immediate that can be encoded directly in
- // a VCMP instruction.
- //
- // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
- // specify that in a floating-point comparison, positive zero
- // and negative zero are considered equal, so we can use the
- // literal 0.0 for both cases here.
- //
- // Note however that some methods (Float.equal, Float.compare,
- // Float.compareTo, Double.equal, Double.compare,
- // Double.compareTo, Math.max, Math.min, StrictMath.max,
- // StrictMath.min) consider 0.0 to be (strictly) greater than
- // -0.0. So if we ever translate calls to these methods into a
- // HCompare instruction, we must handle the -0.0 case with
- // care here.
- DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
- if (type == Primitive::kPrimFloat) {
- __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>());
- } else {
- DCHECK_EQ(type, Primitive::kPrimDouble);
- __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()));
- }
- } else {
- if (type == Primitive::kPrimFloat) {
- __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>());
- } else {
- DCHECK_EQ(type, Primitive::kPrimDouble);
- __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()),
- FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>()));
- }
- }
-}
-
-void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
- Label* true_label,
- Label* false_label ATTRIBUTE_UNUSED) {
- __ vmstat(); // transfer FP status register to ARM APSR.
- __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
-}
-
void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
Label* true_label,
Label* false_label) {
@@ -2155,7 +2444,6 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
// Set the conditions for the test, remembering that == needs to be
// decided using the low words.
- // TODO: consider avoiding jumps with temporary and CMP low+SBC high
switch (if_cond) {
case kCondEQ:
case kCondNE:
@@ -2226,25 +2514,38 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target_in,
Label* false_target_in) {
+ if (CanGenerateTest(condition, codegen_->GetAssembler())) {
+ Label* non_fallthrough_target;
+ bool invert;
+
+ if (true_target_in == nullptr) {
+ DCHECK(false_target_in != nullptr);
+ non_fallthrough_target = false_target_in;
+ invert = true;
+ } else {
+ non_fallthrough_target = true_target_in;
+ invert = false;
+ }
+
+ const auto cond = GenerateTest(condition, invert, codegen_);
+
+ __ b(non_fallthrough_target, cond.first);
+
+ if (false_target_in != nullptr && false_target_in != non_fallthrough_target) {
+ __ b(false_target_in);
+ }
+
+ return;
+ }
+
// Generated branching requires both targets to be explicit. If either of the
// targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
Label fallthrough_target;
Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
- Primitive::Type type = condition->InputAt(0)->GetType();
- switch (type) {
- case Primitive::kPrimLong:
- GenerateLongComparesAndJumps(condition, true_target, false_target);
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- GenerateVcmp(condition);
- GenerateFPJumps(condition, true_target, false_target);
- break;
- default:
- LOG(FATAL) << "Unexpected compare type " << type;
- }
+ DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
+ GenerateLongComparesAndJumps(condition, true_target, false_target);
if (false_target != &fallthrough_target) {
__ b(false_target);
@@ -2309,20 +2610,38 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio
return;
}
+ Label* non_fallthrough_target;
+ Condition arm_cond;
LocationSummary* locations = cond->GetLocations();
DCHECK(locations->InAt(0).IsRegister());
Register left = locations->InAt(0).AsRegister<Register>();
Location right = locations->InAt(1);
- if (right.IsRegister()) {
- __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
+
+ if (true_target == nullptr) {
+ arm_cond = ARMCondition(condition->GetOppositeCondition());
+ non_fallthrough_target = false_target;
} else {
- DCHECK(right.IsConstant());
- __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+ arm_cond = ARMCondition(condition->GetCondition());
+ non_fallthrough_target = true_target;
}
- if (true_target == nullptr) {
- __ b(false_target, ARMCondition(condition->GetOppositeCondition()));
+
+ if (right.IsConstant() && (arm_cond == NE || arm_cond == EQ) &&
+ CodeGenerator::GetInt32ValueOf(right.GetConstant()) == 0) {
+ if (arm_cond == EQ) {
+ __ CompareAndBranchIfZero(left, non_fallthrough_target);
+ } else {
+ DCHECK_EQ(arm_cond, NE);
+ __ CompareAndBranchIfNonZero(left, non_fallthrough_target);
+ }
} else {
- __ b(true_target, ARMCondition(condition->GetCondition()));
+ if (right.IsRegister()) {
+ __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
+ } else {
+ DCHECK(right.IsConstant());
+ __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+ }
+
+ __ b(non_fallthrough_target, arm_cond);
}
}
@@ -2382,28 +2701,148 @@ void InstructionCodeGeneratorARM::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFla
void LocationsBuilderARM::VisitSelect(HSelect* select) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
- if (Primitive::IsFloatingPointType(select->GetType())) {
+ const bool is_floating_point = Primitive::IsFloatingPointType(select->GetType());
+
+ if (is_floating_point) {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
} else {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
}
+
if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
- locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
+ // The code generator handles overlap with the values, but not with the condition.
+ locations->SetOut(Location::SameAsFirstInput());
+ } else if (is_floating_point) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ } else {
+ if (!locations->InAt(1).IsConstant()) {
+ locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
+ }
+
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
- locations->SetOut(Location::SameAsFirstInput());
}
void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) {
- LocationSummary* locations = select->GetLocations();
- Label false_target;
- GenerateTestAndBranch(select,
- /* condition_input_index */ 2,
- /* true_target */ nullptr,
- &false_target);
- codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
- __ Bind(&false_target);
+ HInstruction* const condition = select->GetCondition();
+ const LocationSummary* const locations = select->GetLocations();
+ const Primitive::Type type = select->GetType();
+ const Location first = locations->InAt(0);
+ const Location out = locations->Out();
+ const Location second = locations->InAt(1);
+ Location src;
+
+ if (condition->IsIntConstant()) {
+ if (condition->AsIntConstant()->IsFalse()) {
+ src = first;
+ } else {
+ src = second;
+ }
+
+ codegen_->MoveLocation(out, src, type);
+ return;
+ }
+
+ if (!Primitive::IsFloatingPointType(type) &&
+ (IsBooleanValueOrMaterializedCondition(condition) ||
+ CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) {
+ bool invert = false;
+
+ if (out.Equals(second)) {
+ src = first;
+ invert = true;
+ } else if (out.Equals(first)) {
+ src = second;
+ } else if (second.IsConstant()) {
+ DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
+ src = second;
+ } else if (first.IsConstant()) {
+ DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
+ src = first;
+ invert = true;
+ } else {
+ src = second;
+ }
+
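+ // Prime `out` with the value it should have when the condition does not select `src`,
+ // then conditionally overwrite it with `src` inside an IT block below.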
+ if (CanGenerateConditionalMove(out, src)) {
+ if (!out.Equals(first) && !out.Equals(second)) {
+ codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
+ }
+
+ std::pair<Condition, Condition> cond;
+
+ if (IsBooleanValueOrMaterializedCondition(condition)) {
+ __ CmpConstant(locations->InAt(2).AsRegister<Register>(), 0);
+ cond = invert ? std::make_pair(EQ, NE) : std::make_pair(NE, EQ);
+ } else {
+ cond = GenerateTest(condition->AsCondition(), invert, codegen_);
+ }
+
+ if (out.IsRegister()) {
+ ShifterOperand operand;
+
+ if (src.IsConstant()) {
+ operand = ShifterOperand(CodeGenerator::GetInt32ValueOf(src.GetConstant()));
+ } else {
+ DCHECK(src.IsRegister());
+ operand = ShifterOperand(src.AsRegister<Register>());
+ }
+
+ __ it(cond.first);
+ __ mov(out.AsRegister<Register>(), operand, cond.first);
+ } else {
+ DCHECK(out.IsRegisterPair());
+
+ ShifterOperand operand_high;
+ ShifterOperand operand_low;
+
+ if (src.IsConstant()) {
+ const int64_t value = src.GetConstant()->AsLongConstant()->GetValue();
+
+ operand_high = ShifterOperand(High32Bits(value));
+ operand_low = ShifterOperand(Low32Bits(value));
+ } else {
+ DCHECK(src.IsRegisterPair());
+ operand_high = ShifterOperand(src.AsRegisterPairHigh<Register>());
+ operand_low = ShifterOperand(src.AsRegisterPairLow<Register>());
+ }
+
+ __ it(cond.first);
+ __ mov(out.AsRegisterPairLow<Register>(), operand_low, cond.first);
+ __ it(cond.first);
+ __ mov(out.AsRegisterPairHigh<Register>(), operand_high, cond.first);
+ }
+
+ return;
+ }
+ }
+
+ Label* false_target = nullptr;
+ Label* true_target = nullptr;
+ Label select_end;
+ Label* target = codegen_->GetFinalLabel(select, &select_end);
+
+ if (out.Equals(second)) {
+ true_target = target;
+ src = first;
+ } else {
+ false_target = target;
+ src = second;
+
+ if (!out.Equals(first)) {
+ codegen_->MoveLocation(out, first, type);
+ }
+ }
+
+ GenerateTestAndBranch(select, 2, true_target, false_target);
+ codegen_->MoveLocation(out, src, type);
+
+ if (select_end.IsLinked()) {
+ __ Bind(&select_end);
+ }
}
void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -2427,7 +2866,7 @@ void LocationsBuilderARM::HandleCondition(HCondition* cond) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
if (!cond->IsEmittedAtUseSite()) {
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
break;
@@ -2454,51 +2893,48 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
return;
}
- LocationSummary* locations = cond->GetLocations();
- Location left = locations->InAt(0);
- Location right = locations->InAt(1);
- Register out = locations->Out().AsRegister<Register>();
- Label true_label, false_label;
+ const Register out = cond->GetLocations()->Out().AsRegister<Register>();
- switch (cond->InputAt(0)->GetType()) {
- default: {
- // Integer case.
- if (right.IsRegister()) {
- __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>()));
- } else {
- DCHECK(right.IsConstant());
- __ CmpConstant(left.AsRegister<Register>(),
- CodeGenerator::GetInt32ValueOf(right.GetConstant()));
- }
- __ it(ARMCondition(cond->GetCondition()), kItElse);
- __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1),
- ARMCondition(cond->GetCondition()));
- __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(0),
- ARMCondition(cond->GetOppositeCondition()));
- return;
- }
- case Primitive::kPrimLong:
- GenerateLongComparesAndJumps(cond, &true_label, &false_label);
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- GenerateVcmp(cond);
- GenerateFPJumps(cond, &true_label, &false_label);
- break;
+ if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) {
+ const auto condition = GenerateTest(cond, false, codegen_);
+
+ __ it(condition.first);
+ __ mov(out, ShifterOperand(1), condition.first);
+ __ it(condition.second);
+ __ mov(out, ShifterOperand(0), condition.second);
+ return;
}
// Convert the jumps into the result.
Label done_label;
+ Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
- // False case: result = 0.
- __ Bind(&false_label);
- __ LoadImmediate(out, 0);
- __ b(&done_label);
+ if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
+ Label true_label, false_label;
- // True case: result = 1.
- __ Bind(&true_label);
- __ LoadImmediate(out, 1);
- __ Bind(&done_label);
+ GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+
+ // False case: result = 0.
+ __ Bind(&false_label);
+ __ LoadImmediate(out, 0);
+ __ b(final_label);
+
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ LoadImmediate(out, 1);
+ } else {
+ DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+
+ const auto condition = GenerateTest(cond, false, codegen_);
+
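+ // Materialize false first with a flags-preserving MOV (kCcKeep), so that the
+ // flags set by GenerateTest() above remain valid for the conditional branch.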
+ __ mov(out, ShifterOperand(0), AL, kCcKeep);
+ __ b(final_label, condition.second);
+ __ LoadImmediate(out, 1);
+ }
+
+ if (done_label.IsLinked()) {
+ __ Bind(&done_label);
+ }
}
void LocationsBuilderARM::VisitEqual(HEqual* comp) {
@@ -4029,7 +4465,8 @@ void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations
// rotates by swapping input regs (effectively rotating by the first 32-bits of
// a larger rotation) or flipping direction (thus treating larger right/left
// rotations as sub-word sized rotations in the other direction) as appropriate.
-void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
+void InstructionCodeGeneratorARM::HandleLongRotate(HRor* ror) {
+ LocationSummary* locations = ror->GetLocations();
Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
Location rhs = locations->InAt(1);
@@ -4062,6 +4499,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
Register shift_left = locations->GetTemp(1).AsRegister<Register>();
Label end;
Label shift_by_32_plus_shift_right;
+ Label* final_label = codegen_->GetFinalLabel(ror, &end);
__ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
__ Lsrs(shift_left, rhs.AsRegister<Register>(), 6);
@@ -4076,7 +4514,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
__ Lsl(out_reg_lo, in_reg_lo, shift_left);
__ Lsr(shift_left, in_reg_hi, shift_right);
__ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
- __ b(&end);
+ __ b(final_label);
__ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
// out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
@@ -4088,7 +4526,9 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
__ Lsl(shift_right, in_reg_hi, shift_left);
__ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
- __ Bind(&end);
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
}
}
@@ -4128,7 +4568,7 @@ void InstructionCodeGeneratorARM::VisitRor(HRor* ror) {
break;
}
case Primitive::kPrimLong: {
- HandleLongRotate(locations);
+ HandleLongRotate(ror);
break;
}
default:
@@ -4507,6 +4947,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
Location right = locations->InAt(1);
Label less, greater, done;
+ Label* final_label = codegen_->GetFinalLabel(compare, &done);
Primitive::Type type = compare->InputAt(0)->GetType();
Condition less_cond;
switch (type) {
@@ -4536,7 +4977,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
__ LoadImmediate(out, 0);
- GenerateVcmp(compare);
+ GenerateVcmp(compare, codegen_);
__ vmstat(); // transfer FP status register to ARM APSR.
less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
break;
@@ -4546,17 +4987,19 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
UNREACHABLE();
}
- __ b(&done, EQ);
+ __ b(final_label, EQ);
__ b(&less, less_cond);
__ Bind(&greater);
__ LoadImmediate(out, 1);
- __ b(&done);
+ __ b(final_label);
__ Bind(&less);
__ LoadImmediate(out, -1);
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
}
void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
@@ -4892,17 +5335,29 @@ bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value,
return true;
}
Opcode neg_opcode = kNoOperand;
+ uint32_t neg_value = 0;
switch (opcode) {
- case AND: neg_opcode = BIC; value = ~value; break;
- case ORR: neg_opcode = ORN; value = ~value; break;
- case ADD: neg_opcode = SUB; value = -value; break;
- case ADC: neg_opcode = SBC; value = ~value; break;
- case SUB: neg_opcode = ADD; value = -value; break;
- case SBC: neg_opcode = ADC; value = ~value; break;
+ case AND: neg_opcode = BIC; neg_value = ~value; break;
+ case ORR: neg_opcode = ORN; neg_value = ~value; break;
+ case ADD: neg_opcode = SUB; neg_value = -value; break;
+ case ADC: neg_opcode = SBC; neg_value = ~value; break;
+ case SUB: neg_opcode = ADD; neg_value = -value; break;
+ case SBC: neg_opcode = ADC; neg_value = ~value; break;
+ case MOV: neg_opcode = MVN; neg_value = ~value; break;
default:
return false;
}
- return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so);
+
+ if (assembler->ShifterOperandCanHold(kNoRegister,
+ kNoRegister,
+ neg_opcode,
+ neg_value,
+ set_cc,
+ &so)) {
+ return true;
+ }
+
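+ // An AND with a mask of the form 2^n - 1 can still avoid a register: it is
+ // emitted as a UBFX of the low n bits (see GenerateAndConst).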
+ return opcode == AND && IsPowerOfTwo(value + 1);
}
void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
@@ -5322,6 +5777,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
if (maybe_compressed_char_at) {
Label uncompressed_load, done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
@@ -5330,13 +5786,15 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
out_loc.AsRegister<Register>(),
obj,
data_offset + const_index);
- __ b(&done);
+ __ b(final_label);
__ Bind(&uncompressed_load);
__ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
out_loc.AsRegister<Register>(),
obj,
data_offset + (const_index << 1));
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
} else {
uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
@@ -5360,17 +5818,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
}
if (maybe_compressed_char_at) {
Label uncompressed_load, done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
__ b(&uncompressed_load, CS);
__ ldrb(out_loc.AsRegister<Register>(),
Address(temp, index.AsRegister<Register>(), Shift::LSL, 0));
- __ b(&done);
+ __ b(final_label);
__ Bind(&uncompressed_load);
__ ldrh(out_loc.AsRegister<Register>(),
Address(temp, index.AsRegister<Register>(), Shift::LSL, 1));
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
} else {
codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
}
@@ -5595,6 +6056,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
Label done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARM* slow_path = nullptr;
if (may_need_runtime_call_for_type_check) {
@@ -5616,7 +6078,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
index.AsRegister<Register>());
}
codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ b(&done);
+ __ b(final_label);
__ Bind(&non_zero);
}
@@ -5804,21 +6266,59 @@ void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+
+ HInstruction* index = instruction->InputAt(0);
+ HInstruction* length = instruction->InputAt(1);
+ // If both index and length are constants, we can check the bounds statically. But if at
+ // least one of them is not encodable, ArmEncodableConstantOrRegister would create
+ // Location::RequiresRegister(), which is not what we want here. Instead we create constant
+ // locations.
+ bool both_const = index->IsConstant() && length->IsConstant();
+ locations->SetInAt(0, both_const
+ ? Location::ConstantLocation(index->AsConstant())
+ : ArmEncodableConstantOrRegister(index, CMP));
+ locations->SetInAt(1, both_const
+ ? Location::ConstantLocation(length->AsConstant())
+ : ArmEncodableConstantOrRegister(length, CMP));
}
void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) {
LocationSummary* locations = instruction->GetLocations();
- SlowPathCodeARM* slow_path =
- new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
- codegen_->AddSlowPath(slow_path);
-
- Register index = locations->InAt(0).AsRegister<Register>();
- Register length = locations->InAt(1).AsRegister<Register>();
+ Location index_loc = locations->InAt(0);
+ Location length_loc = locations->InAt(1);
+
+ if (length_loc.IsConstant()) {
+ int32_t length = helpers::Int32ConstantFrom(length_loc);
+ if (index_loc.IsConstant()) {
+ // BCE will remove the bounds check if we are guaranteed to pass.
+ int32_t index = helpers::Int32ConstantFrom(index_loc);
+ if (index < 0 || index >= length) {
+ SlowPathCodeARM* slow_path =
+ new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
+ codegen_->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel());
+ } else {
+ // Some optimization after BCE may have generated this, and we should not
+ // generate a bounds check if it is a valid range.
+ }
+ return;
+ }
- __ cmp(index, ShifterOperand(length));
- __ b(slow_path->GetEntryLabel(), HS);
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
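+ // The unsigned HS comparison against the constant length also catches negative indices.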
+ __ cmp(index_loc.AsRegister<Register>(), ShifterOperand(length));
+ codegen_->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel(), HS);
+ } else {
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
+ if (index_loc.IsConstant()) {
+ int32_t index = helpers::Int32ConstantFrom(index_loc);
+ __ cmp(length_loc.AsRegister<Register>(), ShifterOperand(index));
+ } else {
+ __ cmp(length_loc.AsRegister<Register>(), ShifterOperand(index_loc.AsRegister<Register>()));
+ }
+ codegen_->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel(), LS);
+ }
}
void CodeGeneratorARM::MarkGCCard(Register temp,
@@ -6558,13 +7058,16 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- Label done, zero;
+ Label done;
+ Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARM* slow_path = nullptr;
// Return 0 if `obj` is null.
// avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, &zero);
+ DCHECK_NE(out, obj);
+ __ LoadImmediate(out, 0);
+ __ CompareAndBranchIfZero(obj, final_label);
}
switch (type_check_kind) {
@@ -6576,11 +7079,23 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
class_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- __ cmp(out, ShifterOperand(cls));
// Classes must be equal for the instanceof to succeed.
- __ b(&zero, NE);
- __ LoadImmediate(out, 1);
- __ b(&done);
+ __ cmp(out, ShifterOperand(cls));
+ // We speculatively set the result to false without changing the condition
+ // flags, which allows us to avoid some branching later.
+ __ mov(out, ShifterOperand(0), AL, kCcKeep);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (ArmAssembler::IsLowRegister(out)) {
+ __ it(EQ);
+ __ mov(out, ShifterOperand(1), EQ);
+ } else {
+ __ b(final_label, NE);
+ __ LoadImmediate(out, 1);
+ }
+
break;
}
@@ -6602,14 +7117,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
super_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- // If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done);
+ // If `out` is null, we use it for the result, and jump to the final label.
+ __ CompareAndBranchIfZero(out, final_label);
__ cmp(out, ShifterOperand(cls));
__ b(&loop, NE);
__ LoadImmediate(out, 1);
- if (zero.IsLinked()) {
- __ b(&done);
- }
break;
}
@@ -6632,14 +7144,32 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
super_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- __ CompareAndBranchIfNonZero(out, &loop);
- // If `out` is null, we use it for the result, and jump to `done`.
- __ b(&done);
- __ Bind(&success);
- __ LoadImmediate(out, 1);
- if (zero.IsLinked()) {
- __ b(&done);
+ // This is essentially a null check, but it sets the condition flags to the
+ // proper value for the code that follows the loop, i.e. not `EQ`.
+ __ cmp(out, ShifterOperand(1));
+ __ b(&loop, HS);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (ArmAssembler::IsLowRegister(out)) {
+ // If `out` is null, we use it for the result, and the condition flags
+ // have already been set to `NE`, so the IT block that comes afterwards
+ // (and which handles the successful case) turns into a NOP (instead of
+ // overwriting `out`).
+ __ Bind(&success);
+ // There is only one branch to the `success` label (which is bound to this
+ // IT block), and it has the same condition, `EQ`, so in that case the MOV
+ // is executed.
+ __ it(EQ);
+ __ mov(out, ShifterOperand(1), EQ);
+ } else {
+ // If `out` is null, we use it for the result, and jump to the final label.
+ __ b(final_label);
+ __ Bind(&success);
+ __ LoadImmediate(out, 1);
}
+
break;
}
@@ -6662,14 +7192,28 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
component_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- // If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done);
+ // If `out` is null, we use it for the result, and jump to the final label.
+ __ CompareAndBranchIfZero(out, final_label);
__ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(out, &zero);
- __ Bind(&exact_check);
- __ LoadImmediate(out, 1);
- __ b(&done);
+ __ cmp(out, ShifterOperand(0));
+ // We speculatively set the result to false without changing the condition
+ // flags, which allows us to avoid some branching later.
+ __ mov(out, ShifterOperand(0), AL, kCcKeep);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (ArmAssembler::IsLowRegister(out)) {
+ __ Bind(&exact_check);
+ __ it(EQ);
+ __ mov(out, ShifterOperand(1), EQ);
+ } else {
+ __ b(final_label, NE);
+ __ Bind(&exact_check);
+ __ LoadImmediate(out, 1);
+ }
+
break;
}
@@ -6689,9 +7233,6 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
codegen_->AddSlowPath(slow_path);
__ b(slow_path->GetEntryLabel(), NE);
__ LoadImmediate(out, 1);
- if (zero.IsLinked()) {
- __ b(&done);
- }
break;
}
@@ -6720,18 +7261,10 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
/* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
__ b(slow_path->GetEntryLabel());
- if (zero.IsLinked()) {
- __ b(&done);
- }
break;
}
}
- if (zero.IsLinked()) {
- __ Bind(&zero);
- __ LoadImmediate(out, 0);
- }
-
if (done.IsLinked()) {
__ Bind(&done);
}
@@ -6807,9 +7340,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
codegen_->AddSlowPath(type_check_slow_path);
Label done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, &done);
+ __ CompareAndBranchIfZero(obj, final_label);
}
switch (type_check_kind) {
@@ -6873,7 +7407,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
Label loop;
__ Bind(&loop);
__ cmp(temp, ShifterOperand(cls));
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
@@ -6901,7 +7435,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// Do an exact check.
__ cmp(temp, ShifterOperand(cls));
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
@@ -6971,7 +7505,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
break;
}
}
- __ Bind(&done);
+
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
__ Bind(type_check_slow_path->GetExitLabel());
}
@@ -7158,9 +7695,11 @@ void InstructionCodeGeneratorARM::GenerateAndConst(Register out, Register first,
ShifterOperand so;
if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, AND, value, &so)) {
__ and_(out, first, so);
- } else {
- DCHECK(__ ShifterOperandCanHold(kNoRegister, kNoRegister, BIC, ~value, &so));
+ } else if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, BIC, ~value, &so)) {
__ bic(out, first, ShifterOperand(~value));
+ } else {
+ DCHECK(IsPowerOfTwo(value + 1));
+ __ ubfx(out, first, 0, WhichPowerOf2(value + 1));
}
}
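The new UBFX fallback rests on the identity that ANDing with a mask of the form 2^n - 1 keeps exactly the n least significant bits, which is what UBFX out, first, #0, #n computes. A small self-contained check of that identity (standard C++, no ART helpers; the shift pair below stands in for UBFX):

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t first = 0xdeadbeefu;
    for (int n = 1; n < 32; ++n) {
      const uint32_t value = (1u << n) - 1;                       // value + 1 is a power of two
      const uint32_t via_and = first & value;                     // AND/BIC form
      const uint32_t via_ubfx = (first << (32 - n)) >> (32 - n);  // low-n-bit extract
      assert(via_and == via_ubfx);
    }
    return 0;
  }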
@@ -7846,9 +8385,7 @@ Literal* CodeGeneratorARM::DeduplicateBootImageTypeLiteral(const DexFile& dex_fi
}
Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) {
- bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
- Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
- return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+ return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
}
Literal* CodeGeneratorARM::DeduplicateJitStringLiteral(const DexFile& dex_file,
@@ -7899,8 +8436,7 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
/* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
boot_image_type_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
- /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
- boot_image_address_patches_.size();
+ /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
@@ -7934,13 +8470,6 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
target_type.dex_file,
target_type.type_index.index_));
}
- for (const auto& entry : boot_image_address_patches_) {
- DCHECK(GetCompilerOptions().GetIncludePatchInformation());
- Literal* literal = entry.second;
- DCHECK(literal->GetLabel()->IsBound());
- uint32_t literal_offset = literal->GetLabel()->Position();
- linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
- }
DCHECK_EQ(size, linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 1f68777f88..86f2f21df7 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -237,7 +237,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
void HandleBitwiseOperation(HBinaryOperation* operation);
void HandleCondition(HCondition* condition);
void HandleIntegerRotate(LocationSummary* locations);
- void HandleLongRotate(LocationSummary* locations);
+ void HandleLongRotate(HRor* ror);
void HandleShift(HBinaryOperation* operation);
void GenerateWideAtomicStore(Register addr, uint32_t offset,
@@ -299,8 +299,6 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
void GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target,
Label* false_target);
- void GenerateVcmp(HInstruction* instruction);
- void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivRemByPowerOfTwo(HBinaryOperation* instruction);
@@ -422,6 +420,8 @@ class CodeGeneratorARM : public CodeGenerator {
return CommonGetLabelOf<Label>(block_labels_, block);
}
+ Label* GetFinalLabel(HInstruction* instruction, Label* final_label);
+
void Initialize() OVERRIDE {
block_labels_ = CommonInitializeLabels<Label>();
}
@@ -648,8 +648,6 @@ class CodeGeneratorARM : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // Deduplication map for patchable boot image addresses.
- Uint32ToLiteralMap boot_image_address_patches_;
// Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7b6c97cd23..794e05c670 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -153,7 +153,8 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
codegen->GetNumberOfFloatingPointRegisters()));
CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
- CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize, fp_spills);
+ unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
+ CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);
MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
@@ -464,10 +465,13 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
: SlowPathCodeARM64(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // Only saves live 128-bit regs for SIMD.
arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // Only restores live 128-bit regs for SIMD.
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
@@ -808,6 +812,14 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlo
DCHECK(!(instruction_->IsArrayGet() &&
instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
+ // Temporary register `temp_`, used to store the lock word, must
+ // not be IP0 nor IP1, as we may use them to emit the reference
+ // load (in the call to GenerateRawReferenceLoad below), and we
+ // need the lock word to still be in `temp_` after the reference
+ // load.
+ DCHECK_NE(LocationFrom(temp_).reg(), IP0);
+ DCHECK_NE(LocationFrom(temp_).reg(), IP1);
+
__ Bind(GetEntryLabel());
// When using MaybeGenerateReadBarrierSlow, the read barrier call is
@@ -956,6 +968,14 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
Location field_offset = index_;
DCHECK(field_offset.IsRegister()) << field_offset;
+ // Temporary register `temp_`, used to store the lock word, must
+ // not be IP0 nor IP1, as we may use them to emit the reference
+ // load (in the call to GenerateRawReferenceLoad below), and we
+ // need the lock word to still be in `temp_` after the reference
+ // load.
+ DCHECK_NE(LocationFrom(temp_).reg(), IP0);
+ DCHECK_NE(LocationFrom(temp_).reg(), IP1);
+
__ Bind(GetEntryLabel());
// /* int32_t */ monitor = obj->monitor_
@@ -1134,7 +1154,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
// The read barrier instrumentation of object ArrayGet
@@ -1395,8 +1415,6 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_address_patches_(std::less<uint32_t>(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -2381,7 +2399,7 @@ void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
- locations->SetOut(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
default:
@@ -2551,7 +2569,7 @@ void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instr
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
- locations->SetOut(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
@@ -2622,6 +2640,9 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
LocationSummary::kNoCall);
if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
@@ -2657,7 +2678,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Object ArrayGet with Baker's read barrier case.
- Register temp = temps.AcquireW();
+ Register temp = WRegisterFrom(locations->GetTemp(0));
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -3264,7 +3285,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati
void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
Primitive::Type type = instruction->GetResultType();
- DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
LocationSummary* locations = instruction->GetLocations();
Register out = OutputRegister(instruction);
@@ -3614,7 +3635,7 @@ void LocationsBuilderARM64::VisitSelect(HSelect* select) {
if (Primitive::IsFloatingPointType(select->GetType())) {
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
@@ -3637,7 +3658,7 @@ void LocationsBuilderARM64::VisitSelect(HSelect* select) {
: Location::ConstantLocation(cst_true_value));
locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
: Location::ConstantLocation(cst_false_value));
- locations->SetOut(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
@@ -4289,7 +4310,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
}
void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
- IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
@@ -4302,7 +4323,7 @@ void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* inv
// art::PrepareForRegisterAllocation.
DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
- IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
@@ -4523,9 +4544,7 @@ vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLi
vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
uint64_t address) {
- bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
- Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
- return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+ return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
}
vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
@@ -4593,8 +4612,7 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
pc_relative_string_patches_.size() +
boot_image_type_patches_.size() +
pc_relative_type_patches_.size() +
- type_bss_entry_patches_.size() +
- boot_image_address_patches_.size();
+ type_bss_entry_patches_.size();
linker_patches->reserve(size);
for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
@@ -4628,11 +4646,6 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
target_type.dex_file,
target_type.type_index.index_));
}
- for (const auto& entry : boot_image_address_patches_) {
- DCHECK(GetCompilerOptions().GetIncludePatchInformation());
- vixl::aarch64::Literal<uint32_t>* literal = entry.second;
- linker_patches->push_back(LinkerPatch::RecordPosition(literal->GetOffset()));
- }
DCHECK_EQ(size, linker_patches->size());
}
@@ -5511,7 +5524,11 @@ void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In the suspend check slow path, there are usually no caller-save registers at all.
+ // If SIMD instructions are present, however, we force spilling all live SIMD
+ // registers in full width (since the runtime only saves/restores the lower part).
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
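The SIMD-related changes in this file implement one policy: a runtime call such as the suspend check only preserves the low 64 bits of each FP register, so when the graph contains SIMD code, live vector values are treated as caller-save and spilled at full 128-bit (Q register) width. A rough sketch of that policy (hypothetical struct, not ART code):

  // Assumed simplification of the decision made by VisitSuspendCheck and
  // SaveRestoreLiveRegistersHelper above.
  struct FpSpillPolicy {
    bool graph_has_simd;

    // Bytes used per FP register when the slow path saves it.
    int SpillWidth() const { return graph_has_simd ? 16 /* Q reg */ : 8 /* D reg */; }

    // Whether FP registers must be considered caller-save around the call.
    bool FpuCallerSave() const { return graph_has_simd; }
  };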
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 231fb057c8..10d8b841f8 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -318,6 +318,11 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
+ vixl::aarch64::MemOperand CreateVecMemRegisters(
+ HVecMemoryOperation* instruction,
+ Location* reg_loc,
+ bool is_load);
+
Arm64Assembler* const assembler_;
CodeGeneratorARM64* const codegen_;
@@ -771,8 +776,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // Deduplication map for patchable boot image addresses.
- Uint32ToLiteralMap boot_image_address_patches_;
// Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 0239ac9c45..cce412b314 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -42,6 +42,7 @@ using helpers::DRegisterFrom;
using helpers::DWARFReg;
using helpers::HighDRegisterFrom;
using helpers::HighRegisterFrom;
+using helpers::InputDRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegister;
using helpers::InputRegisterAt;
@@ -53,6 +54,7 @@ using helpers::Int64ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
+using helpers::OperandFrom;
using helpers::OutputRegister;
using helpers::OutputSRegister;
using helpers::OutputVRegister;
@@ -830,6 +832,12 @@ class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkS
DCHECK(!(instruction_->IsArrayGet() &&
instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
+ // Temporary register `temp_`, used to store the lock word, must
+ // not be IP, as we may use it to emit the reference load (in the
+ // call to GenerateRawReferenceLoad below), and we need the lock
+ // word to still be in `temp_` after the reference load.
+ DCHECK(!temp_.Is(ip));
+
__ Bind(GetEntryLabel());
// When using MaybeGenerateReadBarrierSlow, the read barrier call is
@@ -971,6 +979,12 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
Location field_offset = index_;
DCHECK(field_offset.IsRegisterPair()) << field_offset;
+ // Temporary register `temp1_`, used to store the lock word, must
+ // not be IP, as we may use it to emit the reference load (in the
+ // call to GenerateRawReferenceLoad below), and we need the lock
+ // word to still be in `temp1_` after the reference load.
+ DCHECK(!temp1_.Is(ip));
+
__ Bind(GetEntryLabel());
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
@@ -1161,7 +1175,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
// The read barrier instrumentation of object ArrayGet
@@ -1640,8 +1654,335 @@ static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
}
}
+static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
+ const Location rhs_loc = instruction->GetLocations()->InAt(1);
+ if (rhs_loc.IsConstant()) {
+ // 0.0 is the only immediate that can be encoded directly in
+ // a VCMP instruction.
+ //
+ // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+ // specify that in a floating-point comparison, positive zero
+ // and negative zero are considered equal, so we can use the
+ // literal 0.0 for both cases here.
+ //
+ // Note however that some methods (Float.equal, Float.compare,
+ // Float.compareTo, Double.equal, Double.compare,
+ // Double.compareTo, Math.max, Math.min, StrictMath.max,
+ // StrictMath.min) consider 0.0 to be (strictly) greater than
+ // -0.0. So if we ever translate calls to these methods into a
+ // HCompare instruction, we must handle the -0.0 case with
+ // care here.
+ DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+
+ const Primitive::Type type = instruction->InputAt(0)->GetType();
+
+ if (type == Primitive::kPrimFloat) {
+ __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
+ }
+ } else {
+ __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
+ }
+}
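The comment above leans on the fact that ordinary floating-point comparison treats the two zeroes as equal, while methods like Float.compare distinguish them by sign bit; a tiny standalone check of that distinction (standard C++):

  #include <cassert>
  #include <cmath>

  int main() {
    assert(0.0 == -0.0);                                // comparison operators: equal, so
                                                        // VCMP against literal 0.0 is fine
    assert(std::signbit(-0.0) && !std::signbit(0.0));   // but the encodings differ, which is
                                                        // what Float.compare & co. look at
    return 0;
  }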
+
+static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
+ HCondition* condition,
+ bool invert,
+ CodeGeneratorARMVIXL* codegen) {
+ DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ const LocationSummary* const locations = condition->GetLocations();
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+
+ if (invert) {
+ std::swap(cond, opposite);
+ }
+
+ std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
+ const Location left = locations->InAt(0);
+ const Location right = locations->InAt(1);
+
+ DCHECK(right.IsConstant());
+
+ const vixl32::Register left_high = HighRegisterFrom(left);
+ const vixl32::Register left_low = LowRegisterFrom(left);
+ int64_t value = Int64ConstantFrom(right);
+
+ switch (cond) {
+ case kCondEQ:
+ case kCondNE:
+ case kCondB:
+ case kCondBE:
+ case kCondA:
+ case kCondAE: {
+ __ Cmp(left_high, High32Bits(value));
+
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(eq);
+ __ cmp(eq, left_low, Low32Bits(value));
+ ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
+ break;
+ }
+ case kCondLE:
+ case kCondGT:
+ // Trivially true or false.
+ if (value == std::numeric_limits<int64_t>::max()) {
+ __ Cmp(left_low, left_low);
+ ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
+ break;
+ }
+
+ if (cond == kCondLE) {
+ DCHECK_EQ(opposite, kCondGT);
+ cond = kCondLT;
+ opposite = kCondGE;
+ } else {
+ DCHECK_EQ(cond, kCondGT);
+ DCHECK_EQ(opposite, kCondLE);
+ cond = kCondGE;
+ opposite = kCondLT;
+ }
+
+ value++;
+ FALLTHROUGH_INTENDED;
+ case kCondGE:
+ case kCondLT: {
+ UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+ __ Cmp(left_low, Low32Bits(value));
+ __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+ }
+
+ return ret;
+}
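Two rewrites drive the constant case above: LE/GT against a constant c < INT64_MAX become LT/GE against c + 1 (with INT64_MAX itself handled as trivially true/false), and LT/GE are then decided by a 64-bit subtraction emitted as CMP low / SBCS high. The first rewrite is plain integer arithmetic and can be checked in isolation:

  #include <cassert>
  #include <cstdint>

  int main() {
    const int64_t samples[] = {INT64_MIN, -2, -1, 0, 1, 2, INT64_MAX - 1};
    for (int64_t a : samples) {
      for (int64_t c : samples) {  // every c here is < INT64_MAX, so c + 1 cannot overflow
        assert((a <= c) == (a < c + 1));
        assert((a > c) == (a >= c + 1));
      }
    }
    return 0;
  }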
+
+static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
+ HCondition* condition,
+ bool invert,
+ CodeGeneratorARMVIXL* codegen) {
+ DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ const LocationSummary* const locations = condition->GetLocations();
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+
+ if (invert) {
+ std::swap(cond, opposite);
+ }
+
+ std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+
+ DCHECK(right.IsRegisterPair());
+
+ switch (cond) {
+ case kCondEQ:
+ case kCondNE:
+ case kCondB:
+ case kCondBE:
+ case kCondA:
+ case kCondAE: {
+ __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
+
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(eq);
+ __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
+ ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
+ break;
+ }
+ case kCondLE:
+ case kCondGT:
+ if (cond == kCondLE) {
+ DCHECK_EQ(opposite, kCondGT);
+ cond = kCondGE;
+ opposite = kCondLT;
+ } else {
+ DCHECK_EQ(cond, kCondGT);
+ DCHECK_EQ(opposite, kCondLE);
+ cond = kCondLT;
+ opposite = kCondGE;
+ }
+
+ std::swap(left, right);
+ FALLTHROUGH_INTENDED;
+ case kCondGE:
+ case kCondLT: {
+ UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+ __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
+ __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+ }
+
+ return ret;
+}
+
+static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
+ bool invert,
+ CodeGeneratorARMVIXL* codegen) {
+ const Primitive::Type type = condition->GetLeft()->GetType();
+ IfCondition cond = condition->GetCondition();
+ IfCondition opposite = condition->GetOppositeCondition();
+ std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
+
+ if (invert) {
+ std::swap(cond, opposite);
+ }
+
+ if (type == Primitive::kPrimLong) {
+ ret = condition->GetLocations()->InAt(1).IsConstant()
+ ? GenerateLongTestConstant(condition, invert, codegen)
+ : GenerateLongTest(condition, invert, codegen);
+ } else if (Primitive::IsFloatingPointType(type)) {
+ GenerateVcmp(condition, codegen);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
+ ARMFPCondition(opposite, condition->IsGtBias()));
+ } else {
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+ __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
+ ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+ }
+
+ return ret;
+}
+
+static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) {
+ if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
+ const LocationSummary* const locations = condition->GetLocations();
+ const IfCondition c = condition->GetCondition();
+
+ if (locations->InAt(1).IsConstant()) {
+ const int64_t value = Int64ConstantFrom(locations->InAt(1));
+
+ if (c < kCondLT || c > kCondGE) {
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the least significant half of the first input to be compared
+ // is in a low register (the other half is read outside an IT block), and
+ // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
+ // encoding can be used.
+ if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) {
+ return false;
+ }
+ // TODO(VIXL): The rest of the checks are there to keep the backend in sync with
+ // the previous one, but are not strictly necessary.
+ } else if (c == kCondLE || c == kCondGT) {
+ if (value < std::numeric_limits<int64_t>::max() &&
+ !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) {
+ return false;
+ }
+ } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
+ const Primitive::Type type = constant->GetType();
+ bool ret = false;
+
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+ if (type == Primitive::kPrimLong) {
+ const uint64_t value = Uint64ConstantFrom(constant);
+
+ ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
+ } else {
+ ret = IsUint<8>(Int32ConstantFrom(constant));
+ }
+
+ return ret;
+}
+
+static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
+ DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
+
+ if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
+ return Location::ConstantLocation(constant->AsConstant());
+ }
+
+ return Location::RequiresRegister();
+}
+
+static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that we are not dealing with floating-point output (there is no
+ // 16-bit VMOV encoding).
+ if (!out.IsRegister() && !out.IsRegisterPair()) {
+ return false;
+ }
+
+ // For constants, we also check that the output is in one or two low registers,
+ // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
+ // MOV encoding can be used.
+ if (src.IsConstant()) {
+ if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
+ return false;
+ }
+
+ if (out.IsRegister()) {
+ if (!RegisterFrom(out).IsLow()) {
+ return false;
+ }
+ } else {
+ DCHECK(out.IsRegisterPair());
+
+ if (!HighRegisterFrom(out).IsLow()) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
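CanEncodeConstantAs8BitImmediate and CanGenerateConditionalMove together gate the IT-block path on 16-bit encodings being available. A rough model of the combined check (simplified sketch; register classes and the FP case are elided, and the helper names are invented):

  #include <cstdint>

  bool FitsImm8(uint32_t v) { return v <= 0xffu; }

  // A conditional MOV inside an IT block stays 16-bit if either the source is a
  // register (MOV register has a 16-bit encoding) or the destination registers
  // are low and each 32-bit half of the constant is an 8-bit unsigned value.
  bool CanUse16BitConditionalMove(bool dst_regs_are_low,
                                  bool src_is_constant,
                                  uint64_t constant) {
    if (!src_is_constant) {
      return true;
    }
    return dst_regs_are_low &&
           FitsImm8(static_cast<uint32_t>(constant)) &&
           FitsImm8(static_cast<uint32_t>(constant >> 32));
  }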
+
#undef __
+vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
+ vixl32::Label* final_label) {
+ DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
+ DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall());
+
+ const HBasicBlock* const block = instruction->GetBlock();
+ const HLoopInformation* const info = block->GetLoopInformation();
+ HInstruction* const next = instruction->GetNext();
+
+ // Avoid a branch to a branch.
+ if (next->IsGoto() && (info == nullptr ||
+ !info->IsBackEdge(*block) ||
+ !info->HasSuspendCheck())) {
+ final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
+ }
+
+ return final_label;
+}
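GetFinalLabel implements the "avoid a branch to a branch" trick: when the instruction is immediately followed by an unconditional goto (and taking the shortcut would not skip a back-edge suspend check), internal branches can target the goto's successor directly and the local label is never bound. A toy sketch of that decision (hypothetical types, not the real HIR classes):

  struct Label {};

  struct NextInstruction {
    bool is_goto;
    bool is_back_edge_with_suspend_check;  // the case GetFinalLabel refuses
    Label* goto_successor_label;
  };

  Label* FinalLabel(const NextInstruction& next, Label* local_done) {
    if (next.is_goto && !next.is_back_edge_with_suspend_check) {
      return next.goto_successor_label;    // branch straight to the real target
    }
    return local_done;                     // fall back to the per-instruction label
  }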
+
CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
const ArmInstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
@@ -1671,23 +2012,16 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_address_patches_(std::less<uint32_t>(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
// Always save the LR register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(LR));
- // Give d14 and d15 as scratch registers to VIXL.
- // They are removed from the register allocator in `SetupBlockedRegisters()`.
- // TODO(VIXL): We need two scratch D registers for `EmitSwap` when swapping two double stack
- // slots. If that is sufficiently rare, and we have pressure on FP registers, we could instead
- // spill in `EmitSwap`. But if we actually are guaranteed to have 32 D registers, we could give
- // d30 and d31 to VIXL to avoid removing registers from the allocator. If that is the case, we may
- // also want to investigate giving those 14 other D registers to the allocator.
- GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d14);
- GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
+ // Give D30 and D31 as scratch registers to VIXL. The register allocator only works on
+ // S0-S31, which alias to D0-D15.
+ GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
+ GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
}
void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
@@ -1753,13 +2087,6 @@ void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
// Reserve temp register.
blocked_core_registers_[IP] = true;
- // Registers s28-s31 (d14-d15) are left to VIXL for scratch registers.
- // (They are given to the `MacroAssembler` in `CodeGeneratorARMVIXL::CodeGeneratorARMVIXL`.)
- blocked_fpu_registers_[28] = true;
- blocked_fpu_registers_[29] = true;
- blocked_fpu_registers_[30] = true;
- blocked_fpu_registers_[31] = true;
-
if (GetGraph()->IsDebuggable()) {
// Stubs do not save callee-save floating point registers. If the graph
// is debuggable, we need to deal with these registers differently. For
@@ -2135,51 +2462,6 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
-void InstructionCodeGeneratorARMVIXL::GenerateVcmp(HInstruction* instruction) {
- Primitive::Type type = instruction->InputAt(0)->GetType();
- Location lhs_loc = instruction->GetLocations()->InAt(0);
- Location rhs_loc = instruction->GetLocations()->InAt(1);
- if (rhs_loc.IsConstant()) {
- // 0.0 is the only immediate that can be encoded directly in
- // a VCMP instruction.
- //
- // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
- // specify that in a floating-point comparison, positive zero
- // and negative zero are considered equal, so we can use the
- // literal 0.0 for both cases here.
- //
- // Note however that some methods (Float.equal, Float.compare,
- // Float.compareTo, Double.equal, Double.compare,
- // Double.compareTo, Math.max, Math.min, StrictMath.max,
- // StrictMath.min) consider 0.0 to be (strictly) greater than
- // -0.0. So if we ever translate calls to these methods into a
- // HCompare instruction, we must handle the -0.0 case with
- // care here.
- DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
- if (type == Primitive::kPrimFloat) {
- __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
- } else {
- DCHECK_EQ(type, Primitive::kPrimDouble);
- __ Vcmp(F64, DRegisterFrom(lhs_loc), 0.0);
- }
- } else {
- if (type == Primitive::kPrimFloat) {
- __ Vcmp(InputSRegisterAt(instruction, 0), InputSRegisterAt(instruction, 1));
- } else {
- DCHECK_EQ(type, Primitive::kPrimDouble);
- __ Vcmp(DRegisterFrom(lhs_loc), DRegisterFrom(rhs_loc));
- }
- }
-}
-
-void InstructionCodeGeneratorARMVIXL::GenerateFPJumps(HCondition* cond,
- vixl32::Label* true_label,
- vixl32::Label* false_label ATTRIBUTE_UNUSED) {
- // To branch on the result of the FP compare we transfer FPSCR to APSR (encoded as PC in VMRS).
- __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
- __ B(ARMFPCondition(cond->GetCondition(), cond->IsGtBias()), true_label);
-}
-
void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond,
vixl32::Label* true_label,
vixl32::Label* false_label) {
@@ -2196,7 +2478,6 @@ void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* c
// Set the conditions for the test, remembering that == needs to be
// decided using the low words.
- // TODO: consider avoiding jumps with temporary and CMP low+SBC high
switch (if_cond) {
case kCondEQ:
case kCondNE:
@@ -2267,31 +2548,44 @@ void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* c
void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
vixl32::Label* true_target_in,
vixl32::Label* false_target_in) {
+ if (CanGenerateTest(condition, codegen_->GetAssembler())) {
+ vixl32::Label* non_fallthrough_target;
+ bool invert;
+
+ if (true_target_in == nullptr) {
+ DCHECK(false_target_in != nullptr);
+ non_fallthrough_target = false_target_in;
+ invert = true;
+ } else {
+ non_fallthrough_target = true_target_in;
+ invert = false;
+ }
+
+ const auto cond = GenerateTest(condition, invert, codegen_);
+
+ __ B(cond.first, non_fallthrough_target);
+
+ if (false_target_in != nullptr && false_target_in != non_fallthrough_target) {
+ __ B(false_target_in);
+ }
+
+ return;
+ }
+
// Generated branching requires both targets to be explicit. If either of the
// targets is nullptr (fallthrough) use and bind `fallthrough` instead.
vixl32::Label fallthrough;
vixl32::Label* true_target = (true_target_in == nullptr) ? &fallthrough : true_target_in;
vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in;
- Primitive::Type type = condition->InputAt(0)->GetType();
- switch (type) {
- case Primitive::kPrimLong:
- GenerateLongComparesAndJumps(condition, true_target, false_target);
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- GenerateVcmp(condition);
- GenerateFPJumps(condition, true_target, false_target);
- break;
- default:
- LOG(FATAL) << "Unexpected compare type " << type;
- }
+ DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
+ GenerateLongComparesAndJumps(condition, true_target, false_target);
if (false_target != &fallthrough) {
__ B(false_target);
}
- if (true_target_in == nullptr || false_target_in == nullptr) {
+ if (fallthrough.IsReferenced()) {
__ Bind(&fallthrough);
}
}
@@ -2357,20 +2651,29 @@ void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instru
return;
}
- LocationSummary* locations = cond->GetLocations();
- DCHECK(locations->InAt(0).IsRegister());
- vixl32::Register left = InputRegisterAt(cond, 0);
- Location right = locations->InAt(1);
- if (right.IsRegister()) {
- __ Cmp(left, InputRegisterAt(cond, 1));
+ vixl32::Label* non_fallthrough_target;
+ vixl32::Condition arm_cond = vixl32::Condition::None();
+ const vixl32::Register left = InputRegisterAt(cond, 0);
+ const Operand right = InputOperandAt(cond, 1);
+
+ if (true_target == nullptr) {
+ arm_cond = ARMCondition(condition->GetOppositeCondition());
+ non_fallthrough_target = false_target;
} else {
- DCHECK(right.IsConstant());
- __ Cmp(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+ arm_cond = ARMCondition(condition->GetCondition());
+ non_fallthrough_target = true_target;
}
- if (true_target == nullptr) {
- __ B(ARMCondition(condition->GetOppositeCondition()), false_target);
+
+ if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
+ if (arm_cond.Is(eq)) {
+ __ CompareAndBranchIfZero(left, non_fallthrough_target);
+ } else {
+ DCHECK(arm_cond.Is(ne));
+ __ CompareAndBranchIfNonZero(left, non_fallthrough_target);
+ }
} else {
- __ B(ARMCondition(condition->GetCondition()), true_target);
+ __ Cmp(left, right);
+ __ B(arm_cond, non_fallthrough_target);
}
}
@@ -2431,29 +2734,145 @@ void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimiz
void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
- if (Primitive::IsFloatingPointType(select->GetType())) {
+ const bool is_floating_point = Primitive::IsFloatingPointType(select->GetType());
+
+ if (is_floating_point) {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
} else {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
}
+
if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
- locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
+ // The code generator handles overlap with the values, but not with the condition.
+ locations->SetOut(Location::SameAsFirstInput());
+ } else if (is_floating_point) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ } else {
+ if (!locations->InAt(1).IsConstant()) {
+ locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
+ }
+
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
- locations->SetOut(Location::SameAsFirstInput());
}
void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
- LocationSummary* locations = select->GetLocations();
- vixl32::Label false_target;
- GenerateTestAndBranch(select,
- /* condition_input_index */ 2,
- /* true_target */ nullptr,
- &false_target,
- /* far_target */ false);
- codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
- __ Bind(&false_target);
+ HInstruction* const condition = select->GetCondition();
+ const LocationSummary* const locations = select->GetLocations();
+ const Primitive::Type type = select->GetType();
+ const Location first = locations->InAt(0);
+ const Location out = locations->Out();
+ const Location second = locations->InAt(1);
+ Location src;
+
+ if (condition->IsIntConstant()) {
+ if (condition->AsIntConstant()->IsFalse()) {
+ src = first;
+ } else {
+ src = second;
+ }
+
+ codegen_->MoveLocation(out, src, type);
+ return;
+ }
+
+ if (!Primitive::IsFloatingPointType(type) &&
+ (IsBooleanValueOrMaterializedCondition(condition) ||
+ CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) {
+ bool invert = false;
+
+ if (out.Equals(second)) {
+ src = first;
+ invert = true;
+ } else if (out.Equals(first)) {
+ src = second;
+ } else if (second.IsConstant()) {
+ DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
+ src = second;
+ } else if (first.IsConstant()) {
+ DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
+ src = first;
+ invert = true;
+ } else {
+ src = second;
+ }
+
+ if (CanGenerateConditionalMove(out, src)) {
+ if (!out.Equals(first) && !out.Equals(second)) {
+ codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
+ }
+
+ std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne);
+
+ if (IsBooleanValueOrMaterializedCondition(condition)) {
+ __ Cmp(InputRegisterAt(select, 2), 0);
+ cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
+ } else {
+ cond = GenerateTest(condition->AsCondition(), invert, codegen_);
+ }
+
+ const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ instr_count * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ if (out.IsRegister()) {
+ __ it(cond.first);
+ __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type));
+ } else {
+ DCHECK(out.IsRegisterPair());
+
+ Operand operand_high(0);
+ Operand operand_low(0);
+
+ if (src.IsConstant()) {
+ const int64_t value = Int64ConstantFrom(src);
+
+ operand_high = High32Bits(value);
+ operand_low = Low32Bits(value);
+ } else {
+ DCHECK(src.IsRegisterPair());
+ operand_high = HighRegisterFrom(src);
+ operand_low = LowRegisterFrom(src);
+ }
+
+ __ it(cond.first);
+ __ mov(cond.first, LowRegisterFrom(out), operand_low);
+ __ it(cond.first);
+ __ mov(cond.first, HighRegisterFrom(out), operand_high);
+ }
+
+ return;
+ }
+ }
+
+ vixl32::Label* false_target = nullptr;
+ vixl32::Label* true_target = nullptr;
+ vixl32::Label select_end;
+ vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
+
+ if (out.Equals(second)) {
+ true_target = target;
+ src = first;
+ } else {
+ false_target = target;
+ src = second;
+
+ if (!out.Equals(first)) {
+ codegen_->MoveLocation(out, first, type);
+ }
+ }
+
+ GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false);
+ codegen_->MoveLocation(out, src, type);
+
+ if (select_end.IsReferenced()) {
+ __ Bind(&select_end);
+ }
}
void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -2477,7 +2896,7 @@ void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
if (!cond->IsEmittedAtUseSite()) {
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
break;
@@ -2504,50 +2923,52 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
return;
}
- Location right = cond->GetLocations()->InAt(1);
- vixl32::Register out = OutputRegister(cond);
- vixl32::Label true_label, false_label;
+ const vixl32::Register out = OutputRegister(cond);
- switch (cond->InputAt(0)->GetType()) {
- default: {
- // Integer case.
- if (right.IsRegister()) {
- __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
- } else {
- DCHECK(right.IsConstant());
- __ Cmp(InputRegisterAt(cond, 0),
- CodeGenerator::GetInt32ValueOf(right.GetConstant()));
- }
- ExactAssemblyScope aas(GetVIXLAssembler(),
- 3 * vixl32::kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ ite(ARMCondition(cond->GetCondition()));
- __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1);
- __ mov(ARMCondition(cond->GetOppositeCondition()), OutputRegister(cond), 0);
- return;
- }
- case Primitive::kPrimLong:
- GenerateLongComparesAndJumps(cond, &true_label, &false_label);
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- GenerateVcmp(cond);
- GenerateFPJumps(cond, &true_label, &false_label);
- break;
+ if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) {
+ const auto condition = GenerateTest(cond, false, codegen_);
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ 4 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(condition.first);
+ __ mov(condition.first, out, 1);
+ __ it(condition.second);
+ __ mov(condition.second, out, 0);
+ return;
}
// Convert the jumps into the result.
vixl32::Label done_label;
+ vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
- // False case: result = 0.
- __ Bind(&false_label);
- __ Mov(out, 0);
- __ B(&done_label);
+ if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
+ vixl32::Label true_label, false_label;
- // True case: result = 1.
- __ Bind(&true_label);
- __ Mov(out, 1);
- __ Bind(&done_label);
+ GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+
+ // False case: result = 0.
+ __ Bind(&false_label);
+ __ Mov(out, 0);
+ __ B(final_label);
+
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ Mov(out, 1);
+ } else {
+ DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+
+ const auto condition = GenerateTest(cond, false, codegen_);
+
+ __ Mov(LeaveFlags, out, 0);
+ __ B(condition.second, final_label, /* far_target */ false);
+ __ Mov(out, 1);
+ }
+
+ if (done_label.IsReferenced()) {
+ __ Bind(&done_label);
+ }
}
void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
@@ -4060,6 +4481,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
vixl32::Label end;
vixl32::Label shift_by_32_plus_shift_right;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end);
__ And(shift_right, RegisterFrom(rhs), 0x1F);
__ Lsrs(shift_left, RegisterFrom(rhs), 6);
@@ -4074,7 +4496,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
__ Lsl(out_reg_lo, in_reg_lo, shift_left);
__ Lsr(shift_left, in_reg_hi, shift_right);
__ Add(out_reg_lo, out_reg_lo, shift_left);
- __ B(&end);
+ __ B(final_label);
__ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
// out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
@@ -4086,7 +4508,9 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
__ Lsl(shift_right, in_reg_hi, shift_left);
__ Add(out_reg_lo, out_reg_lo, shift_right);
- __ Bind(&end);
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
}
}
@@ -4519,6 +4943,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
Location right = locations->InAt(1);
vixl32::Label less, greater, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
Primitive::Type type = compare->InputAt(0)->GetType();
vixl32::Condition less_cond = vixl32::Condition(kNone);
switch (type) {
@@ -4546,7 +4971,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
__ Mov(out, 0);
- GenerateVcmp(compare);
+ GenerateVcmp(compare, codegen_);
// To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
__ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
@@ -4557,17 +4982,19 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
UNREACHABLE();
}
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
__ B(less_cond, &less, /* far_target */ false);
__ Bind(&greater);
__ Mov(out, 1);
- __ B(&done);
+ __ B(final_label);
__ Bind(&less);
__ Mov(out, -1);
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
@@ -4916,17 +5343,24 @@ bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(uint32_t value,
return true;
}
Opcode neg_opcode = kNoOperand;
+ uint32_t neg_value = 0;
switch (opcode) {
- case AND: neg_opcode = BIC; value = ~value; break;
- case ORR: neg_opcode = ORN; value = ~value; break;
- case ADD: neg_opcode = SUB; value = -value; break;
- case ADC: neg_opcode = SBC; value = ~value; break;
- case SUB: neg_opcode = ADD; value = -value; break;
- case SBC: neg_opcode = ADC; value = ~value; break;
+ case AND: neg_opcode = BIC; neg_value = ~value; break;
+ case ORR: neg_opcode = ORN; neg_value = ~value; break;
+ case ADD: neg_opcode = SUB; neg_value = -value; break;
+ case ADC: neg_opcode = SBC; neg_value = ~value; break;
+ case SUB: neg_opcode = ADD; neg_value = -value; break;
+ case SBC: neg_opcode = ADC; neg_value = ~value; break;
+ case MOV: neg_opcode = MVN; neg_value = ~value; break;
default:
return false;
}
- return assembler->ShifterOperandCanHold(neg_opcode, value, set_cc);
+
+ if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, set_cc)) {
+ return true;
+ }
+
+ return opcode == AND && IsPowerOfTwo(value + 1);
}
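The new MOV row in the table above uses MVN to materialize constants whose bitwise complement is encodable, and the final return mirrors the UBFX fallback in GenerateAndConst. The MVN identity itself is plain one's-complement arithmetic (the constant below is chosen for illustration only):

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t value = 0xffffff00u;   // hypothetical constant to materialize
    const uint32_t neg_value = ~value;    // 0x000000ff, an easily encodable immediate
    assert(~neg_value == value);          // so MVN rd, #neg_value produces `value`
    return 0;
  }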
void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
@@ -5352,6 +5786,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
int32_t const_index = Int32ConstantFrom(index);
if (maybe_compressed_char_at) {
vixl32::Label uncompressed_load, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
@@ -5360,13 +5795,15 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
RegisterFrom(out_loc),
obj,
data_offset + const_index);
- __ B(&done);
+ __ B(final_label);
__ Bind(&uncompressed_load);
GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
RegisterFrom(out_loc),
obj,
data_offset + (const_index << 1));
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
} else {
uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
@@ -5391,15 +5828,18 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
}
if (maybe_compressed_char_at) {
vixl32::Label uncompressed_load, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
__ B(cs, &uncompressed_load, /* far_target */ false);
__ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
- __ B(&done);
+ __ B(final_label);
__ Bind(&uncompressed_load);
__ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
} else {
codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
}
@@ -5638,6 +6078,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
vixl32::Label done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARMVIXL* slow_path = nullptr;
if (may_need_runtime_call_for_type_check) {
@@ -5660,7 +6101,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
// TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding
// store instruction.
codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ B(&done);
+ __ B(final_label);
__ Bind(&non_zero);
}
@@ -5864,20 +6305,56 @@ void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+
+ HInstruction* index = instruction->InputAt(0);
+ HInstruction* length = instruction->InputAt(1);
+ // If both index and length are constants, we can check the bounds statically. We still create
+ // constant locations for both inputs in that case, because if at least one of them were not
+ // encodable, ArmEncodableConstantOrRegister would fall back to Location::RequiresRegister(),
+ // which we want to avoid here.
+ bool both_const = index->IsConstant() && length->IsConstant();
+ locations->SetInAt(0, both_const
+ ? Location::ConstantLocation(index->AsConstant())
+ : ArmEncodableConstantOrRegister(index, CMP));
+ locations->SetInAt(1, both_const
+ ? Location::ConstantLocation(length->AsConstant())
+ : ArmEncodableConstantOrRegister(length, CMP));
}
void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
- SlowPathCodeARMVIXL* slow_path =
- new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
- codegen_->AddSlowPath(slow_path);
-
- vixl32::Register index = InputRegisterAt(instruction, 0);
- vixl32::Register length = InputRegisterAt(instruction, 1);
+ LocationSummary* locations = instruction->GetLocations();
+ Location index_loc = locations->InAt(0);
+ Location length_loc = locations->InAt(1);
+
+ if (length_loc.IsConstant()) {
+ int32_t length = Int32ConstantFrom(length_loc);
+ if (index_loc.IsConstant()) {
+ // BCE will remove the bounds check if we are guaranteed to pass.
+ int32_t index = Int32ConstantFrom(index_loc);
+ if (index < 0 || index >= length) {
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
+ codegen_->AddSlowPath(slow_path);
+ __ B(slow_path->GetEntryLabel());
+ } else {
+ // Some optimization running after BCE may have generated this check; since the
+ // index is statically within range, there is nothing to emit.
+ }
+ return;
+ }
- __ Cmp(index, length);
- __ B(hs, slow_path->GetEntryLabel());
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
+ __ Cmp(RegisterFrom(index_loc), length);
+ codegen_->AddSlowPath(slow_path);
+ __ B(hs, slow_path->GetEntryLabel());
+ } else {
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
+ __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
+ codegen_->AddSlowPath(slow_path);
+ __ B(ls, slow_path->GetEntryLabel());
+ }
}
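With both inputs constant, the check above reduces to a compile-time decision: in-range accesses emit nothing (BCE normally removed them already) and provably out-of-range accesses emit only an unconditional branch to the slow path. A simplified sketch of that decision, not the ART code itself:

    #include <cstdint>

    // True when the constant access is guaranteed to fail, so the generated
    // code is just a jump to BoundsCheckSlowPathARMVIXL's entry label.
    inline bool BoundsCheckAlwaysFails(int32_t index, int32_t length) {
      return index < 0 || index >= length;
    }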
void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
@@ -6107,13 +6584,16 @@ void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
// TODO(VIXL32): Double check the performance of this implementation.
UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
- vixl32::SRegister temp_1 = temps.AcquireS();
- vixl32::SRegister temp_2 = temps.AcquireS();
+ vixl32::Register temp1 = temps.Acquire();
+ ScratchRegisterScope ensure_scratch(
+ this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
+ vixl32::Register temp2(ensure_scratch.GetRegister());
- __ Vldr(temp_1, MemOperand(sp, mem1));
- __ Vldr(temp_2, MemOperand(sp, mem2));
- __ Vstr(temp_1, MemOperand(sp, mem2));
- __ Vstr(temp_2, MemOperand(sp, mem1));
+ int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
+ GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
+ GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
+ GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
+ GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
}
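The stack_offset adjustment above exists because ScratchRegisterScope may spill (push) a core register to obtain a second temporary, which moves sp down by one word and therefore shifts every sp-relative slot. A small sketch of that bookkeeping with hypothetical names; the real logic lives in the parallel move resolver and its scratch scope:

    #include <cstdint>

    constexpr int32_t kWordSizeBytes = 4;  // kArmWordSize in the code above.

    // Offsets computed against the pre-spill sp must grow by one word once a
    // scratch register has been pushed, so they still address the same slots.
    inline int32_t AdjustedSlotOffset(int32_t offset, bool scratch_was_spilled) {
      return offset + (scratch_was_spilled ? kWordSizeBytes : 0);
    }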
void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
@@ -6136,7 +6616,7 @@ void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
} else if (source.IsStackSlot() && destination.IsStackSlot()) {
Exchange(source.GetStackIndex(), destination.GetStackIndex());
} else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
- vixl32::SRegister temp = temps.AcquireS();
+ vixl32::Register temp = temps.Acquire();
__ Vmov(temp, SRegisterFrom(source));
__ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
__ Vmov(SRegisterFrom(destination), temp);
@@ -6195,12 +6675,12 @@ void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
}
}
-void ParallelMoveResolverARMVIXL::SpillScratch(int reg ATTRIBUTE_UNUSED) {
- TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
+ __ Push(vixl32::Register(reg));
}
-void ParallelMoveResolverARMVIXL::RestoreScratch(int reg ATTRIBUTE_UNUSED) {
- TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
+ __ Pop(vixl32::Register(reg));
}
HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
@@ -6628,13 +7108,16 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- vixl32::Label done, zero;
+ vixl32::Label done;
+ vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARMVIXL* slow_path = nullptr;
// Return 0 if `obj` is null.
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, &zero, /* far_target */ false);
+ DCHECK(!out.Is(obj));
+ __ Mov(out, 0);
+ __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false);
}
switch (type_check_kind) {
@@ -6646,11 +7129,28 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
class_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- __ Cmp(out, cls);
// Classes must be equal for the instanceof to succeed.
- __ B(ne, &zero, /* far_target */ false);
- __ Mov(out, 1);
- __ B(&done);
+ __ Cmp(out, cls);
+ // We speculatively set the result to false without changing the condition
+ // flags, which allows us to avoid some branching later.
+ __ Mov(LeaveFlags, out, 0);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (out.IsLow()) {
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(eq);
+ __ mov(eq, out, 1);
+ } else {
+ __ B(ne, final_label, /* far_target */ false);
+ __ Mov(out, 1);
+ }
+
break;
}
@@ -6672,14 +7172,11 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
super_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- // If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+ // If `out` is null, we use it for the result, and jump to the final label.
+ __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
__ Cmp(out, cls);
__ B(ne, &loop, /* far_target */ false);
__ Mov(out, 1);
- if (zero.IsReferenced()) {
- __ B(&done);
- }
break;
}
@@ -6702,14 +7199,38 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
super_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- __ CompareAndBranchIfNonZero(out, &loop);
- // If `out` is null, we use it for the result, and jump to `done`.
- __ B(&done);
- __ Bind(&success);
- __ Mov(out, 1);
- if (zero.IsReferenced()) {
- __ B(&done);
+ // This is essentially a null check, but it sets the condition flags to the
+ // proper value for the code that follows the loop, i.e. not `eq`.
+ __ Cmp(out, 1);
+ __ B(hs, &loop, /* far_target */ false);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (out.IsLow()) {
+ // If `out` is null, we use it for the result, and the condition flags
+ // have already been set to `ne`, so the IT block that comes afterwards
+ // (and which handles the successful case) turns into a NOP (instead of
+ // overwriting `out`).
+ __ Bind(&success);
+
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ // There is only one branch to the `success` label (which is bound to this
+ // IT block), and it has the same condition, `eq`, so in that case the MOV
+ // is executed.
+ __ it(eq);
+ __ mov(eq, out, 1);
+ } else {
+ // If `out` is null, we use it for the result, and jump to the final label.
+ __ B(final_label);
+ __ Bind(&success);
+ __ Mov(out, 1);
}
+
break;
}
@@ -6732,14 +7253,34 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
component_offset,
maybe_temp_loc,
kCompilerReadBarrierOption);
- // If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+ // If `out` is null, we use it for the result, and jump to the final label.
+ __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false);
- __ Bind(&exact_check);
- __ Mov(out, 1);
- __ B(&done);
+ __ Cmp(out, 0);
+ // We speculatively set the result to false without changing the condition
+ // flags, which allows us to avoid some branching later.
+ __ Mov(LeaveFlags, out, 0);
+
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used.
+ if (out.IsLow()) {
+ __ Bind(&exact_check);
+
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+
+ __ it(eq);
+ __ mov(eq, out, 1);
+ } else {
+ __ B(ne, final_label, /* far_target */ false);
+ __ Bind(&exact_check);
+ __ Mov(out, 1);
+ }
+
break;
}
@@ -6759,9 +7300,6 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
codegen_->AddSlowPath(slow_path);
__ B(ne, slow_path->GetEntryLabel());
__ Mov(out, 1);
- if (zero.IsReferenced()) {
- __ B(&done);
- }
break;
}
@@ -6790,18 +7328,10 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
/* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
__ B(slow_path->GetEntryLabel());
- if (zero.IsReferenced()) {
- __ B(&done);
- }
break;
}
}
- if (zero.IsReferenced()) {
- __ Bind(&zero);
- __ Mov(out, 0);
- }
-
if (done.IsReferenced()) {
__ Bind(&done);
}
@@ -6877,9 +7407,10 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
codegen_->AddSlowPath(type_check_slow_path);
vixl32::Label done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, &done, /* far_target */ false);
+ __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false);
}
switch (type_check_kind) {
@@ -6943,7 +7474,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
vixl32::Label loop;
__ Bind(&loop);
__ Cmp(temp, cls);
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
@@ -6971,7 +7502,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
// Do an exact check.
__ Cmp(temp, cls);
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
@@ -7039,7 +7570,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
break;
}
}
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
__ Bind(type_check_slow_path->GetExitLabel());
}
@@ -7231,10 +7764,12 @@ void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
return;
}
if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
- __ And(out, first, value);
+ __ And(out, first, value);
+ } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
+ __ Bic(out, first, ~value);
} else {
- DCHECK(GetAssembler()->ShifterOperandCanHold(BIC, ~value));
- __ Bic(out, first, ~value);
+ DCHECK(IsPowerOfTwo(value + 1));
+ __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
}
}
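The new UBFX fallback covers masks of the form 2^n - 1 that neither AND nor BIC can encode as a Thumb-2 modified immediate. A worked sketch of the three-way selection; the encodability tests are passed in as placeholders rather than re-implementing the assembler's ShifterOperandCanHold:

    enum class AndEncoding { kAndImm, kBicImm, kUbfx };

    // Example: value = 0x0000FFFF. Neither 0x0000FFFF nor its complement
    // 0xFFFF0000 is a modified immediate, but value + 1 == 0x10000 is a power
    // of two, so UBFX out, first, #0, #16 produces the same masking.
    inline AndEncoding PickAndEncoding(bool and_holds,   // ShifterOperandCanHold(AND, value)
                                       bool bic_holds) { // ShifterOperandCanHold(BIC, ~value)
      if (and_holds) return AndEncoding::kAndImm;
      if (bic_holds) return AndEncoding::kBicImm;
      return AndEncoding::kUbfx;  // only valid when value + 1 is a power of two
    }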
@@ -7974,9 +8509,7 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageTypeLiteral(
}
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
- bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
- Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
- return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+ return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
}
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateDexCacheAddressLiteral(uint32_t address) {
@@ -8036,8 +8569,7 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa
/* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
boot_image_type_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
- /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
- boot_image_address_patches_.size();
+ /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
@@ -8071,13 +8603,6 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa
target_type.dex_file,
target_type.type_index.index_));
}
- for (const auto& entry : boot_image_address_patches_) {
- DCHECK(GetCompilerOptions().GetIncludePatchInformation());
- VIXLUInt32Literal* literal = entry.second;
- DCHECK(literal->IsBound());
- uint32_t literal_offset = literal->GetLocation();
- linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
- }
DCHECK_EQ(size, linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 2a636dbd99..1e9669dc38 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -401,10 +401,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
void GenerateCompareTestAndBranch(HCondition* condition,
vixl::aarch32::Label* true_target,
vixl::aarch32::Label* false_target);
- void GenerateVcmp(HInstruction* instruction);
- void GenerateFPJumps(HCondition* cond,
- vixl::aarch32::Label* true_label,
- vixl::aarch32::Label* false_label);
void GenerateLongComparesAndJumps(HCondition* cond,
vixl::aarch32::Label* true_label,
vixl::aarch32::Label* false_label);
@@ -510,6 +506,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
return &(block_labels_[block->GetBlockId()]);
}
+ vixl32::Label* GetFinalLabel(HInstruction* instruction, vixl32::Label* final_label);
+
void Initialize() OVERRIDE {
block_labels_.resize(GetGraph()->GetBlocks().size());
}
@@ -752,8 +750,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // Deduplication map for patchable boot image addresses.
- Uint32ToLiteralMap boot_image_address_patches_;
// Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index c9dde7cc55..287891feae 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -391,7 +391,8 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS {
class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
public:
- explicit TypeCheckSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {}
+ explicit TypeCheckSlowPathMIPS(HInstruction* instruction, bool is_fatal)
+ : SlowPathCodeMIPS(instruction), is_fatal_(is_fatal) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
@@ -401,7 +402,9 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, locations);
+ if (!is_fatal_) {
+ SaveLiveRegisters(codegen, locations);
+ }
// We're moving two locations to locations that could overlap, so we need a parallel
// move resolver.
@@ -424,13 +427,19 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
}
- RestoreLiveRegisters(codegen, locations);
- __ B(GetExitLabel());
+ if (!is_fatal_) {
+ RestoreLiveRegisters(codegen, locations);
+ __ B(GetExitLabel());
+ }
}
const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS"; }
+ bool IsFatal() const OVERRIDE { return is_fatal_; }
+
private:
+ const bool is_fatal_;
+
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS);
};
@@ -452,6 +461,536 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
};
+class ArraySetSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+ explicit ArraySetSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(
+ locations->InAt(0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(1),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimInt,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(2),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimNot,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+ mips_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+ RestoreLiveRegisters(codegen, locations);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS);
+};
+
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathMIPS below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is for the GcRoot read barrier.
+class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+ ReadBarrierMarkSlowPathMIPS(HInstruction* instruction,
+ Location ref,
+ Location entrypoint = Location::NoLocation())
+ : SlowPathCodeMIPS(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+ DCHECK((V0 <= ref_reg && ref_reg <= T7) ||
+ (S2 <= ref_reg && ref_reg <= S7) ||
+ (ref_reg == FP)) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in A0 and V0 respectively):
+ //
+ // A0 <- ref
+ // V0 <- ReadBarrierMark(A0)
+ // ref <- V0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ mips_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ DCHECK_EQ(entrypoint_.AsRegister<Register>(), T9);
+ __ Jalr(entrypoint_.AsRegister<Register>());
+ __ NopIfNoReordering();
+ } else {
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+ // This runtime call does not require a stack map.
+ mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+ instruction_,
+ this,
+ /* direct */ false);
+ }
+ __ B(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if already loaded.
+ const Location entrypoint_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS);
+};
+
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathMIPS above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathMIPS(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Register temp1)
+ : SlowPathCodeMIPS(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_offset_(field_offset),
+ temp1_(temp1) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+ DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+
+ __ Bind(GetEntryLabel());
+
+ // Save the old reference.
+ // Note that we cannot use AT or TMP to save the old reference, as those
+ // are used by the code that follows, but we need the old reference after
+ // the call to the ReadBarrierMarkRegX entry point.
+ DCHECK_NE(temp1_, AT);
+ DCHECK_NE(temp1_, TMP);
+ __ Move(temp1_, ref_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+ DCHECK((V0 <= ref_reg && ref_reg <= T7) ||
+ (S2 <= ref_reg && ref_reg <= S7) ||
+ (ref_reg == FP)) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in A0 and V0 respectively):
+ //
+ // A0 <- ref
+ // V0 <- ReadBarrierMark(A0)
+ // ref <- V0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+ // This runtime call does not require a stack map.
+ mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+ instruction_,
+ this,
+ /* direct */ false);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*(obj_ + field_offset_)`).
+ //
+ // Note that this field could also hold a different object, if
+ // another thread had concurrently changed it. In that case, the
+ // compare-and-set (CAS) loop below would abort, leaving the
+ // field as-is.
+ MipsLabel done;
+ __ Beq(temp1_, ref_reg, &done);
+
+ // Update the holder's field atomically. This may fail if the
+ // mutator updates it before us, but it's OK. This is achieved
+ // using a strong compare-and-set (CAS) operation with relaxed
+ // memory synchronization ordering, where the expected value is
+ // the old reference and the desired value is the new reference.
+
+ // Convenience aliases.
+ Register base = obj_;
+ // The UnsafeCASObject intrinsic uses a register pair as field
+ // offset ("long offset"), of which only the low part contains
+ // data.
+ Register offset = field_offset_.AsRegisterPairLow<Register>();
+ Register expected = temp1_;
+ Register value = ref_reg;
+ Register tmp_ptr = TMP; // Pointer to actual memory.
+ Register tmp = AT; // Value in memory.
+
+ __ Addu(tmp_ptr, base, offset);
+
+ if (kPoisonHeapReferences) {
+ __ PoisonHeapReference(expected);
+ // Do not poison `value` if it is the same register as
+ // `expected`, which has just been poisoned.
+ if (value != expected) {
+ __ PoisonHeapReference(value);
+ }
+ }
+
+ // do {
+ // tmp = [r_ptr] - expected;
+ // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+
+ bool is_r6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+ MipsLabel loop_head, exit_loop;
+ __ Bind(&loop_head);
+ if (is_r6) {
+ __ LlR6(tmp, tmp_ptr);
+ } else {
+ __ LlR2(tmp, tmp_ptr);
+ }
+ __ Bne(tmp, expected, &exit_loop);
+ __ Move(tmp, value);
+ if (is_r6) {
+ __ ScR6(tmp, tmp_ptr);
+ } else {
+ __ ScR2(tmp, tmp_ptr);
+ }
+ __ Beqz(tmp, &loop_head);
+ __ Bind(&exit_loop);
+
+ if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(expected);
+ // Do not unpoison `value` if it is the same register as
+ // `expected`, which has just been unpoisoned.
+ if (value != expected) {
+ __ UnpoisonHeapReference(value);
+ }
+ }
+
+ __ Bind(&done);
+ __ B(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const Register obj_;
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset_;
+
+ const Register temp1_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS);
+};
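The LL/SC loop in the slow path above implements the strong, relaxed-ordering compare-and-set its comments describe. The same contract expressed with standard C++ atomics, as a sketch only (a 32-bit integer stands in for a heap reference):

    #include <atomic>
    #include <cstdint>

    // After marking, store the to-space reference into obj.field only if the
    // field still holds the old from-space reference; if another thread has
    // already written something else there, leave the field untouched.
    inline void UpdateFieldAfterMark(std::atomic<uint32_t>* field,
                                     uint32_t old_ref,
                                     uint32_t new_ref) {
      uint32_t expected = old_ref;
      field->compare_exchange_strong(expected, new_ref, std::memory_order_relaxed);
    }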
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+ ReadBarrierForHeapReferenceSlowPathMIPS(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index)
+ : SlowPathCodeMIPS(instruction),
+ out_(out),
+ ref_(ref),
+ obj_(obj),
+ offset_(offset),
+ index_(index) {
+ DCHECK(kEmitCompilerReadBarrier);
+ // If `obj` is equal to `out` or `ref`, it means the initial object
+ // has been overwritten by (or after) the heap object reference load
+ // to be instrumented, e.g.:
+ //
+ // __ LoadFromOffset(kLoadWord, out, out, offset);
+ // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+ //
+ // In that case, we have lost the information about the original
+ // object, and the emitted read barrier cannot work properly.
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+ DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for heap reference slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ // We may have to change the index's value, but as `index_` is a
+ // constant member (like other "inputs" of this slow path),
+ // introduce a copy of it, `index`.
+ Location index = index_;
+ if (index_.IsValid()) {
+ // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+ if (instruction_->IsArrayGet()) {
+ // Compute the actual memory offset and store it in `index`.
+ Register index_reg = index_.AsRegister<Register>();
+ DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+ if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+ // We are about to change the value of `index_reg` (see the
+ // calls to art::mips::MipsAssembler::Sll and
+ // art::mips::MipsAssembler::Addiu32 below), but it has
+ // not been saved by the previous call to
+ // art::SlowPathCode::SaveLiveRegisters, as it is a
+ // callee-save register --
+ // art::SlowPathCode::SaveLiveRegisters does not consider
+ // callee-save registers, as it has been designed with the
+ // assumption that callee-save registers are supposed to be
+ // handled by the called function. So, as a callee-save
+ // register, `index_reg` _would_ eventually be saved onto
+ // the stack, but it would be too late: we would have
+ // changed its value earlier. Therefore, we manually save
+ // it here into another freely available register,
+ // `free_reg`, chosen of course among the caller-save
+ // registers (as a callee-save `free_reg` register would
+ // exhibit the same problem).
+ //
+ // Note we could have requested a temporary register from
+ // the register allocator instead; but we prefer not to, as
+ // this is a slow path, and we know we can find a
+ // caller-save register that is available.
+ Register free_reg = FindAvailableCallerSaveRegister(codegen);
+ __ Move(free_reg, index_reg);
+ index_reg = free_reg;
+ index = Location::RegisterLocation(index_reg);
+ } else {
+ // The initial register stored in `index_` has already been
+ // saved in the call to art::SlowPathCode::SaveLiveRegisters
+ // (as it is not a callee-save register), so we can freely
+ // use it.
+ }
+ // Shifting the index value contained in `index_reg` by the scale
+ // factor (2) cannot overflow in practice, as the runtime is
+ // unable to allocate object arrays with a size larger than
+ // 2^26 - 1 (that is, 2^28 - 4 bytes).
+ __ Sll(index_reg, index_reg, TIMES_4);
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ __ Addiu32(index_reg, index_reg, offset_);
+ } else {
+ // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+ // intrinsics, `index_` is not shifted by a scale factor of 2
+ // (as in the case of ArrayGet), as it is actually an offset
+ // to an object field within an object.
+ DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+ (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+ << instruction_->AsInvoke()->GetIntrinsic();
+ DCHECK_EQ(offset_, 0U);
+ DCHECK(index_.IsRegisterPair());
+ // UnsafeGet's offset location is a register pair, the low
+ // part contains the correct offset.
+ index = index_.ToLow();
+ }
+ }
+
+ // We're moving two or three locations to locations that could
+ // overlap, so we need a parallel move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(ref_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
+ nullptr);
+ parallel_move.AddMove(obj_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimNot,
+ nullptr);
+ if (index.IsValid()) {
+ parallel_move.AddMove(index,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimInt,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ } else {
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ __ LoadConst32(calling_convention.GetRegisterAt(2), offset_);
+ }
+ mips_codegen->InvokeRuntime(kQuickReadBarrierSlow,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<
+ kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+ mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathMIPS"; }
+
+ private:
+ Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+ size_t ref = static_cast<int>(ref_.AsRegister<Register>());
+ size_t obj = static_cast<int>(obj_.AsRegister<Register>());
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (i != ref &&
+ i != obj &&
+ !codegen->IsCoreCalleeSaveRegister(i) &&
+ !codegen->IsBlockedCoreRegister(i)) {
+ return static_cast<Register>(i);
+ }
+ }
+ // We shall never fail to find a free caller-save register, as
+ // there are more than two core caller-save registers on MIPS
+ // (meaning it is possible to find one which is different from
+ // `ref` and `obj`).
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+ LOG(FATAL) << "Could not find a free caller-save register";
+ UNREACHABLE();
+ }
+
+ const Location out_;
+ const Location ref_;
+ const Location obj_;
+ const uint32_t offset_;
+ // An additional location containing an index to an array.
+ // Only used for HArrayGet and the UnsafeGetObject &
+ // UnsafeGetObjectVolatile intrinsics.
+ const Location index_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+ ReadBarrierForRootSlowPathMIPS(HInstruction* instruction, Location out, Location root)
+ : SlowPathCodeMIPS(instruction), out_(out), root_(root) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+ mips_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+ mips_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+ mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS"; }
+
+ private:
+ const Location out_;
+ const Location root_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS);
+};
+
CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
const MipsInstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
@@ -482,8 +1021,6 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_address_patches_(std::less<uint32_t>(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
clobbered_ra_(false) {
@@ -1026,8 +1563,7 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch
pc_relative_type_patches_.size() +
type_bss_entry_patches_.size() +
boot_image_string_patches_.size() +
- boot_image_type_patches_.size() +
- boot_image_address_patches_.size();
+ boot_image_type_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
@@ -1061,13 +1597,6 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch
target_type.dex_file,
target_type.type_index.index_));
}
- for (const auto& entry : boot_image_address_patches_) {
- DCHECK(GetCompilerOptions().GetIncludePatchInformation());
- Literal* literal = entry.second;
- DCHECK(literal->GetLabel()->IsBound());
- uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
- linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
- }
DCHECK_EQ(size, linker_patches->size());
}
@@ -1125,9 +1654,7 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_f
}
Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) {
- bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
- Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
- return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+ return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
}
void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
@@ -1313,10 +1840,26 @@ void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
uint32_t dex_pc,
SlowPathCode* slow_path) {
ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+ GenerateInvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(),
+ IsDirectEntrypoint(entrypoint));
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
+}
+
+void CodeGeneratorMIPS::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path,
+ bool direct) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ GenerateInvokeRuntime(entry_point_offset, direct);
+}
+
+void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool direct) {
bool reordering = __ SetReorder(false);
- __ LoadFromOffset(kLoadWord, T9, TR, GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value());
+ __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
__ Jalr(T9);
- if (IsDirectEntrypoint(entrypoint)) {
+ if (direct) {
// Reserve argument space on stack (for $a0-$a3) for
// entrypoints that directly reference native implementations.
// Called function may use this space to store $a0-$a3 regs.
@@ -1326,9 +1869,6 @@ void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
__ Nop(); // In delay slot.
}
__ SetReorder(reordering);
- if (EntrypointRequiresStackMap(entrypoint)) {
- RecordPcInfo(instruction, dex_pc, slow_path);
- }
}
void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path,
@@ -1888,37 +2428,56 @@ void InstructionCodeGeneratorMIPS::VisitAnd(HAnd* instruction) {
}
void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
+ Primitive::Type type = instruction->GetType();
+ bool object_array_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (type == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_array_get_with_read_barrier
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (Primitive::IsFloatingPointType(instruction->GetType())) {
+ if (Primitive::IsFloatingPointType(type)) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps in the case of an object array get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // array's location, as we need it to emit the read barrier.
+ locations->SetOut(Location::RequiresRegister(),
+ object_array_get_with_read_barrier
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ }
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier.
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->AddTemp(Location::RequiresRegister());
}
}
-auto InstructionCodeGeneratorMIPS::GetImplicitNullChecker(HInstruction* instruction) {
- auto null_checker = [this, instruction]() {
- this->codegen_->MaybeRecordImplicitNullCheck(instruction);
+static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS* codegen) {
+ auto null_checker = [codegen, instruction]() {
+ codegen->MaybeRecordImplicitNullCheck(instruction);
};
return null_checker;
}
void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
+ Location out_loc = locations->Out();
Location index = locations->InAt(1);
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
- auto null_checker = GetImplicitNullChecker(instruction);
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
Primitive::Type type = instruction->GetType();
const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
instruction->IsStringCharAt();
switch (type) {
case Primitive::kPrimBoolean: {
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1931,7 +2490,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimByte: {
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1944,7 +2503,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimShort: {
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -1958,7 +2517,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimChar: {
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (maybe_compressed_char_at) {
uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
__ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker);
@@ -2011,10 +2570,9 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
break;
}
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
+ case Primitive::kPrimInt: {
DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -2027,8 +2585,53 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case Primitive::kPrimNot: {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ out =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier call.
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ index,
+ temp,
+ /* needs_null_check */ true);
+ } else {
+ Register out = out_loc.AsRegister<Register>();
+ if (index.IsConstant()) {
+ size_t offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+ } else {
+ __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
+ __ Addu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction,
+ out_loc,
+ out_loc,
+ obj_loc,
+ data_offset,
+ index);
+ }
+ }
+ break;
+ }
+
case Primitive::kPrimLong: {
- Register out = locations->Out().AsRegisterPairLow<Register>();
+ Register out = out_loc.AsRegisterPairLow<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -2042,7 +2645,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimFloat: {
- FRegister out = locations->Out().AsFpuRegister<FRegister>();
+ FRegister out = out_loc.AsFpuRegister<FRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -2056,7 +2659,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimDouble: {
- FRegister out = locations->Out().AsFpuRegister<FRegister>();
+ FRegister out = out_loc.AsFpuRegister<FRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -2114,23 +2717,28 @@ Location LocationsBuilderMIPS::FpuRegisterOrConstantForStore(HInstruction* instr
}
void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
- bool needs_runtime_call = instruction->NeedsTypeCheck();
+ Primitive::Type value_type = instruction->GetComponentType();
+
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
- if (needs_runtime_call) {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ may_need_runtime_call_for_type_check ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+ locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
} else {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
- locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
- } else {
- locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
- }
+ locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
+ }
+ if (needs_write_barrier) {
+ // Temporary register for the write barrier.
+ locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
}
}
@@ -2140,10 +2748,10 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
Location index = locations->InAt(1);
Location value_location = locations->InAt(2);
Primitive::Type value_type = instruction->GetComponentType();
- bool needs_runtime_call = locations->WillCall();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
- auto null_checker = GetImplicitNullChecker(instruction);
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
Register base_reg = index.IsConstant() ? obj : TMP;
switch (value_type) {
@@ -2184,9 +2792,27 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
break;
}
- case Primitive::kPrimInt:
+ case Primitive::kPrimInt: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+ } else {
+ __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+ __ Addu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ Register value = value_location.AsRegister<Register>();
+ __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+ }
+ break;
+ }
+
case Primitive::kPrimNot: {
- if (!needs_runtime_call) {
+ if (value_location.IsConstant()) {
+ // Just setting null.
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
@@ -2194,22 +2820,110 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
__ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
__ Addu(base_reg, obj, base_reg);
}
- if (value_location.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
- __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
- DCHECK(!needs_write_barrier);
- } else {
- Register value = value_location.AsRegister<Register>();
- __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
- if (needs_write_barrier) {
- DCHECK_EQ(value_type, Primitive::kPrimNot);
- codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ DCHECK_EQ(value, 0);
+ __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+ DCHECK(!needs_write_barrier);
+ DCHECK(!may_need_runtime_call_for_type_check);
+ break;
+ }
+
+ DCHECK(needs_write_barrier);
+ Register value = value_location.AsRegister<Register>();
+ Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+ Register temp2 = TMP; // Doesn't need to survive slow path.
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ MipsLabel done;
+ SlowPathCodeMIPS* slow_path = nullptr;
+
+ if (may_need_runtime_call_for_type_check) {
+ slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS(instruction);
+ codegen_->AddSlowPath(slow_path);
+ if (instruction->GetValueCanBeNull()) {
+ MipsLabel non_zero;
+ __ Bnez(value, &non_zero);
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+ } else {
+ __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+ __ Addu(base_reg, obj, base_reg);
}
+ __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+ __ B(&done);
+ __ Bind(&non_zero);
}
+
+ // Note that when read barriers are enabled, the type checks
+ // are performed without read barriers. This is fine, even in
+ // the case where a class object is in the from-space after
+ // the flip, as a comparison involving such a type would not
+ // produce a false positive; it may of course produce a false
+ // negative, in which case we would take the ArraySet slow
+ // path.
+
+ // /* HeapReference<Class> */ temp1 = obj->klass_
+ __ LoadFromOffset(kLoadWord, temp1, obj, class_offset, null_checker);
+ __ MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+ // /* HeapReference<Class> */ temp2 = value->klass_
+ __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+ // If heap poisoning is enabled, no need to unpoison `temp1`
+ // nor `temp2`, as we are comparing two poisoned references.
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ MipsLabel do_put;
+ __ Beq(temp1, temp2, &do_put);
+ // If heap poisoning is enabled, the `temp1` reference has
+ // not been unpoisoned yet; unpoison it now.
+ __ MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ // If heap poisoning is enabled, no need to unpoison
+ // `temp1`, as we are comparing against null below.
+ __ Bnez(temp1, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ Bne(temp1, temp2, slow_path->GetEntryLabel());
+ }
+ }
+
+ Register source = value;
+ if (kPoisonHeapReferences) {
+ // Note that in the case where `value` is a null reference,
+ // we do not enter this block, as a null reference does not
+ // need poisoning.
+ __ Move(temp1, value);
+ __ PoisonHeapReference(temp1);
+ source = temp1;
+ }
+
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- DCHECK_EQ(value_type, Primitive::kPrimNot);
- codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+ __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+ __ Addu(base_reg, obj, base_reg);
+ }
+ __ StoreToOffset(kStoreWord, source, base_reg, data_offset);
+
+ if (!may_need_runtime_call_for_type_check) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
}
break;
}
@@ -2299,30 +3013,234 @@ void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) {
__ Bgeu(index, length, slow_path->GetEntryLabel());
}
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+ if (kEmitCompilerReadBarrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ return 1;
+ }
+ return 0;
+}
+
+// CheckCast needs one temp to hold the object's class during the checks;
+// the extra temp (if any) is used for the read barrier.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+ return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
+
void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
- instruction,
- LocationSummary::kCallOnSlowPath);
+ LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+ bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kAbstractClassCheck:
+ case TypeCheckKind::kClassHierarchyCheck:
+ case TypeCheckKind::kArrayObjectCheck:
+ call_kind = (throws_into_catch || kEmitCompilerReadBarrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
+ break;
+ case TypeCheckKind::kArrayCheck:
+ case TypeCheckKind::kUnresolvedCheck:
+ case TypeCheckKind::kInterfaceCheck:
+ call_kind = LocationSummary::kCallOnSlowPath;
+ break;
+ }
+
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- // Note that TypeCheckSlowPathMIPS uses this register too.
- locations->AddTemp(Location::RequiresRegister());
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
Register cls = locations->InAt(1).AsRegister<Register>();
- Register obj_cls = locations->GetTemp(0).AsRegister<Register>();
+ Location temp_loc = locations->GetTemp(0);
+ Register temp = temp_loc.AsRegister<Register>();
+ const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ DCHECK_LE(num_temps, 2u);
+ Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+ const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+ const uint32_t object_array_data_offset =
+ mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ MipsLabel done;
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction);
+  // The type check slow path is never fatal when read barriers are enabled: avoiding read
+  // barriers below (done for performance and code size reasons) may produce false negatives,
+  // which must then be resolved by the non-fatal runtime entrypoint.
+ bool is_type_check_slow_path_fatal = false;
+ if (!kEmitCompilerReadBarrier) {
+ is_type_check_slow_path_fatal =
+ (type_check_kind == TypeCheckKind::kExactCheck ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+ !instruction->CanThrowIntoCatchBlock();
+ }
+ SlowPathCodeMIPS* slow_path =
+ new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction,
+ is_type_check_slow_path_fatal);
codegen_->AddSlowPath(slow_path);
- // TODO: avoid this check if we know obj is not null.
- __ Beqz(obj, slow_path->GetExitLabel());
- // Compare the class of `obj` with `cls`.
- __ LoadFromOffset(kLoadWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
- __ Bne(obj_cls, cls, slow_path->GetEntryLabel());
+ // Avoid this check if we know `obj` is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Beqz(obj, &done);
+ }
+
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kArrayCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Jump to slow path for throwing the exception or doing a
+ // more involved array check.
+ __ Bne(temp, cls, slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kAbstractClassCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class is abstract, we eagerly fetch the super class of the
+ // object to avoid doing a comparison we know will fail.
+ MipsLabel loop;
+ __ Bind(&loop);
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class reference currently in `temp` is null, jump to the slow path to throw the
+ // exception.
+ __ Beqz(temp, slow_path->GetEntryLabel());
+ // Otherwise, compare the classes.
+ __ Bne(temp, cls, &loop);
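+      // Falling through to this point means a matching superclass was found,
+      // so the check succeeds.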
+ break;
+ }
+
+ case TypeCheckKind::kClassHierarchyCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Walk over the class hierarchy to find a match.
+ MipsLabel loop;
+ __ Bind(&loop);
+ __ Beq(temp, cls, &done);
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class reference currently in `temp` is null, jump to the slow path to throw the
+ // exception. Otherwise, jump to the beginning of the loop.
+ __ Bnez(temp, &loop);
+ __ B(slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kArrayObjectCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Do an exact check.
+ __ Beq(temp, cls, &done);
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ // /* HeapReference<Class> */ temp = temp->component_type_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ component_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the component type is null, jump to the slow path to throw the exception.
+ __ Beqz(temp, slow_path->GetEntryLabel());
+      // Otherwise, the object is indeed an array; further check that this component
+ // type is not a primitive type.
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Bnez(temp, slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kUnresolvedCheck:
+ // We always go into the type check slow path for the unresolved check case.
+ // We cannot directly call the CheckCast runtime entry point
+ // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require assigning
+      // fixed registers for the inputs of this HCheckCast
+ // instruction (following the runtime calling convention), which
+ // might be cluttered by the potential first read barrier
+ // emission at the beginning of this method.
+ __ B(slow_path->GetEntryLabel());
+ break;
+
+ case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve the performance of the fast path. We cannot get false
+      // positives by doing this.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // /* HeapReference<Class> */ temp = temp->iftable_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Iftable is never null.
+ __ Lw(TMP, temp, array_length_offset);
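+      // TMP now holds the iftable length. Each iftable entry is a pair of
+      // (interface class, method array) references, so this length is twice
+      // the number of implemented interfaces; hence the stride of
+      // 2 * kHeapReferenceSize and the decrement of TMP by 2 per iteration.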
+ // Loop through the iftable and check if any class matches.
+ MipsLabel loop;
+ __ Bind(&loop);
+ __ Addiu(temp, temp, 2 * kHeapReferenceSize); // Possibly in delay slot on R2.
+ __ Beqz(TMP, slow_path->GetEntryLabel());
+ __ Lw(AT, temp, object_array_data_offset - 2 * kHeapReferenceSize);
+ __ MaybeUnpoisonHeapReference(AT);
+ // Go to next interface.
+ __ Addiu(TMP, TMP, -2);
+ // Compare the classes and continue the loop if they do not match.
+ __ Bne(AT, cls, &loop);
+ break;
+ }
+ }
+
+ __ Bind(&done);
__ Bind(slow_path->GetExitLabel());
}
@@ -4855,8 +5773,15 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field
Primitive::Type field_type = field_info.GetFieldType();
bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
bool generate_volatile = field_info.IsVolatile() && is_wide;
+ bool object_field_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
- instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
+ instruction,
+ generate_volatile
+ ? LocationSummary::kCallOnMainOnly
+ : (object_field_get_with_read_barrier
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall));
locations->SetInAt(0, Location::RequiresRegister());
if (generate_volatile) {
@@ -4877,7 +5802,18 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps in the case of an object field get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // object's location, as we need it to emit the read barrier.
+ locations->SetOut(Location::RequiresRegister(),
+ object_field_get_with_read_barrier
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ }
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
}
}
@@ -4887,11 +5823,13 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
uint32_t dex_pc) {
Primitive::Type type = field_info.GetFieldType();
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
+ Location dst_loc = locations->Out();
LoadOperandType load_type = kLoadUnsignedByte;
bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
- auto null_checker = GetImplicitNullChecker(instruction);
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
switch (type) {
case Primitive::kPrimBoolean:
@@ -4930,37 +5868,61 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>();
if (type == Primitive::kPrimDouble) {
// FP results are returned in core registers. Need to move them.
- Location out = locations->Out();
- if (out.IsFpuRegister()) {
- __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>());
+ if (dst_loc.IsFpuRegister()) {
+ __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), dst_loc.AsFpuRegister<FRegister>());
__ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
- out.AsFpuRegister<FRegister>());
+ dst_loc.AsFpuRegister<FRegister>());
} else {
- DCHECK(out.IsDoubleStackSlot());
+ DCHECK(dst_loc.IsDoubleStackSlot());
__ StoreToOffset(kStoreWord,
locations->GetTemp(1).AsRegister<Register>(),
SP,
- out.GetStackIndex());
+ dst_loc.GetStackIndex());
__ StoreToOffset(kStoreWord,
locations->GetTemp(2).AsRegister<Register>(),
SP,
- out.GetStackIndex() + 4);
+ dst_loc.GetStackIndex() + 4);
}
}
} else {
- if (!Primitive::IsFloatingPointType(type)) {
+ if (type == Primitive::kPrimNot) {
+ // /* HeapReference<Object> */ dst = *(obj + offset)
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp_loc = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ dst_loc,
+ obj,
+ offset,
+ temp_loc,
+ /* needs_null_check */ true);
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ } else {
+ __ LoadFromOffset(kLoadWord, dst_loc.AsRegister<Register>(), obj, offset, null_checker);
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
+ }
+ } else if (!Primitive::IsFloatingPointType(type)) {
Register dst;
if (type == Primitive::kPrimLong) {
- DCHECK(locations->Out().IsRegisterPair());
- dst = locations->Out().AsRegisterPairLow<Register>();
+ DCHECK(dst_loc.IsRegisterPair());
+ dst = dst_loc.AsRegisterPairLow<Register>();
} else {
- DCHECK(locations->Out().IsRegister());
- dst = locations->Out().AsRegister<Register>();
+ DCHECK(dst_loc.IsRegister());
+ dst = dst_loc.AsRegister<Register>();
}
__ LoadFromOffset(load_type, dst, obj, offset, null_checker);
} else {
- DCHECK(locations->Out().IsFpuRegister());
- FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+ DCHECK(dst_loc.IsFpuRegister());
+ FRegister dst = dst_loc.AsFpuRegister<FRegister>();
if (type == Primitive::kPrimFloat) {
__ LoadSFromOffset(dst, obj, offset, null_checker);
} else {
@@ -4969,7 +5931,9 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
}
}
- if (is_volatile) {
+ // Memory barriers, in the case of references, are handled in the
+ // previous switch statement.
+ if (is_volatile && (type != Primitive::kPrimNot)) {
GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
}
@@ -5016,7 +5980,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
StoreOperandType store_type = kStoreByte;
bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
- auto null_checker = GetImplicitNullChecker(instruction);
+ bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
switch (type) {
case Primitive::kPrimBoolean:
@@ -5089,7 +6054,16 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
} else {
src = value_location.AsRegister<Register>();
}
- __ StoreToOffset(store_type, src, obj, offset, null_checker);
+ if (kPoisonHeapReferences && needs_write_barrier) {
+ // Note that in the case where `value` is a null reference,
+ // we do not enter this block, as a null reference does not
+ // need poisoning.
+ DCHECK_EQ(type, Primitive::kPrimNot);
+ __ PoisonHeapReference(TMP, src);
+ __ StoreToOffset(store_type, TMP, obj, offset, null_checker);
+ } else {
+ __ StoreToOffset(store_type, src, obj, offset, null_checker);
+ }
} else {
FRegister src = value_location.AsFpuRegister<FRegister>();
if (type == Primitive::kPrimFloat) {
@@ -5100,8 +6074,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
}
}
- // TODO: memory barriers?
- if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
+ if (needs_write_barrier) {
Register src = value_location.AsRegister<Register>();
codegen_->MarkGCCard(obj, src, value_can_be_null);
}
@@ -5130,14 +6103,133 @@ void InstructionCodeGeneratorMIPS::VisitInstanceFieldSet(HInstanceFieldSet* inst
instruction->GetValueCanBeNull());
}
-void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(
- HInstruction* instruction ATTRIBUTE_UNUSED,
- Location root,
- Register obj,
- uint32_t offset) {
+void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister(
+ HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ Register out_reg = out.AsRegister<Register>();
+ if (read_barrier_option == kWithReadBarrier) {
+ CHECK(kEmitCompilerReadBarrier);
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ out_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following load operation, as we will need it for the
+ // read barrier below.
+ __ Move(maybe_temp.AsRegister<Register>(), out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters(
+ HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ Register out_reg = out.AsRegister<Register>();
+ Register obj_reg = obj.AsRegister<Register>();
+ if (read_barrier_option == kWithReadBarrier) {
+ CHECK(kEmitCompilerReadBarrier);
+ if (kUseBakerReadBarrier) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ obj_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ Register obj,
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option) {
Register root_reg = root.AsRegister<Register>();
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
+ if (read_barrier_option == kWithReadBarrier) {
+ DCHECK(kEmitCompilerReadBarrier);
+ if (kUseBakerReadBarrier) {
+ // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+ //
+ // root = obj.field;
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp != null) {
+ // root = temp(root)
+ // }
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path marking the GC root `root`.
+ Location temp = Location::RegisterLocation(T9);
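+      // T9 is used for the entrypoint so the marking slow path can call it
+      // with a plain jalr: MIPS (PIC) calling conventions expect the callee
+      // address in $t9.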
+ SlowPathCodeMIPS* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(
+ instruction,
+ root,
+ /*entrypoint*/ temp);
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ __ Addiu32(root_reg, obj, offset);
+ // /* mirror::Object* */ root = root->Read()
+ codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
} else {
// Plain GC root load with no read barrier.
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -5147,47 +6239,425 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(
}
}
+void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ Location no_index = Location::NoLocation();
+ ScaleFactor no_scale_factor = TIMES_1;
+ GenerateReferenceLoadWithBakerReadBarrier(instruction,
+ ref,
+ obj,
+ offset,
+ no_index,
+ no_scale_factor,
+ temp,
+ needs_null_check);
+}
+
+void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ ref =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ ScaleFactor scale_factor = TIMES_4;
+ GenerateReferenceLoadWithBakerReadBarrier(instruction,
+ ref,
+ obj,
+ data_offset,
+ index,
+ scale_factor,
+ temp,
+ needs_null_check);
+}
+
+void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ Location temp,
+ bool needs_null_check,
+ bool always_update_field) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // In slow path based read barriers, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
+
+ Register ref_reg = ref.AsRegister<Register>();
+ Register temp_reg = temp.AsRegister<Register>();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ // /* int32_t */ monitor = obj->monitor_
+ __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ __ Sync(0); // Barrier to prevent load-load reordering.
+
+ // The actual reference load.
+ if (index.IsValid()) {
+ // Load types involving an "index": ArrayGet,
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics.
+ // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+ if (index.IsConstant()) {
+ size_t computed_offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
+ __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+ } else {
+ // Handle the special case of the
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics, which use a register pair as index ("long
+ // offset"), of which only the low part contains data.
+ Register index_reg = index.IsRegisterPair()
+ ? index.AsRegisterPairLow<Register>()
+ : index.AsRegister<Register>();
+ __ Sll(TMP, index_reg, scale_factor);
+ __ Addu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadWord, ref_reg, TMP, offset);
+ }
+ } else {
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+ }
+
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+
+ // Slow path marking the object `ref` when it is gray.
+ SlowPathCodeMIPS* slow_path;
+ if (always_update_field) {
+    // ReadBarrierMarkAndUpdateFieldSlowPathMIPS only supports addresses
+    // of the form `obj + field_offset`, where `obj` is a register and
+    // `field_offset` is a register pair (of which only the lower half
+    // is used). Thus `offset` is expected to be zero and `scale_factor`
+    // to be TIMES_1 in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+ slow_path = new (GetGraph()->GetArena())
+ ReadBarrierMarkAndUpdateFieldSlowPathMIPS(instruction,
+ ref,
+ obj,
+ /* field_offset */ index,
+ temp_reg);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(instruction, ref);
+ }
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit into the sign bit (31) and
+ // performing a branch on less than zero.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size");
+ __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift);
+ __ Bltz(temp_reg, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorMIPS::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Insert a slow path based read barrier *after* the reference load.
+ //
+ // If heap poisoning is enabled, the unpoisoning of the loaded
+ // reference will be carried out by the runtime within the slow
+ // path.
+ //
+ // Note that `ref` currently does not get unpoisoned (when heap
+ // poisoning is enabled), which is alright as the `ref` argument is
+ // not used by the artReadBarrierSlow entry point.
+ //
+ // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+ SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena())
+ ReadBarrierForHeapReferenceSlowPathMIPS(instruction, out, ref, obj, offset, index);
+ AddSlowPath(slow_path);
+
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorMIPS::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ if (kEmitCompilerReadBarrier) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
+ // If heap poisoning is enabled, unpoisoning will be taken care of
+ // by the runtime within the slow path.
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+ } else if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(out.AsRegister<Register>());
+ }
+}
+
+void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
+ // Note that GC roots are not affected by heap poisoning, so we do
+ // not need to do anything special for this here.
+ SlowPathCodeMIPS* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS(instruction, out, root);
+ AddSlowPath(slow_path);
+
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
- LocationSummary::CallKind call_kind =
- instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
+ LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kAbstractClassCheck:
+ case TypeCheckKind::kClassHierarchyCheck:
+ case TypeCheckKind::kArrayObjectCheck:
+ call_kind =
+ kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ break;
+ case TypeCheckKind::kArrayCheck:
+ case TypeCheckKind::kUnresolvedCheck:
+ case TypeCheckKind::kInterfaceCheck:
+ call_kind = LocationSummary::kCallOnSlowPath;
+ break;
+ }
+
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// The output does overlap inputs.
// Note that TypeCheckSlowPathMIPS uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
}
void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
Register cls = locations->InAt(1).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
+ const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ DCHECK_LE(num_temps, 1u);
+ Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
MipsLabel done;
+ SlowPathCodeMIPS* slow_path = nullptr;
// Return 0 if `obj` is null.
- // TODO: Avoid this check if we know `obj` is not null.
- __ Move(out, ZERO);
- __ Beqz(obj, &done);
-
- // Compare the class of `obj` with `cls`.
- __ LoadFromOffset(kLoadWord, out, obj, mirror::Object::ClassOffset().Int32Value());
- if (instruction->IsExactCheck()) {
- // Classes must be equal for the instanceof to succeed.
- __ Xor(out, out, cls);
- __ Sltiu(out, out, 1);
- } else {
- // If the classes are not equal, we go into a slow path.
- DCHECK(locations->OnlyCallsOnSlowPath());
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction);
- codegen_->AddSlowPath(slow_path);
- __ Bne(out, cls, slow_path->GetEntryLabel());
- __ LoadConst32(out, 1);
- __ Bind(slow_path->GetExitLabel());
+ // Avoid this check if we know `obj` is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Move(out, ZERO);
+ __ Beqz(obj, &done);
+ }
+
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck: {
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // Classes must be equal for the instanceof to succeed.
+ __ Xor(out, out, cls);
+ __ Sltiu(out, out, 1);
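+      // (out ^ cls) is zero iff the classes are equal; SLTIU against 1 then
+      // yields 1 for "equal" and 0 otherwise.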
+ break;
+ }
+
+ case TypeCheckKind::kAbstractClassCheck: {
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // If the class is abstract, we eagerly fetch the super class of the
+ // object to avoid doing a comparison we know will fail.
+ MipsLabel loop;
+ __ Bind(&loop);
+ // /* HeapReference<Class> */ out = out->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ super_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ Beqz(out, &done);
+ __ Bne(out, cls, &loop);
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kClassHierarchyCheck: {
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // Walk over the class hierarchy to find a match.
+ MipsLabel loop, success;
+ __ Bind(&loop);
+ __ Beq(out, cls, &success);
+ // /* HeapReference<Class> */ out = out->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ super_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ __ Bnez(out, &loop);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ B(&done);
+ __ Bind(&success);
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kArrayObjectCheck: {
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // Do an exact check.
+ MipsLabel success;
+ __ Beq(out, cls, &success);
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ // /* HeapReference<Class> */ out = out->component_type_
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ component_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ Beqz(out, &done);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Sltiu(out, out, 1);
+ __ B(&done);
+ __ Bind(&success);
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kArrayCheck: {
+ // No read barrier since the slow path will retry upon failure.
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction,
+ /* is_fatal */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ Bne(out, cls, slow_path->GetEntryLabel());
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kUnresolvedCheck:
+ case TypeCheckKind::kInterfaceCheck: {
+ // Note that we indeed only call on slow path, but we always go
+ // into the slow path for the unresolved and interface check
+ // cases.
+ //
+ // We cannot directly call the InstanceofNonTrivial runtime
+ // entry point without resorting to a type checking slow path
+ // here (i.e. by calling InvokeRuntime directly), as it would
+ // require to assign fixed registers for the inputs of this
+ // HInstanceOf instruction (following the runtime calling
+ // convention), which might be cluttered by the potential first
+ // read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction,
+ /* is_fatal */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ B(slow_path->GetEntryLabel());
+ break;
+ }
}
__ Bind(&done);
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
void LocationsBuilderMIPS::VisitIntConstant(HIntConstant* constant) {
@@ -5239,6 +6709,14 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke
__ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (the
+ // concurrent copying collector may not in the future).
+ __ MaybeUnpoisonHeapReference(temp);
__ LoadFromOffset(kLoadWord, temp, temp,
mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
@@ -5307,9 +6785,6 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen
HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
- }
// We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
// is incompatible with it.
// TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
@@ -5345,9 +6820,6 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
- }
// We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
// is incompatible with it.
bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
@@ -5562,6 +7034,14 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem
// temp = object->GetClass();
__ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (the
+ // concurrent copying collector may not in the future).
+ __ MaybeUnpoisonHeapReference(temp);
// temp = temp->GetMethodAt(method_offset);
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
// T9 = temp->GetEntryPoint();
@@ -5588,12 +7068,13 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
cls,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
- Location::RegisterLocation(V0));
+ calling_convention.GetReturnLocation(Primitive::kPrimNot));
return;
}
DCHECK(!cls->NeedsAccessCheck());
- LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
@@ -5648,6 +7129,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
break;
}
+ const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+ ? kWithoutReadBarrier
+ : kCompilerReadBarrierOption;
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -5657,11 +7141,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
GenerateGcRootFieldLoad(cls,
out_loc,
base_or_current_method_reg,
- ArtMethod::DeclaringClassOffset().Int32Value());
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ read_barrier_option);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
__ LoadLiteral(out,
base_or_current_method_reg,
codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
@@ -5669,6 +7155,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
bool reordering = __ SetReorder(false);
@@ -5678,7 +7165,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
break;
}
case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
uint32_t address = dchecked_integral_cast<uint32_t>(
reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
DCHECK_NE(address, 0u);
@@ -5692,7 +7179,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
- __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
__ SetReorder(reordering);
generate_null_check = true;
break;
@@ -5704,7 +7191,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
bool reordering = __ SetReorder(false);
__ Bind(&info->high_label);
__ Lui(out, /* placeholder */ 0x1234);
- GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
__ SetReorder(reordering);
break;
}
@@ -5837,7 +7324,11 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
- __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(load,
+ out_loc,
+ out,
+ /* placeholder */ 0x5678,
+ kCompilerReadBarrierOption);
__ SetReorder(reordering);
SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
codegen_->AddSlowPath(slow_path);
@@ -5853,7 +7344,11 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
bool reordering = __ SetReorder(false);
__ Bind(&info->high_label);
__ Lui(out, /* placeholder */ 0x1234);
- GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(load,
+ out_loc,
+ out,
+ /* placeholder */ 0x5678,
+ kCompilerReadBarrierOption);
__ SetReorder(reordering);
return;
}
@@ -6059,6 +7554,8 @@ void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) {
}
void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) {
+ // Note: if heap poisoning is enabled, the entry point takes care
+ // of poisoning the reference.
codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
}
@@ -6076,6 +7573,8 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
}
void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
+ // Note: if heap poisoning is enabled, the entry point takes care
+ // of poisoning the reference.
if (instruction->IsStringAlloc()) {
// String is allocated through StringFactory. Call NewEmptyString entry point.
Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 47eba50248..3875c4bdba 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -241,6 +241,38 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
uint32_t dex_pc,
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a read barrier and
+ // shall be a register in that case; it may be an invalid location
+ // otherwise.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a Baker's (fast
+ // path) read barrier and shall be a register in that case; it may
+ // be an invalid location otherwise.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+
// Generate a GC root reference load:
//
// root <- *(obj + offset)
@@ -249,7 +281,9 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
Register obj,
- uint32_t offset);
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option);
+
void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
// When the function returns `false` it means that the condition holds if `dst` is non-zero
// and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero
@@ -297,7 +331,6 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
- auto GetImplicitNullChecker(HInstruction* instruction);
void GenPackedSwitchWithCompares(Register value_reg,
int32_t lower_bound,
uint32_t num_entries,
@@ -354,6 +387,91 @@ class CodeGeneratorMIPS : public CodeGenerator {
void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+ //
+ // Load the object reference located at the address
+ // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+ // `ref`, and mark it if needed.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`).
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ Location temp,
+ bool needs_null_check,
+ bool always_update_field = false);
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
+ //
+ // A read barrier for an object reference read from the heap is
+ // implemented as a call to the artReadBarrierSlow runtime entry
+ // point, which is passed the values in locations `ref`, `obj`, and
+ // `offset`:
+ //
+ // mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+ // mirror::Object* obj,
+ // uint32_t offset);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierSlow.
+ //
+ // When `index` is provided (i.e. for array accesses), the offset
+ // value passed to artReadBarrierSlow is adjusted to take `index`
+ // into account.
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
+ //
+ // A read barrier for an object reference GC root is implemented as
+ // a call to the artReadBarrierForRootSlow runtime entry point,
+ // which is passed the value in location `root`:
+ //
+ // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierForRootSlow.
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
void MarkGCCard(Register object, Register value, bool value_can_be_null);
// Register allocation.
@@ -401,6 +519,15 @@ class CodeGeneratorMIPS : public CodeGenerator {
uint32_t dex_pc,
SlowPathCode* slow_path = nullptr) OVERRIDE;
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path,
+ bool direct);
+
+ void GenerateInvokeRuntime(int32_t entry_point_offset, bool direct);
+
ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
@@ -536,8 +663,6 @@ class CodeGeneratorMIPS : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // Deduplication map for patchable boot image addresses.
- Uint32ToLiteralMap boot_image_address_patches_;
// Patches for string root accesses in JIT compiled code.
ArenaDeque<JitPatchInfo> jit_string_patches_;
// Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5be0da4011..78b31e9e86 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -336,7 +336,8 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
public:
- explicit TypeCheckSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {}
+ explicit TypeCheckSlowPathMIPS64(HInstruction* instruction, bool is_fatal)
+ : SlowPathCodeMIPS64(instruction), is_fatal_(is_fatal) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
@@ -347,7 +348,9 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, locations);
+ if (!is_fatal_) {
+ SaveLiveRegisters(codegen, locations);
+ }
// We're moving two locations to locations that could overlap, so we need a parallel
// move resolver.
@@ -370,13 +373,19 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
}
- RestoreLiveRegisters(codegen, locations);
- __ Bc(GetExitLabel());
+ if (!is_fatal_) {
+ RestoreLiveRegisters(codegen, locations);
+ __ Bc(GetExitLabel());
+ }
}
const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; }
+ bool IsFatal() const OVERRIDE { return is_fatal_; }
+
private:
+ const bool is_fatal_;
+
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS64);
};
@@ -398,6 +407,528 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64);
};
+class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+ explicit ArraySetSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
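+    // Move the array, index and value into the runtime calling convention
+    // registers; a parallel move resolver is used because the sources may
+    // overlap the destination registers.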
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(
+ locations->InAt(0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(1),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimInt,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(2),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimNot,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+ mips64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+ RestoreLiveRegisters(codegen, locations);
+ __ Bc(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS64);
+};
+
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is for the GcRoot read barrier.
+class ReadBarrierMarkSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+ ReadBarrierMarkSlowPathMIPS64(HInstruction* instruction,
+ Location ref,
+ Location entrypoint = Location::NoLocation())
+ : SlowPathCodeMIPS64(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS64"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ GpuRegister ref_reg = ref_.AsRegister<GpuRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+ DCHECK((V0 <= ref_reg && ref_reg <= T2) ||
+ (S2 <= ref_reg && ref_reg <= S7) ||
+ (ref_reg == S8)) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in A0 and V0 respectively):
+ //
+ // A0 <- ref
+ // V0 <- ReadBarrierMark(A0)
+ // ref <- V0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ mips64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ DCHECK_EQ(entrypoint_.AsRegister<GpuRegister>(), T9);
+ __ Jalr(entrypoint_.AsRegister<GpuRegister>());
+ __ Nop();
+ } else {
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+ // This runtime call does not require a stack map.
+ mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+ instruction_,
+ this);
+ }
+ __ Bc(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if already loaded.
+ const Location entrypoint_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS64);
+};
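
To make the `ref_reg - 1` indexing above concrete: the per-register ReadBarrierMarkRegX entrypoints are reached through pointer-sized slots addressed off the thread register (TR). The sketch below only restates that arithmetic; the base offset and the "consecutive slots" layout are assumptions for illustration, not the real Thread layout.

    // Sketch, not ART code: offset of the marking entrypoint for a given GPU register.
    // Register 0 (ZERO) has no entrypoint, hence the `reg - 1` in the slow path above.
    #include <cstdint>

    constexpr int32_t kMips64PointerSizeBytes = 8;
    constexpr int32_t kHypotheticalMarkEntrypointsBase = 0x100;  // Illustrative only.

    int32_t ReadBarrierMarkEntryPointOffset(int reg) {
      return kHypotheticalMarkEntrypointsBase + (reg - 1) * kMips64PointerSizeBytes;
    }
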
+
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathMIPS64 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ Location field_offset,
+ GpuRegister temp1)
+ : SlowPathCodeMIPS64(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_offset_(field_offset),
+ temp1_(temp1) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS64";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ GpuRegister ref_reg = ref_.AsRegister<GpuRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+ DCHECK(field_offset_.IsRegister()) << field_offset_;
+
+ __ Bind(GetEntryLabel());
+
+ // Save the old reference.
+ // Note that we cannot use AT or TMP to save the old reference, as those
+ // are used by the code that follows, but we need the old reference after
+ // the call to the ReadBarrierMarkRegX entry point.
+ DCHECK_NE(temp1_, AT);
+ DCHECK_NE(temp1_, TMP);
+ __ Move(temp1_, ref_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+ DCHECK((V0 <= ref_reg && ref_reg <= T2) ||
+ (S2 <= ref_reg && ref_reg <= S7) ||
+ (ref_reg == S8)) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in A0 and V0 respectively):
+ //
+ // A0 <- ref
+ // V0 <- ReadBarrierMark(A0)
+ // ref <- V0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+ // This runtime call does not require a stack map.
+ mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+ instruction_,
+ this);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*(obj_ + field_offset_)`).
+ //
+ // Note that this field could also hold a different object, if
+  // another thread had concurrently changed it. In that case, the
+  // compare-and-set (CAS) loop below would abort, leaving the
+ // field as-is.
+ Mips64Label done;
+ __ Beqc(temp1_, ref_reg, &done);
+
+  // Update the holder's field atomically. This may fail if the
+  // mutator updates it before us, but it's OK. This is achieved
+ // using a strong compare-and-set (CAS) operation with relaxed
+ // memory synchronization ordering, where the expected value is
+ // the old reference and the desired value is the new reference.
+
+ // Convenience aliases.
+ GpuRegister base = obj_;
+ GpuRegister offset = field_offset_.AsRegister<GpuRegister>();
+ GpuRegister expected = temp1_;
+ GpuRegister value = ref_reg;
+ GpuRegister tmp_ptr = TMP; // Pointer to actual memory.
+ GpuRegister tmp = AT; // Value in memory.
+
+ __ Daddu(tmp_ptr, base, offset);
+
+ if (kPoisonHeapReferences) {
+ __ PoisonHeapReference(expected);
+ // Do not poison `value` if it is the same register as
+ // `expected`, which has just been poisoned.
+ if (value != expected) {
+ __ PoisonHeapReference(value);
+ }
+ }
+
+ // do {
+  //   tmp = [tmp_ptr] - expected;
+  // } while (tmp == 0 && failure([tmp_ptr] <- value));
+
+ Mips64Label loop_head, exit_loop;
+ __ Bind(&loop_head);
+ __ Ll(tmp, tmp_ptr);
+ // The LL instruction sign-extends the 32-bit value, but
+ // 32-bit references must be zero-extended. Zero-extend `tmp`.
+ __ Dext(tmp, tmp, 0, 32);
+ __ Bnec(tmp, expected, &exit_loop);
+ __ Move(tmp, value);
+ __ Sc(tmp, tmp_ptr);
+ __ Beqzc(tmp, &loop_head);
+ __ Bind(&exit_loop);
+
+ if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(expected);
+ // Do not unpoison `value` if it is the same register as
+ // `expected`, which has just been unpoisoned.
+ if (value != expected) {
+ __ UnpoisonHeapReference(value);
+ }
+ }
+
+ __ Bind(&done);
+ __ Bc(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const GpuRegister obj_;
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset_;
+
+ const GpuRegister temp1_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS64);
+};
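
The LL/SC loop above implements exactly what its comments describe: a single strong compare-and-set with relaxed ordering that writes the marked reference back only if the field still holds the old one. As an analogy only (ART does not use std::atomic here), the same update could be written as:

    // Analogy, not ART code: `field` stands for *(obj_ + field_offset_);
    // heap references are 32-bit values on this target.
    #include <atomic>
    #include <cstdint>

    void UpdateFieldAfterMarking(std::atomic<uint32_t>* field,
                                 uint32_t old_ref,      // saved in temp1_ before the call
                                 uint32_t marked_ref) { // the updated value in ref_reg
      if (old_ref == marked_ref) {
        return;  // Marking did not move the reference; nothing to write back.
      }
      uint32_t expected = old_ref;
      // Strong CAS, relaxed ordering: if another thread already replaced the field,
      // the exchange fails and the field is left as-is, matching the slow path.
      field->compare_exchange_strong(expected, marked_ref, std::memory_order_relaxed);
    }
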
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+ ReadBarrierForHeapReferenceSlowPathMIPS64(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index)
+ : SlowPathCodeMIPS64(instruction),
+ out_(out),
+ ref_(ref),
+ obj_(obj),
+ offset_(offset),
+ index_(index) {
+ DCHECK(kEmitCompilerReadBarrier);
+ // If `obj` is equal to `out` or `ref`, it means the initial object
+ // has been overwritten by (or after) the heap object reference load
+ // to be instrumented, e.g.:
+ //
+ // __ LoadFromOffset(kLoadWord, out, out, offset);
+ // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+ //
+ // In that case, we have lost the information about the original
+ // object, and the emitted read barrier cannot work properly.
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+ DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ Primitive::Type type = Primitive::kPrimNot;
+ GpuRegister reg_out = out_.AsRegister<GpuRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for heap reference slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ // We may have to change the index's value, but as `index_` is a
+ // constant member (like other "inputs" of this slow path),
+ // introduce a copy of it, `index`.
+ Location index = index_;
+ if (index_.IsValid()) {
+ // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+ if (instruction_->IsArrayGet()) {
+ // Compute the actual memory offset and store it in `index`.
+ GpuRegister index_reg = index_.AsRegister<GpuRegister>();
+ DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+ if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+ // We are about to change the value of `index_reg` (see the
+ // calls to art::mips64::Mips64Assembler::Sll and
+          // art::mips64::Mips64Assembler::Addiu32 below), but it has
+ // not been saved by the previous call to
+ // art::SlowPathCode::SaveLiveRegisters, as it is a
+ // callee-save register --
+ // art::SlowPathCode::SaveLiveRegisters does not consider
+ // callee-save registers, as it has been designed with the
+ // assumption that callee-save registers are supposed to be
+ // handled by the called function. So, as a callee-save
+ // register, `index_reg` _would_ eventually be saved onto
+ // the stack, but it would be too late: we would have
+ // changed its value earlier. Therefore, we manually save
+ // it here into another freely available register,
+ // `free_reg`, chosen of course among the caller-save
+ // registers (as a callee-save `free_reg` register would
+ // exhibit the same problem).
+ //
+ // Note we could have requested a temporary register from
+ // the register allocator instead; but we prefer not to, as
+ // this is a slow path, and we know we can find a
+ // caller-save register that is available.
+ GpuRegister free_reg = FindAvailableCallerSaveRegister(codegen);
+ __ Move(free_reg, index_reg);
+ index_reg = free_reg;
+ index = Location::RegisterLocation(index_reg);
+ } else {
+ // The initial register stored in `index_` has already been
+ // saved in the call to art::SlowPathCode::SaveLiveRegisters
+ // (as it is not a callee-save register), so we can freely
+ // use it.
+ }
+ // Shifting the index value contained in `index_reg` by the scale
+ // factor (2) cannot overflow in practice, as the runtime is
+ // unable to allocate object arrays with a size larger than
+ // 2^26 - 1 (that is, 2^28 - 4 bytes).
+ __ Sll(index_reg, index_reg, TIMES_4);
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ __ Addiu32(index_reg, index_reg, offset_);
+ } else {
+ // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+ // intrinsics, `index_` is not shifted by a scale factor of 2
+ // (as in the case of ArrayGet), as it is actually an offset
+ // to an object field within an object.
+ DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+ (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+ << instruction_->AsInvoke()->GetIntrinsic();
+ DCHECK_EQ(offset_, 0U);
+ DCHECK(index_.IsRegister());
+ }
+ }
+
+ // We're moving two or three locations to locations that could
+ // overlap, so we need a parallel move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(ref_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
+ nullptr);
+ parallel_move.AddMove(obj_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimNot,
+ nullptr);
+ if (index.IsValid()) {
+ parallel_move.AddMove(index,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimInt,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ } else {
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ __ LoadConst32(calling_convention.GetRegisterAt(2), offset_);
+ }
+ mips64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<
+ kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+ mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+ RestoreLiveRegisters(codegen, locations);
+ __ Bc(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierForHeapReferenceSlowPathMIPS64";
+ }
+
+ private:
+ GpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+ size_t ref = static_cast<int>(ref_.AsRegister<GpuRegister>());
+ size_t obj = static_cast<int>(obj_.AsRegister<GpuRegister>());
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (i != ref &&
+ i != obj &&
+ !codegen->IsCoreCalleeSaveRegister(i) &&
+ !codegen->IsBlockedCoreRegister(i)) {
+ return static_cast<GpuRegister>(i);
+ }
+ }
+ // We shall never fail to find a free caller-save register, as
+ // there are more than two core caller-save registers on MIPS64
+ // (meaning it is possible to find one which is different from
+ // `ref` and `obj`).
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+ LOG(FATAL) << "Could not find a free caller-save register";
+ UNREACHABLE();
+ }
+
+ const Location out_;
+ const Location ref_;
+ const Location obj_;
+ const uint32_t offset_;
+ // An additional location containing an index to an array.
+ // Only used for HArrayGet and the UnsafeGetObject &
+ // UnsafeGetObjectVolatile intrinsics.
+ const Location index_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS64);
+};
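
For HArrayGet, the index adjustment above is plain arithmetic: scale the index by the 4-byte heap reference size (a shift by 2, i.e. TIMES_4) and add the data offset. A one-line restatement, for reference:

    // Restates the Sll/Addiu32 sequence above; cannot overflow for valid array sizes.
    #include <cstdint>

    uint32_t HeapReferenceElementOffset(uint32_t data_offset, uint32_t index) {
      return data_offset + (index << 2);  // index * sizeof(HeapReference<Object>)
    }
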
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+ ReadBarrierForRootSlowPathMIPS64(HInstruction* instruction, Location out, Location root)
+ : SlowPathCodeMIPS64(instruction), out_(out), root_(root) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Primitive::Type type = Primitive::kPrimNot;
+ GpuRegister reg_out = out_.AsRegister<GpuRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+ mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ root_,
+ Primitive::kPrimNot);
+ mips64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+ mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+ RestoreLiveRegisters(codegen, locations);
+ __ Bc(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS64"; }
+
+ private:
+ const Location out_;
+ const Location root_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS64);
+};
+
CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
const Mips64InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
@@ -430,8 +961,6 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_address_patches_(std::less<uint32_t>(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -551,26 +1080,21 @@ void CodeGeneratorMIPS64::GenerateFrameEntry() {
return;
}
- // Make sure the frame size isn't unreasonably large. Per the various APIs
- // it looks like it should always be less than 2GB in size, which allows
- // us using 32-bit signed offsets from the stack pointer.
- if (GetFrameSize() > 0x7FFFFFFF)
- LOG(FATAL) << "Stack frame larger than 2GB";
+ // Make sure the frame size isn't unreasonably large.
+ if (GetFrameSize() > GetStackOverflowReservedBytes(kMips64)) {
+ LOG(FATAL) << "Stack frame larger than " << GetStackOverflowReservedBytes(kMips64) << " bytes";
+ }
// Spill callee-saved registers.
- // Note that their cumulative size is small and they can be indexed using
- // 16-bit offsets.
-
- // TODO: increment/decrement SP in one step instead of two or remove this comment.
- uint32_t ofs = FrameEntrySpillSize();
+ uint32_t ofs = GetFrameSize();
__ IncreaseFrameSize(ofs);
for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
GpuRegister reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
ofs -= kMips64DoublewordSize;
- __ Sd(reg, SP, ofs);
+ __ StoreToOffset(kStoreDoubleword, reg, SP, ofs);
__ cfi().RelOffset(DWARFReg(reg), ofs);
}
}
@@ -579,23 +1103,16 @@ void CodeGeneratorMIPS64::GenerateFrameEntry() {
FpuRegister reg = kFpuCalleeSaves[i];
if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
ofs -= kMips64DoublewordSize;
- __ Sdc1(reg, SP, ofs);
+ __ StoreFpuToOffset(kStoreDoubleword, reg, SP, ofs);
__ cfi().RelOffset(DWARFReg(reg), ofs);
}
}
- // Allocate the rest of the frame and store the current method pointer
- // at its end.
-
- __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
// Save the current method if we need it. Note that we do not
// do this in HCurrentMethod, as the instruction might have been removed
// in the SSA graph.
if (RequiresCurrentMethod()) {
- static_assert(IsInt<16>(kCurrentMethodStackOffset),
- "kCurrentMethodStackOffset must fit into int16_t");
- __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+ __ StoreToOffset(kStoreDoubleword, kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
}
if (GetGraph()->HasShouldDeoptimizeFlag()) {
@@ -608,42 +1125,32 @@ void CodeGeneratorMIPS64::GenerateFrameExit() {
__ cfi().RememberState();
if (!HasEmptyFrame()) {
- // Deallocate the rest of the frame.
-
- __ DecreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
// Restore callee-saved registers.
- // Note that their cumulative size is small and they can be indexed using
- // 16-bit offsets.
-
- // TODO: increment/decrement SP in one step instead of two or remove this comment.
- uint32_t ofs = 0;
-
- for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
- FpuRegister reg = kFpuCalleeSaves[i];
- if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
- __ Ldc1(reg, SP, ofs);
- ofs += kMips64DoublewordSize;
+    // For better instruction scheduling, restore RA before other registers.
+ uint32_t ofs = GetFrameSize();
+ for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+ GpuRegister reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ ofs -= kMips64DoublewordSize;
+ __ LoadFromOffset(kLoadDoubleword, reg, SP, ofs);
__ cfi().Restore(DWARFReg(reg));
}
}
- for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
- GpuRegister reg = kCoreCalleeSaves[i];
- if (allocated_registers_.ContainsCoreRegister(reg)) {
- __ Ld(reg, SP, ofs);
- ofs += kMips64DoublewordSize;
+ for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
+ FpuRegister reg = kFpuCalleeSaves[i];
+ if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
+ ofs -= kMips64DoublewordSize;
+ __ LoadFpuFromOffset(kLoadDoubleword, reg, SP, ofs);
__ cfi().Restore(DWARFReg(reg));
}
}
- DCHECK_EQ(ofs, FrameEntrySpillSize());
- __ DecreaseFrameSize(ofs);
+ __ DecreaseFrameSize(GetFrameSize());
}
- __ Jr(RA);
- __ Nop();
+ __ Jic(RA, 0);
__ cfi().RestoreState();
__ cfi().DefCFAOffset(GetFrameSize());
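
The reworked prologue/epilogue above adjusts SP once by the full frame size and then addresses the callee-save slots at descending doubleword offsets from the frame top. A small sketch of that offset scheme, inferred only from the spill/restore loops shown here:

    // Sketch: offsets produced by the spill/restore loops above, highest slot first.
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<uint32_t> CalleeSaveSlotOffsets(uint32_t frame_size, size_t num_saved_regs) {
      std::vector<uint32_t> offsets;
      uint32_t ofs = frame_size;
      for (size_t i = 0; i < num_saved_regs; ++i) {
        ofs -= 8;  // kMips64DoublewordSize
        offsets.push_back(ofs);
      }
      return offsets;
    }
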
@@ -937,8 +1444,7 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
pc_relative_type_patches_.size() +
type_bss_entry_patches_.size() +
boot_image_string_patches_.size() +
- boot_image_type_patches_.size() +
- boot_image_address_patches_.size();
+ boot_image_type_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
@@ -972,13 +1478,6 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
target_type.dex_file,
target_type.type_index.index_));
}
- for (const auto& entry : boot_image_address_patches_) {
- DCHECK(GetCompilerOptions().GetIncludePatchInformation());
- Literal* literal = entry.second;
- DCHECK(literal->GetLabel()->IsBound());
- uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
- linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
- }
DCHECK_EQ(size, linker_patches->size());
}
@@ -1042,9 +1541,7 @@ Literal* CodeGeneratorMIPS64::DeduplicateBootImageTypeLiteral(const DexFile& dex
}
Literal* CodeGeneratorMIPS64::DeduplicateBootImageAddressLiteral(uint64_t address) {
- bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
- Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
- return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+ return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
}
void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
@@ -1165,23 +1662,32 @@ void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint,
uint32_t dex_pc,
SlowPathCode* slow_path) {
ValidateInvokeRuntime(entrypoint, instruction, slow_path);
- __ LoadFromOffset(kLoadDoubleword,
- T9,
- TR,
- GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value());
- __ Jalr(T9);
- __ Nop();
+ GenerateInvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value());
if (EntrypointRequiresStackMap(entrypoint)) {
RecordPcInfo(instruction, dex_pc, slow_path);
}
}
+void CodeGeneratorMIPS64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ GenerateInvokeRuntime(entry_point_offset);
+}
+
+void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) {
+ __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
+ __ Jalr(T9);
+ __ Nop();
+}
+
void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path,
GpuRegister class_reg) {
__ LoadFromOffset(kLoadWord, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
__ LoadConst32(AT, mirror::Class::kStatusInitialized);
__ Bltc(TMP, AT, slow_path->GetEntryLabel());
- // TODO: barrier needed?
+ // Even if the initialized flag is set, we need to ensure consistent memory ordering.
+ __ Sync(0);
__ Bind(slow_path->GetExitLabel());
}
@@ -1472,73 +1978,99 @@ void InstructionCodeGeneratorMIPS64::VisitAnd(HAnd* instruction) {
}
void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) {
+ Primitive::Type type = instruction->GetType();
+ bool object_array_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (type == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_array_get_with_read_barrier
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (Primitive::IsFloatingPointType(instruction->GetType())) {
+ if (Primitive::IsFloatingPointType(type)) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps in the case of an object array get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // array's location, as we need it to emit the read barrier.
+ locations->SetOut(Location::RequiresRegister(),
+ object_array_get_with_read_barrier
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ }
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier.
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->AddTemp(Location::RequiresRegister());
}
}
+static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS64* codegen) {
+ auto null_checker = [codegen, instruction]() {
+ codegen->MaybeRecordImplicitNullCheck(instruction);
+ };
+ return null_checker;
+}
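
The helper above packages MaybeRecordImplicitNullCheck into a callable that is handed to the assembler's load/store helpers, which presumably invoke it once the first memory access has been emitted so the recorded PC corresponds to the potentially faulting instruction. A minimal sketch of that callback pattern (the helper name and signature here are illustrative, not the real Mips64Assembler API):

    #include <functional>

    using NullChecker = std::function<void()>;

    template <typename EmitAccess>
    void AccessWithImplicitNullCheck(EmitAccess emit_access, const NullChecker& null_checker) {
      emit_access();    // Emit the load/store that may fault on a null base.
      null_checker();   // Record the implicit null check at that PC.
    }
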
+
void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+ Location obj_loc = locations->InAt(0);
+ GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
+ Location out_loc = locations->Out();
Location index = locations->InAt(1);
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
Primitive::Type type = instruction->GetType();
const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
instruction->IsStringCharAt();
switch (type) {
case Primitive::kPrimBoolean: {
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+ __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker);
} else {
__ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
- __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker);
}
break;
}
case Primitive::kPrimByte: {
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
+ __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker);
} else {
__ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
- __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker);
}
break;
}
case Primitive::kPrimShort: {
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
+ __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
__ Daddu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
}
break;
}
case Primitive::kPrimChar: {
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
if (maybe_compressed_char_at) {
uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
- __ LoadFromOffset(kLoadWord, TMP, obj, count_offset);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker);
__ Dext(TMP, TMP, 0, 1);
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
@@ -1563,7 +2095,8 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
__ LoadFromOffset(kLoadUnsignedHalfword,
out,
obj,
- data_offset + (const_index << TIMES_2));
+ data_offset + (const_index << TIMES_2),
+ null_checker);
}
} else {
GpuRegister index_reg = index.AsRegister<GpuRegister>();
@@ -1581,67 +2114,111 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
} else {
__ Dsll(TMP, index_reg, TIMES_2);
__ Daddu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
}
}
break;
}
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
+ case Primitive::kPrimInt: {
DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord;
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadFromOffset(load_type, out, obj, offset);
+ __ LoadFromOffset(load_type, out, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
__ Daddu(TMP, obj, TMP);
- __ LoadFromOffset(load_type, out, TMP, data_offset);
+ __ LoadFromOffset(load_type, out, TMP, data_offset, null_checker);
+ }
+ break;
+ }
+
+ case Primitive::kPrimNot: {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ out =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier call.
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ index,
+ temp,
+ /* needs_null_check */ true);
+ } else {
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
+ if (index.IsConstant()) {
+ size_t offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ LoadFromOffset(kLoadUnsignedWord, out, obj, offset, null_checker);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+ } else {
+          __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
+          __ Daddu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadUnsignedWord, out, TMP, data_offset, null_checker);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction,
+ out_loc,
+ out_loc,
+ obj_loc,
+ data_offset,
+ index);
+ }
}
break;
}
case Primitive::kPrimLong: {
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadFromOffset(kLoadDoubleword, out, obj, offset);
+ __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
__ Daddu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
}
break;
}
case Primitive::kPrimFloat: {
- FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+ FpuRegister out = out_loc.AsFpuRegister<FpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadFpuFromOffset(kLoadWord, out, obj, offset);
+ __ LoadFpuFromOffset(kLoadWord, out, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
__ Daddu(TMP, obj, TMP);
- __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset);
+ __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
}
break;
}
case Primitive::kPrimDouble: {
- FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+ FpuRegister out = out_loc.AsFpuRegister<FpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset);
+ __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
__ Daddu(TMP, obj, TMP);
- __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset);
+ __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1650,9 +2227,6 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
- if (!maybe_compressed_char_at) {
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
}
void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) {
@@ -1674,24 +2248,48 @@ void InstructionCodeGeneratorMIPS64::VisitArrayLength(HArrayLength* instruction)
}
}
+Location LocationsBuilderMIPS64::RegisterOrZeroConstant(HInstruction* instruction) {
+ return (instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern())
+ ? Location::ConstantLocation(instruction->AsConstant())
+ : Location::RequiresRegister();
+}
+
+Location LocationsBuilderMIPS64::FpuRegisterOrConstantForStore(HInstruction* instruction) {
+ // We can store 0.0 directly (from the ZERO register) without loading it into an FPU register.
+ // We can store a non-zero float or double constant without first loading it into the FPU,
+ // but we should only prefer this if the constant has a single use.
+ if (instruction->IsConstant() &&
+ (instruction->AsConstant()->IsZeroBitPattern() ||
+ instruction->GetUses().HasExactlyOneElement())) {
+    return Location::ConstantLocation(instruction->AsConstant());
+  }
+  // Otherwise require an FPU register for the constant.
+  return Location::RequiresFpuRegister();
+}
+
void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) {
- bool needs_runtime_call = instruction->NeedsTypeCheck();
+ Primitive::Type value_type = instruction->GetComponentType();
+
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
- if (needs_runtime_call) {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ may_need_runtime_call_for_type_check ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+ locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
} else {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
- locations->SetInAt(2, Location::RequiresFpuRegister());
- } else {
- locations->SetInAt(2, Location::RequiresRegister());
- }
+ locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
+ }
+ if (needs_write_barrier) {
+ // Temporary register for the write barrier.
+ locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
}
}
@@ -1699,23 +2297,29 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
LocationSummary* locations = instruction->GetLocations();
GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
Location index = locations->InAt(1);
+ Location value_location = locations->InAt(2);
Primitive::Type value_type = instruction->GetComponentType();
- bool needs_runtime_call = locations->WillCall();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
+ GpuRegister base_reg = index.IsConstant() ? obj : TMP;
switch (value_type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
- GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ StoreToOffset(kStoreByte, value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1;
} else {
- __ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
- __ StoreToOffset(kStoreByte, value, TMP, data_offset);
+ __ Daddu(base_reg, obj, index.AsRegister<GpuRegister>());
+ }
+ if (value_location.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreByte, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ GpuRegister value = value_location.AsRegister<GpuRegister>();
+ __ StoreToOffset(kStoreByte, value, base_reg, data_offset, null_checker);
}
break;
}
@@ -1723,90 +2327,208 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimShort:
case Primitive::kPrimChar: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
- GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ StoreToOffset(kStoreHalfword, value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2;
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
- __ Daddu(TMP, obj, TMP);
- __ StoreToOffset(kStoreHalfword, value, TMP, data_offset);
+ __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_2);
+ __ Daddu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreHalfword, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ GpuRegister value = value_location.AsRegister<GpuRegister>();
+ __ StoreToOffset(kStoreHalfword, value, base_reg, data_offset, null_checker);
+ }
+ break;
+ }
+
+ case Primitive::kPrimInt: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+ } else {
+ __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+ __ Daddu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ GpuRegister value = value_location.AsRegister<GpuRegister>();
+ __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
}
break;
}
- case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- if (!needs_runtime_call) {
+ if (value_location.IsConstant()) {
+ // Just setting null.
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreToOffset(kStoreWord, value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- DCHECK(index.IsRegister()) << index;
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(TMP, obj, TMP);
- __ StoreToOffset(kStoreWord, value, TMP, data_offset);
+ __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+ __ Daddu(base_reg, obj, base_reg);
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- if (needs_write_barrier) {
- DCHECK_EQ(value_type, Primitive::kPrimNot);
- codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ DCHECK_EQ(value, 0);
+ __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+ DCHECK(!needs_write_barrier);
+ DCHECK(!may_need_runtime_call_for_type_check);
+ break;
+ }
+
+ DCHECK(needs_write_barrier);
+ GpuRegister value = value_location.AsRegister<GpuRegister>();
+ GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>();
+ GpuRegister temp2 = TMP; // Doesn't need to survive slow path.
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ Mips64Label done;
+ SlowPathCodeMIPS64* slow_path = nullptr;
+
+ if (may_need_runtime_call_for_type_check) {
+ slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ if (instruction->GetValueCanBeNull()) {
+ Mips64Label non_zero;
+ __ Bnezc(value, &non_zero);
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+ } else {
+ __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+ __ Daddu(base_reg, obj, base_reg);
+ }
+ __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+ __ Bc(&done);
+ __ Bind(&non_zero);
}
+
+ // Note that when read barriers are enabled, the type checks
+ // are performed without read barriers. This is fine, even in
+ // the case where a class object is in the from-space after
+ // the flip, as a comparison involving such a type would not
+ // produce a false positive; it may of course produce a false
+ // negative, in which case we would take the ArraySet slow
+ // path.
+
+ // /* HeapReference<Class> */ temp1 = obj->klass_
+ __ LoadFromOffset(kLoadUnsignedWord, temp1, obj, class_offset, null_checker);
+ __ MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, component_offset);
+ // /* HeapReference<Class> */ temp2 = value->klass_
+ __ LoadFromOffset(kLoadUnsignedWord, temp2, value, class_offset);
+ // If heap poisoning is enabled, no need to unpoison `temp1`
+ // nor `temp2`, as we are comparing two poisoned references.
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ Mips64Label do_put;
+ __ Beqc(temp1, temp2, &do_put);
+ // If heap poisoning is enabled, the `temp1` reference has
+ // not been unpoisoned yet; unpoison it now.
+ __ MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, super_offset);
+ // If heap poisoning is enabled, no need to unpoison
+ // `temp1`, as we are comparing against null below.
+ __ Bnezc(temp1, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ Bnec(temp1, temp2, slow_path->GetEntryLabel());
+ }
+ }
+
+ GpuRegister source = value;
+ if (kPoisonHeapReferences) {
+ // Note that in the case where `value` is a null reference,
+ // we do not enter this block, as a null reference does not
+ // need poisoning.
+ __ Move(temp1, value);
+ __ PoisonHeapReference(temp1);
+ source = temp1;
+ }
+
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- DCHECK_EQ(value_type, Primitive::kPrimNot);
- codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+ __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+ __ Daddu(base_reg, obj, base_reg);
+ }
+ __ StoreToOffset(kStoreWord, source, base_reg, data_offset);
+
+ if (!may_need_runtime_call_for_type_check) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
}
break;
}
case Primitive::kPrimLong: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
- GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ StoreToOffset(kStoreDoubleword, value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
- __ Daddu(TMP, obj, TMP);
- __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset);
+ __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8);
+ __ Daddu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ GpuRegister value = value_location.AsRegister<GpuRegister>();
+ __ StoreToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker);
}
break;
}
case Primitive::kPrimFloat: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
- FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>();
- DCHECK(locations->InAt(2).IsFpuRegister());
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreFpuToOffset(kStoreWord, value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(TMP, obj, TMP);
- __ StoreFpuToOffset(kStoreWord, value, TMP, data_offset);
+ __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+ __ Daddu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ FpuRegister value = value_location.AsFpuRegister<FpuRegister>();
+ __ StoreFpuToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
}
break;
}
case Primitive::kPrimDouble: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
- FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>();
- DCHECK(locations->InAt(2).IsFpuRegister());
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ StoreFpuToOffset(kStoreDoubleword, value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
- __ Daddu(TMP, obj, TMP);
- __ StoreFpuToOffset(kStoreDoubleword, value, TMP, data_offset);
+ __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8);
+ __ Daddu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ FpuRegister value = value_location.AsFpuRegister<FpuRegister>();
+ __ StoreFpuToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker);
}
break;
}
@@ -1815,11 +2537,6 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
-
- // Ints and objects are handled in the switch.
- if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
}
void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -1847,31 +2564,234 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction)
__ Bgeuc(index, length, slow_path->GetEntryLabel());
}
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+ if (kEmitCompilerReadBarrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ return 1;
+ }
+ return 0;
+}
+
+// Extra temp is used for read barrier.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+ return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
+
void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
- instruction,
- LocationSummary::kCallOnSlowPath);
+ LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+ bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kAbstractClassCheck:
+ case TypeCheckKind::kClassHierarchyCheck:
+ case TypeCheckKind::kArrayObjectCheck:
+ call_kind = (throws_into_catch || kEmitCompilerReadBarrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
+ break;
+ case TypeCheckKind::kArrayCheck:
+ case TypeCheckKind::kUnresolvedCheck:
+ case TypeCheckKind::kInterfaceCheck:
+ call_kind = LocationSummary::kCallOnSlowPath;
+ break;
+ }
+
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- // Note that TypeCheckSlowPathMIPS64 uses this register too.
- locations->AddTemp(Location::RequiresRegister());
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
- GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+ Location obj_loc = locations->InAt(0);
+ GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
- GpuRegister obj_cls = locations->GetTemp(0).AsRegister<GpuRegister>();
+ Location temp_loc = locations->GetTemp(0);
+ GpuRegister temp = temp_loc.AsRegister<GpuRegister>();
+ const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ DCHECK_LE(num_temps, 2u);
+ Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+ const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+ const uint32_t object_array_data_offset =
+ mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ Mips64Label done;
+  // Always false when read barriers are enabled: the checks below avoid read barriers (for
+  // performance and code size), which can produce false negatives, so the slow path must be
+  // able to return to the fast path rather than being treated as fatal.
+ bool is_type_check_slow_path_fatal = false;
+ if (!kEmitCompilerReadBarrier) {
+ is_type_check_slow_path_fatal =
+ (type_check_kind == TypeCheckKind::kExactCheck ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+ !instruction->CanThrowIntoCatchBlock();
+ }
SlowPathCodeMIPS64* slow_path =
- new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction);
+ new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
+ is_type_check_slow_path_fatal);
codegen_->AddSlowPath(slow_path);
- // TODO: avoid this check if we know obj is not null.
- __ Beqzc(obj, slow_path->GetExitLabel());
- // Compare the class of `obj` with `cls`.
- __ LoadFromOffset(kLoadUnsignedWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
- __ Bnec(obj_cls, cls, slow_path->GetEntryLabel());
+ // Avoid this check if we know `obj` is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Beqzc(obj, &done);
+ }
+
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kArrayCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Jump to slow path for throwing the exception or doing a
+ // more involved array check.
+ __ Bnec(temp, cls, slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kAbstractClassCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class is abstract, we eagerly fetch the super class of the
+ // object to avoid doing a comparison we know will fail.
+ Mips64Label loop;
+ __ Bind(&loop);
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class reference currently in `temp` is null, jump to the slow path to throw the
+ // exception.
+ __ Beqzc(temp, slow_path->GetEntryLabel());
+ // Otherwise, compare the classes.
+ __ Bnec(temp, cls, &loop);
+ break;
+ }
+
+ case TypeCheckKind::kClassHierarchyCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Walk over the class hierarchy to find a match.
+ Mips64Label loop;
+ __ Bind(&loop);
+ __ Beqc(temp, cls, &done);
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class reference currently in `temp` is null, jump to the slow path to throw the
+ // exception. Otherwise, jump to the beginning of the loop.
+ __ Bnezc(temp, &loop);
+ __ Bc(slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kArrayObjectCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Do an exact check.
+ __ Beqc(temp, cls, &done);
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ // /* HeapReference<Class> */ temp = temp->component_type_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ component_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the component type is null, jump to the slow path to throw the exception.
+ __ Beqzc(temp, slow_path->GetEntryLabel());
+ // Otherwise, the object is indeed an array; further check that this component
+ // type is not a primitive type.
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Bnezc(temp, slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kUnresolvedCheck:
+ // We always go into the type check slow path for the unresolved check case.
+ // We cannot directly call the CheckCast runtime entry point
+ // without resorting to a type checking slow path here (i.e. by
+ // calling InvokeRuntime directly), as it would require assigning
+ // fixed registers for the inputs of this HCheckCast
+ // instruction (following the runtime calling convention), which
+ // might be cluttered by the potential first read barrier
+ // emission at the beginning of this method.
+ __ Bc(slow_path->GetEntryLabel());
+ break;
+
+ case TypeCheckKind::kInterfaceCheck: {
+ // Avoid read barriers to improve the performance of the fast path. We cannot get false
+ // positives by doing this.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // /* HeapReference<Class> */ temp = temp->iftable_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Iftable is never null.
+ __ Lw(TMP, temp, array_length_offset);
+ // Loop through the iftable and check if any class matches.
+ Mips64Label loop;
+ __ Bind(&loop);
+ __ Beqzc(TMP, slow_path->GetEntryLabel());
+ __ Lwu(AT, temp, object_array_data_offset);
+ __ MaybeUnpoisonHeapReference(AT);
+ // Go to next interface.
+ __ Daddiu(temp, temp, 2 * kHeapReferenceSize);
+ __ Addiu(TMP, TMP, -2);
+ // Compare the classes and continue the loop if they do not match.
+ __ Bnec(AT, cls, &loop);
+ break;
+ }
+ }
+
+ __ Bind(&done);
__ Bind(slow_path->GetExitLabel());
}
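The kInterfaceCheck fast path above walks the object's IfTable two heap-reference slots at a time because each IfTable entry is stored as a pair of references (the interface class followed by its method array). A minimal C++ sketch of what the emitted loop computes, using illustrative types rather than the real art::mirror classes:

#include <cstdint>

// Sketch only: IfTablePair and the flat pair layout are assumptions made for
// illustration; `length_in_slots` is the raw array length read at
// array_length_offset, so it counts both slots of every pair.
struct IfTablePair { const void* interface; const void* method_array; };

static bool ImplementsInterface(const IfTablePair* iftable,
                                int32_t length_in_slots,
                                const void* cls) {
  for (int32_t remaining = length_in_slots; remaining != 0; remaining -= 2) {
    if (iftable->interface == cls) {
      return true;        // Bnec not taken: fall through to `done`, cast succeeds.
    }
    ++iftable;            // Daddiu temp, temp, 2 * kHeapReferenceSize.
  }
  return false;           // Beqzc TMP: table exhausted, go to the slow path.
}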
@@ -3069,14 +3989,31 @@ void CodeGeneratorMIPS64::GenerateNop() {
}
void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
- const FieldInfo& field_info ATTRIBUTE_UNUSED) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ const FieldInfo& field_info) {
+ Primitive::Type field_type = field_info.GetFieldType();
+ bool object_field_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot);
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+ instruction,
+ object_field_get_with_read_barrier
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps in the case of an object field get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // object's location, as we need it to emit the read barrier.
+ locations->SetOut(Location::RequiresRegister(),
+ object_field_get_with_read_barrier
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ }
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
}
@@ -3084,8 +4021,14 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
Primitive::Type type = field_info.GetFieldType();
LocationSummary* locations = instruction->GetLocations();
- GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+ Location obj_loc = locations->InAt(0);
+ GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
+ Location dst_loc = locations->Out();
LoadOperandType load_type = kLoadUnsignedByte;
+ bool is_volatile = field_info.IsVolatile();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
+
switch (type) {
case Primitive::kPrimBoolean:
load_type = kLoadUnsignedByte;
@@ -3115,17 +4058,47 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
UNREACHABLE();
}
if (!Primitive::IsFloatingPointType(type)) {
- DCHECK(locations->Out().IsRegister());
- GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
- __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+ DCHECK(dst_loc.IsRegister());
+ GpuRegister dst = dst_loc.AsRegister<GpuRegister>();
+ if (type == Primitive::kPrimNot) {
+ // /* HeapReference<Object> */ dst = *(obj + offset)
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp_loc = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ dst_loc,
+ obj,
+ offset,
+ temp_loc,
+ /* needs_null_check */ true);
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ } else {
+ __ LoadFromOffset(kLoadUnsignedWord, dst, obj, offset, null_checker);
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
+ }
+ } else {
+ __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
+ }
} else {
- DCHECK(locations->Out().IsFpuRegister());
- FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
- __ LoadFpuFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+ DCHECK(dst_loc.IsFpuRegister());
+ FpuRegister dst = dst_loc.AsFpuRegister<FpuRegister>();
+ __ LoadFpuFromOffset(load_type, dst, obj, offset, null_checker);
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- // TODO: memory barrier?
+ // Memory barriers, in the case of references, are handled above in
+ // the reference load path.
+ if (is_volatile && (type != Primitive::kPrimNot)) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
}
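For volatile fields, the get above issues the load first and a kLoadAny barrier after it, which is the standard acquire pattern. A minimal portable-C++ sketch of the ordering (of the memory-ordering shape only, not of the backend itself):

#include <atomic>
#include <cstdint>

// Acquire-ordered field read: plain load followed by an acquire fence,
// matching LoadFromOffset(...) + GenerateMemoryBarrier(kLoadAny) above.
int32_t VolatileGetSketch(const std::atomic<int32_t>* field) {
  int32_t value = field->load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_acquire);
  return value;
}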
void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction,
@@ -3134,9 +4107,9 @@ void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction,
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, FpuRegisterOrConstantForStore(instruction->InputAt(1)));
} else {
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, RegisterOrZeroConstant(instruction->InputAt(1)));
}
}
@@ -3146,7 +4119,13 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
Primitive::Type type = field_info.GetFieldType();
LocationSummary* locations = instruction->GetLocations();
GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+ Location value_location = locations->InAt(1);
StoreOperandType store_type = kStoreByte;
+ bool is_volatile = field_info.IsVolatile();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
+
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -3169,23 +4148,44 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
}
- if (!Primitive::IsFloatingPointType(type)) {
- DCHECK(locations->InAt(1).IsRegister());
- GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
- __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
+
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ }
+
+ if (value_location.IsConstant()) {
+ int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker);
} else {
- DCHECK(locations->InAt(1).IsFpuRegister());
- FpuRegister src = locations->InAt(1).AsFpuRegister<FpuRegister>();
- __ StoreFpuToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
+ if (!Primitive::IsFloatingPointType(type)) {
+ DCHECK(value_location.IsRegister());
+ GpuRegister src = value_location.AsRegister<GpuRegister>();
+ if (kPoisonHeapReferences && needs_write_barrier) {
+ // Note that in the case where `value` is a null reference,
+ // we do not enter this block, as a null reference does not
+ // need poisoning.
+ DCHECK_EQ(type, Primitive::kPrimNot);
+ __ PoisonHeapReference(TMP, src);
+ __ StoreToOffset(store_type, TMP, obj, offset, null_checker);
+ } else {
+ __ StoreToOffset(store_type, src, obj, offset, null_checker);
+ }
+ } else {
+ DCHECK(value_location.IsFpuRegister());
+ FpuRegister src = value_location.AsFpuRegister<FpuRegister>();
+ __ StoreFpuToOffset(store_type, src, obj, offset, null_checker);
+ }
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- // TODO: memory barriers?
- if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
- DCHECK(locations->InAt(1).IsRegister());
- GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
+ if (needs_write_barrier) {
+ DCHECK(value_location.IsRegister());
+ GpuRegister src = value_location.AsRegister<GpuRegister>();
codegen_->MarkGCCard(obj, src, value_can_be_null);
}
+
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
}
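On the store side, a volatile field set is bracketed by a kAnyStore barrier before the store and a kAnyAny barrier after it. A rough portable-C++ sketch of that fencing (the real code also handles constants, FPU values, heap poisoning, and the GC card mark):

#include <atomic>
#include <cstdint>

// Volatile field write: release fence, plain store, then a full fence,
// mirroring the kAnyStore / store / kAnyAny sequence above.
void VolatileSetSketch(std::atomic<int32_t>* field, int32_t value) {
  std::atomic_thread_fence(std::memory_order_release);  // kAnyStore
  field->store(value, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);  // kAnyAny (full barrier)
}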
void LocationsBuilderMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -3204,14 +4204,134 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* in
HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
}
+void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister(
+ HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ GpuRegister out_reg = out.AsRegister<GpuRegister>();
+ if (read_barrier_option == kWithReadBarrier) {
+ CHECK(kEmitCompilerReadBarrier);
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ out_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `maybe_temp` before overwriting it
+ // in the following load operation, as we will need it for the
+ // read barrier below.
+ __ Move(maybe_temp.AsRegister<GpuRegister>(), out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters(
+ HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ GpuRegister out_reg = out.AsRegister<GpuRegister>();
+ GpuRegister obj_reg = obj.AsRegister<GpuRegister>();
+ if (read_barrier_option == kWithReadBarrier) {
+ CHECK(kEmitCompilerReadBarrier);
+ if (kUseBakerReadBarrier) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ obj_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
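Both reference-load helpers above pick one of three strategies depending on the read barrier configuration. A small sketch of that selection, with placeholder names for the emitted sequences:

// Strategy chosen by GenerateReferenceLoadOneRegister/TwoRegisters:
enum class RefLoadKind {
  kBakerFastPath,         // monitor check + Lwu + conditional marking slow path
  kSlowPathReadBarrier,   // Lwu + unconditional read barrier slow path call
  kPlainLoad              // Lwu + MaybeUnpoisonHeapReference
};

// read_barrier_option == kWithReadBarrier requires kEmitCompilerReadBarrier.
RefLoadKind SelectRefLoadKind(bool with_read_barrier, bool use_baker) {
  if (with_read_barrier) {
    return use_baker ? RefLoadKind::kBakerFastPath
                     : RefLoadKind::kSlowPathReadBarrier;
  }
  return RefLoadKind::kPlainLoad;
}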
+
void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
- HInstruction* instruction ATTRIBUTE_UNUSED,
+ HInstruction* instruction,
Location root,
GpuRegister obj,
- uint32_t offset) {
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option) {
GpuRegister root_reg = root.AsRegister<GpuRegister>();
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
+ if (read_barrier_option == kWithReadBarrier) {
+ DCHECK(kEmitCompilerReadBarrier);
+ if (kUseBakerReadBarrier) {
+ // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+ // Baker's read barriers are used:
+ //
+ // root = obj.field;
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp != null) {
+ // root = temp(root)
+ // }
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path marking the GC root `root`.
+ Location temp = Location::RegisterLocation(T9);
+ SlowPathCodeMIPS64* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(
+ instruction,
+ root,
+ /*entrypoint*/ temp);
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset);
+ // The entrypoint is null when the GC is not marking; this saves a load compared to
+ // checking GetIsGcMarking.
+ __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ __ Daddiu64(root_reg, obj, static_cast<int32_t>(offset));
+ // /* mirror::Object* */ root = root->Read()
+ codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
} else {
// Plain GC root load with no read barrier.
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -3221,48 +4341,418 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
}
}
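The Baker root path above uses the per-register marking entrypoint itself as the "is the GC marking?" flag: the entrypoint slot is null outside of marking, so a single load both fetches the callee and decides whether the slow path is needed. A sketch of that shape, with a hypothetical function type standing in for the runtime entrypoint (which the real code reaches via the marking slow path):

// Hypothetical signature for illustration only.
using MarkRootFn = void* (*)(void* root);

void* LoadGcRootSketch(void* const* root_slot, MarkRootFn mark_entrypoint) {
  void* root = *root_slot;            // Lwu root_reg, obj, offset
  if (mark_entrypoint != nullptr) {   // Bnezc temp -> marking slow path
    root = mark_entrypoint(root);     // ReadBarrierMarkSlowPathMIPS64
  }
  return root;
}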
+void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ Location no_index = Location::NoLocation();
+ ScaleFactor no_scale_factor = TIMES_1;
+ GenerateReferenceLoadWithBakerReadBarrier(instruction,
+ ref,
+ obj,
+ offset,
+ no_index,
+ no_scale_factor,
+ temp,
+ needs_null_check);
+}
+
+void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ ref =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ ScaleFactor scale_factor = TIMES_4;
+ GenerateReferenceLoadWithBakerReadBarrier(instruction,
+ ref,
+ obj,
+ data_offset,
+ index,
+ scale_factor,
+ temp,
+ needs_null_check);
+}
+
+void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ Location temp,
+ bool needs_null_check,
+ bool always_update_field) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // In slow path based read barriers, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
+
+ GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
+ GpuRegister temp_reg = temp.AsRegister<GpuRegister>();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ // /* int32_t */ monitor = obj->monitor_
+ __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ __ Sync(0); // Barrier to prevent load-load reordering.
+
+ // The actual reference load.
+ if (index.IsValid()) {
+ // Load types involving an "index": ArrayGet,
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics.
+ // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+ if (index.IsConstant()) {
+ size_t computed_offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
+ __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, computed_offset);
+ } else {
+ GpuRegister index_reg = index.AsRegister<GpuRegister>();
+ __ Dsll(TMP, index_reg, scale_factor);
+ __ Daddu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset);
+ }
+ } else {
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset);
+ }
+
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+
+ // Slow path marking the object `ref` when it is gray.
+ SlowPathCodeMIPS64* slow_path;
+ if (always_update_field) {
+ // ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 only supports address
+ // of the form `obj + field_offset`, where `obj` is a register and
+ // `field_offset` is a register. Thus `offset` is expected to be zero
+ // and `scale_factor` to be TIMES_1 in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+ slow_path = new (GetGraph()->GetArena())
+ ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(instruction,
+ ref,
+ obj,
+ /* field_offset */ index,
+ temp_reg);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(instruction, ref);
+ }
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit into the sign bit (31) and
+ // performing a branch on less than zero.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size");
+ __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift);
+ __ Bltzc(temp_reg, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
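The gray-state test at the end of the Baker fast path avoids a compare-with-immediate: the single read-barrier-state bit of the lock word is shifted into the sign bit and tested with a branch-on-negative. A small sketch of the check, assuming the one-bit state asserted by the static_asserts above:

#include <cstdint>

// Returns true when the read-barrier state bit says the object is gray.
bool IsGraySketch(uint32_t lock_word, uint32_t rb_state_shift) {
  // Sll temp, temp, 31 - shift: move the state bit into bit 31.
  int32_t shifted = static_cast<int32_t>(lock_word << (31u - rb_state_shift));
  // Bltzc temp, slow_path: taken when bit 31 is set, i.e. the object is gray.
  return shifted < 0;
}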
+
+void CodeGeneratorMIPS64::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Insert a slow path based read barrier *after* the reference load.
+ //
+ // If heap poisoning is enabled, the unpoisoning of the loaded
+ // reference will be carried out by the runtime within the slow
+ // path.
+ //
+ // Note that `ref` currently does not get unpoisoned (when heap
+ // poisoning is enabled), which is alright as the `ref` argument is
+ // not used by the artReadBarrierSlow entry point.
+ //
+ // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+ SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
+ ReadBarrierForHeapReferenceSlowPathMIPS64(instruction, out, ref, obj, offset, index);
+ AddSlowPath(slow_path);
+
+ __ Bc(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorMIPS64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ if (kEmitCompilerReadBarrier) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
+ // If heap poisoning is enabled, unpoisoning will be taken care of
+ // by the runtime within the slow path.
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+ } else if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(out.AsRegister<GpuRegister>());
+ }
+}
+
+void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
+ // Note that GC roots are not affected by heap poisoning, so we do
+ // not need to do anything special for this here.
+ SlowPathCodeMIPS64* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS64(instruction, out, root);
+ AddSlowPath(slow_path);
+
+ __ Bc(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
- LocationSummary::CallKind call_kind =
- instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
+ LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kAbstractClassCheck:
+ case TypeCheckKind::kClassHierarchyCheck:
+ case TypeCheckKind::kArrayObjectCheck:
+ call_kind =
+ kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ break;
+ case TypeCheckKind::kArrayCheck:
+ case TypeCheckKind::kUnresolvedCheck:
+ case TypeCheckKind::kInterfaceCheck:
+ call_kind = LocationSummary::kCallOnSlowPath;
+ break;
+ }
+
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// The output does overlap inputs.
// Note that TypeCheckSlowPathMIPS64 uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
}
void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
- GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+ Location obj_loc = locations->InAt(0);
+ GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
+ Location out_loc = locations->Out();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
+ const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ DCHECK_LE(num_temps, 1u);
+ Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
Mips64Label done;
+ SlowPathCodeMIPS64* slow_path = nullptr;
// Return 0 if `obj` is null.
- // TODO: Avoid this check if we know `obj` is not null.
- __ Move(out, ZERO);
- __ Beqzc(obj, &done);
-
- // Compare the class of `obj` with `cls`.
- __ LoadFromOffset(kLoadUnsignedWord, out, obj, mirror::Object::ClassOffset().Int32Value());
- if (instruction->IsExactCheck()) {
- // Classes must be equal for the instanceof to succeed.
- __ Xor(out, out, cls);
- __ Sltiu(out, out, 1);
- } else {
- // If the classes are not equal, we go into a slow path.
- DCHECK(locations->OnlyCallsOnSlowPath());
- SlowPathCodeMIPS64* slow_path =
- new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction);
- codegen_->AddSlowPath(slow_path);
- __ Bnec(out, cls, slow_path->GetEntryLabel());
- __ LoadConst32(out, 1);
- __ Bind(slow_path->GetExitLabel());
+ // Avoid this check if we know `obj` is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Move(out, ZERO);
+ __ Beqzc(obj, &done);
+ }
+
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck: {
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // Classes must be equal for the instanceof to succeed.
+ __ Xor(out, out, cls);
+ __ Sltiu(out, out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kAbstractClassCheck: {
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // If the class is abstract, we eagerly fetch the super class of the
+ // object to avoid doing a comparison we know will fail.
+ Mips64Label loop;
+ __ Bind(&loop);
+ // /* HeapReference<Class> */ out = out->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ super_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ Beqzc(out, &done);
+ __ Bnec(out, cls, &loop);
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kClassHierarchyCheck: {
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // Walk over the class hierarchy to find a match.
+ Mips64Label loop, success;
+ __ Bind(&loop);
+ __ Beqc(out, cls, &success);
+ // /* HeapReference<Class> */ out = out->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ super_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ __ Bnezc(out, &loop);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ Bc(&done);
+ __ Bind(&success);
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kArrayObjectCheck: {
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // Do an exact check.
+ Mips64Label success;
+ __ Beqc(out, cls, &success);
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ // /* HeapReference<Class> */ out = out->component_type_
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ component_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ Beqzc(out, &done);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Sltiu(out, out, 1);
+ __ Bc(&done);
+ __ Bind(&success);
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kArrayCheck: {
+ // No read barrier since the slow path will retry upon failure.
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
+ /* is_fatal */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ Bnec(out, cls, slow_path->GetEntryLabel());
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kUnresolvedCheck:
+ case TypeCheckKind::kInterfaceCheck: {
+ // Note that we indeed only call on slow path, but we always go
+ // into the slow path for the unresolved and interface check
+ // cases.
+ //
+ // We cannot directly call the InstanceofNonTrivial runtime
+ // entry point without resorting to a type checking slow path
+ // here (i.e. by calling InvokeRuntime directly), as it would
+ // require to assign fixed registers for the inputs of this
+ // HInstanceOf instruction (following the runtime calling
+ // convention), which might be cluttered by the potential first
+ // read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
+ /* is_fatal */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ Bc(slow_path->GetEntryLabel());
+ break;
+ }
}
__ Bind(&done);
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
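For the superclass-based cases above (kAbstractClassCheck and kClassHierarchyCheck), the emitted code walks super_class_ until it finds `cls` or reaches null, reusing the null result register directly as the false value. A sketch of the predicate being computed, with a stand-in type for mirror::Class:

struct KlassSketch { const KlassSketch* super_class; };

// kClassHierarchyCheck also accepts an exact match before the first
// super_class_ load; kAbstractClassCheck starts the walk at the superclass.
bool IsAssignableFromSketch(const KlassSketch* klass, const KlassSketch* target) {
  for (const KlassSketch* k = klass; k != nullptr; k = k->super_class) {
    if (k == target) {
      return true;   // LoadConst32(out, 1)
    }
  }
  return false;      // `out` still holds the null (0) loaded during the walk
}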
void LocationsBuilderMIPS64::VisitIntConstant(HIntConstant* constant) {
@@ -3325,6 +4815,14 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo
__ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (this
+ // may not hold for future collectors).
+ __ MaybeUnpoisonHeapReference(temp);
__ LoadFromOffset(kLoadDoubleword, temp, temp,
mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
@@ -3381,9 +4879,6 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codeg
HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
- }
bool fallback_load = false;
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
@@ -3411,9 +4906,6 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
- }
bool fallback_load = false;
switch (desired_class_load_kind) {
case HLoadClass::LoadKind::kInvalid:
@@ -3567,6 +5059,14 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t
// temp = object->GetClass();
__ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (this
+ // may not hold for future collectors).
+ __ MaybeUnpoisonHeapReference(temp);
// temp = temp->GetMethodAt(method_offset);
__ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
// T9 = temp->GetEntryPoint();
@@ -3598,7 +5098,8 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
}
DCHECK(!cls->NeedsAccessCheck());
- LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
@@ -3627,6 +5128,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
current_method_reg = locations->InAt(0).AsRegister<GpuRegister>();
}
+ const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+ ? kWithoutReadBarrier
+ : kCompilerReadBarrierOption;
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass:
@@ -3636,10 +5140,12 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
GenerateGcRootFieldLoad(cls,
out_loc,
current_method_reg,
- ArtMethod::DeclaringClassOffset().Int32Value());
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ read_barrier_option);
break;
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
__ LoadLiteral(out,
kLoadUnsignedWord,
codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
@@ -3647,6 +5153,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -3654,7 +5161,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
break;
}
case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
uint32_t address = dchecked_integral_cast<uint32_t>(
reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
DCHECK_NE(address, 0u);
@@ -3666,8 +5173,8 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
case HLoadClass::LoadKind::kBssEntry: {
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
- codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
- __ Lwu(out, AT, /* placeholder */ 0x5678);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
generate_null_check = true;
break;
}
@@ -3677,7 +5184,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
cls->GetTypeIndex(),
cls->GetClass()));
- GenerateGcRootFieldLoad(cls, out_loc, out, 0);
+ GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option);
break;
case HLoadClass::LoadKind::kDexCacheViaMethod:
case HLoadClass::LoadKind::kInvalid:
@@ -3773,8 +5280,12 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
- codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
- __ Lwu(out, AT, /* placeholder */ 0x5678);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
+ GenerateGcRootFieldLoad(load,
+ out_loc,
+ out,
+ /* placeholder */ 0x5678,
+ kCompilerReadBarrierOption);
SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
codegen_->AddSlowPath(slow_path);
__ Beqzc(out, slow_path->GetEntryLabel());
@@ -3787,7 +5298,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
load->GetStringIndex(),
load->GetString()));
- GenerateGcRootFieldLoad(load, out_loc, out, 0);
+ GenerateGcRootFieldLoad(load, out_loc, out, 0, kCompilerReadBarrierOption);
return;
default:
break;
@@ -3944,6 +5455,8 @@ void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
+ // Note: if heap poisoning is enabled, the entry point takes care
+ // of poisoning the reference.
codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
}
@@ -3961,6 +5474,8 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
+ // Note: if heap poisoning is enabled, the entry point takes care
+ // of poisoning the reference.
if (instruction->IsStringAlloc()) {
// String is allocated through StringFactory. Call NewEmptyString entry point.
GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
@@ -4722,12 +6237,34 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins
}
}
-void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet*) {
- UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister());
}
-void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet*) {
- UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+ uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+ instruction->GetIndex(), kMips64PointerSize).SizeValue();
+ __ LoadFromOffset(kLoadDoubleword,
+ locations->Out().AsRegister<GpuRegister>(),
+ locations->InAt(0).AsRegister<GpuRegister>(),
+ method_offset);
+ } else {
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ instruction->GetIndex(), kMips64PointerSize));
+ __ LoadFromOffset(kLoadDoubleword,
+ locations->Out().AsRegister<GpuRegister>(),
+ locations->InAt(0).AsRegister<GpuRegister>(),
+ mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
+ __ LoadFromOffset(kLoadDoubleword,
+ locations->Out().AsRegister<GpuRegister>(),
+ locations->Out().AsRegister<GpuRegister>(),
+ method_offset);
+ }
}
} // namespace mips64
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 26cc7dc788..fd1a174608 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -189,6 +189,8 @@ class LocationsBuilderMIPS64 : public HGraphVisitor {
void HandleShift(HBinaryOperation* operation);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ Location RegisterOrZeroConstant(HInstruction* instruction);
+ Location FpuRegisterOrConstantForStore(HInstruction* instruction);
InvokeDexCallingConventionVisitorMIPS64 parameter_visitor_;
@@ -235,6 +237,38 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
const FieldInfo& field_info,
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a read barrier and
+ // shall be a register in that case; it may be an invalid location
+ // otherwise.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a Baker's (fast
+ // path) read barrier and shall be a register in that case; it may
+ // be an invalid location otherwise.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+
// Generate a GC root reference load:
//
// root <- *(obj + offset)
@@ -243,7 +277,9 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
GpuRegister obj,
- uint32_t offset);
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option);
+
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
Mips64Label* true_target,
@@ -314,6 +350,91 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+ //
+ // Load the object reference located at the address
+ // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+ // `ref`, and mark it if needed.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`).
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ Location temp,
+ bool needs_null_check,
+ bool always_update_field = false);
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
+ //
+ // A read barrier for an object reference read from the heap is
+ // implemented as a call to the artReadBarrierSlow runtime entry
+ // point, which is passed the values in locations `ref`, `obj`, and
+ // `offset`:
+ //
+ // mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+ // mirror::Object* obj,
+ // uint32_t offset);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierSlow.
+ //
+ // When `index` is provided (i.e. for array accesses), the offset
+ // value passed to artReadBarrierSlow is adjusted to take `index`
+ // into account.
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
+ //
+ // A read barrier for an object reference GC root is implemented as
+ // a call to the artReadBarrierForRootSlow runtime entry point,
+ // which is passed the value in location `root`:
+ //
+ // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierForRootSlow.
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
void MarkGCCard(GpuRegister object, GpuRegister value, bool value_can_be_null);
// Register allocation.
@@ -364,6 +485,14 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
uint32_t dex_pc,
SlowPathCode* slow_path = nullptr) OVERRIDE;
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
+ void GenerateInvokeRuntime(int32_t entry_point_offset);
+
ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; }
@@ -492,8 +621,6 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // Deduplication map for patchable boot image addresses.
- Uint32ToLiteralMap boot_image_address_patches_;
// Patches for string root accesses in JIT compiled code.
StringToLiteralMap jit_string_patches_;
// Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc
new file mode 100644
index 0000000000..e7f7b3019c
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_arm.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm.h"
+
+namespace art {
+namespace arm {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT
+
+void LocationsBuilderARM::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecCnv(HVecCnv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecNeg(HVecNeg* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAbs(HVecAbs* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecNot(HVecNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAdd(HVecAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecSub(HVecSub* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecMul(HVecMul* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecDiv(HVecDiv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAnd(HVecAnd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecOr(HVecOr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecXor(HVecXor* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecShl(HVecShl* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecShr(HVecShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecUShr(HVecUShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+} // namespace arm
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
new file mode 100644
index 0000000000..f4874fe2bc
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -0,0 +1,672 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm64.h"
+#include "mirror/array-inl.h"
+
+using namespace vixl::aarch64; // NOLINT(build/namespaces)
+
+namespace art {
+namespace arm64 {
+
+using helpers::DRegisterFrom;
+using helpers::HeapOperand;
+using helpers::InputRegisterAt;
+using helpers::Int64ConstantFrom;
+using helpers::XRegisterFrom;
+
+#define __ GetVIXLAssembler()->
+
+void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister dst = DRegisterFrom(locations->Out());
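+  // Only 64-bit (D-register) vectors are used here: 8 bytes, 4 halfwords, or 2 words/floats.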
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Dup(dst.V8B(), InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Dup(dst.V4H(), InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Dup(dst.V2S(), InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Dup(dst.V2S(), DRegisterFrom(locations->InAt(0)).V2S(), 0);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(),
+ instruction->IsVecNot() ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ break;
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister src = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ Primitive::Type from = instruction->GetInputType();
+ Primitive::Type to = instruction->GetResultType();
+ if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
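+    // Scvtf converts each signed 32-bit lane to single-precision floating point.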
+ __ Scvtf(dst.V2S(), src.V2S());
+ } else {
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
+}
+
+void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister src = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Neg(dst.V8B(), src.V8B());
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Neg(dst.V4H(), src.V4H());
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Neg(dst.V2S(), src.V2S());
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fneg(dst.V2S(), src.V2S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister src = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Abs(dst.V8B(), src.V8B());
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Abs(dst.V4H(), src.V4H());
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Abs(dst.V2S(), src.V2S());
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fabs(dst.V2S(), src.V2S());
+ break;
+ default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister src = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean: // special case boolean-not
+ DCHECK_EQ(8u, instruction->GetVectorLength());
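+      // A boolean vector holds 0 or 1 per byte; XOR with a vector of ones flips each lane.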
+ __ Movi(dst.V8B(), 1);
+ __ Eor(dst.V8B(), dst.V8B(), src.V8B());
+ break;
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ __ Not(dst.V8B(), src.V8B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Add(dst.V8B(), lhs.V8B(), rhs.V8B());
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Add(dst.V4H(), lhs.V4H(), rhs.V4H());
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Add(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fadd(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sub(dst.V8B(), lhs.V8B(), rhs.V8B());
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Sub(dst.V4H(), lhs.V4H(), rhs.V4H());
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Sub(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fsub(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Mul(dst.V8B(), lhs.V8B(), rhs.V8B());
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Mul(dst.V4H(), lhs.V4H(), rhs.V4H());
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Mul(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fmul(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
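+  // NEON has no packed integer divide, so only the floating-point case is implemented.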
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fdiv(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ __ And(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
+void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ __ Orr(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ __ Eor(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
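+      // The shift distance (input 1) must be a compile-time constant.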
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Shl(dst.V8B(), lhs.V8B(), value);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Shl(dst.V4H(), lhs.V4H(), value);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Shl(dst.V2S(), lhs.V2S(), value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sshr(dst.V8B(), lhs.V8B(), value);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Sshr(dst.V4H(), lhs.V4H(), value);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Sshr(dst.V2S(), lhs.V2S(), value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Ushr(dst.V8B(), lhs.V8B(), value);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Ushr(dst.V4H(), lhs.V4H(), value);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Ushr(dst.V2S(), lhs.V2S(), value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+ HVecMemoryOperation* instruction,
+ bool is_load) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (is_load) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up registers and address for vector memory operations.
+MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters(
+ HVecMemoryOperation* instruction,
+ Location* reg_loc,
+ bool is_load) {
+ LocationSummary* locations = instruction->GetLocations();
+ Register base = InputRegisterAt(instruction, 0);
+ Location index = locations->InAt(1);
+ *reg_loc = is_load ? locations->Out() : locations->InAt(2);
+
+ Primitive::Type packed_type = instruction->GetPackedType();
+ uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value();
+ size_t shift = Primitive::ComponentSizeShift(packed_type);
+
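+  // Form the element address: base + data offset + (index << shift). If the base input is an
+  // intermediate address, the data offset has already been folded into it.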
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(base);
+ if (index.IsConstant()) {
+ offset += Int64ConstantFrom(index) << shift;
+ __ Add(temp, base, offset);
+ } else {
+ if (instruction->InputAt(0)->IsIntermediateAddress()) {
+ temp = base;
+ } else {
+ __ Add(temp, base, offset);
+ }
+ __ Add(temp.X(), temp.X(), Operand(XRegisterFrom(index), LSL, shift));
+ }
+ return HeapOperand(temp);
+}
+
+void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
+ Location reg_loc = Location::NoLocation();
+ MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
+ FPRegister reg = DRegisterFrom(reg_loc);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Ld1(reg.V8B(), mem);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Ld1(reg.V4H(), mem);
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Ld1(reg.V2S(), mem);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
+ Location reg_loc = Location::NoLocation();
+ MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
+ FPRegister reg = DRegisterFrom(reg_loc);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ St1(reg.V8B(), mem);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ St1(reg.V4H(), mem);
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ St1(reg.V2S(), mem);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+#undef __
+
+} // namespace arm64
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
new file mode 100644
index 0000000000..74fa584e09
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm_vixl.h"
+
+namespace art {
+namespace arm {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT
+
+void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecCnv(HVecCnv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecDiv(HVecDiv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+} // namespace arm
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
new file mode 100644
index 0000000000..6969abd422
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_mips.h"
+
+namespace art {
+namespace mips {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT
+
+void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+} // namespace mips
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
new file mode 100644
index 0000000000..87118cefa5
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_mips64.h"
+
+namespace art {
+namespace mips64 {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT
+
+void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS64::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS64::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+} // namespace mips64
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
new file mode 100644
index 0000000000..8dabb4d08f
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -0,0 +1,807 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_x86.h"
+#include "mirror/array-inl.h"
+
+namespace art {
+namespace x86 {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
+
+void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimLong:
+ // Long needs extra temporary to load the register pair.
+ locations->AddTemp(Location::RequiresFpuRegister());
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
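+      // Broadcast the byte: widen it to 16 and then 32 bits, then shuffle it into all lanes.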
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ punpcklbw(reg, reg);
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimLong: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ DCHECK_EQ(2u, instruction->GetVectorLength());
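+      // Assemble the 64-bit value from its register pair, then duplicate it into both lanes.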
+ __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
+ __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
+ __ punpckldq(reg, tmp);
+ __ punpcklqdq(reg, reg);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ shufps(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ shufpd(reg, reg, Immediate(0));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ Primitive::Type from = instruction->GetInputType();
+ Primitive::Type to = instruction->GetResultType();
+ if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
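+    // cvtdq2ps converts four packed signed 32-bit integers to single-precision floats.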
+ __ cvtdq2ps(dst, src);
+ } else {
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
+}
+
+void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
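+  // SSE has no packed negate instruction, so each case computes 0 - src.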
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ xorps(dst, dst);
+ __ subps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ xorpd(dst, dst);
+ __ subpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
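The negation above is emitted as 0 - x because SSE2 has no dedicated packed negate instruction. A minimal sketch of the same idiom for the int case (names are mine):

    #include <emmintrin.h>

    __m128i NegateInts(__m128i x) {
      __m128i zero = _mm_setzero_si128();  // pxor dst, dst
      return _mm_sub_epi32(zero, x);       // psubd dst, src
    }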
+
+void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ if (instruction->GetPackedType() == Primitive::kPrimInt) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(dst, src);
+ __ pxor(tmp, tmp);
+ __ pcmpgtd(tmp, dst);
+ __ pxor(dst, tmp);
+ __ psubd(dst, tmp);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ psrld(dst, Immediate(1));
+ __ andps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ psrlq(dst, Immediate(1));
+ __ andpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
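Two distinct idioms appear above: integer abs via the sign mask, (x ^ m) - m with m = (0 > x), and floating-point abs by clearing the sign bit with an all-ones mask shifted right by one. A compact SSE2 sketch of both (my helpers, not ART code):

    #include <emmintrin.h>

    __m128i AbsInts(__m128i x) {
      __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), x);  // pcmpgtd: m = (0 > x)
      return _mm_sub_epi32(_mm_xor_si128(x, m), m);         // (x ^ m) - m
    }

    __m128 AbsFloats(__m128 x) {
      __m128i mask = _mm_srli_epi32(_mm_set1_epi32(-1), 1);  // 0x7fffffff per lane
      return _mm_and_ps(_mm_castsi128_ps(mask), x);          // clear the sign bits
    }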
+
+void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ // Boolean-not requires a temporary to construct the constant vector of 16 x one.
+ if (instruction->GetPackedType() == Primitive::kPrimBoolean) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean: { // special case boolean-not
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ pxor(dst, dst);
+ __ pcmpeqb(tmp, tmp); // all ones
+ __ psubb(dst, tmp); // 16 x one
+ __ pxor(dst, src);
+ break;
+ }
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pcmpeqb(dst, dst); // all ones
+ __ pxor(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ xorps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ xorpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
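The boolean case builds the constant 1 in every byte as 0 - (-1) and then flips the operand with xor, while the general integral case xors with all ones to get ~v. Sketch with SSE2 intrinsics (illustrative only):

    #include <emmintrin.h>

    // Each byte of v is 0 or 1; xor with 1 flips it.
    __m128i NotBooleans(__m128i v) {
      __m128i all_ones = _mm_set1_epi8(-1);                        // pcmpeqb dst, dst
      __m128i ones = _mm_sub_epi8(_mm_setzero_si128(), all_ones);  // 16 x 1
      return _mm_xor_si128(v, ones);
    }

    // General integral case: bitwise complement.
    __m128i NotInts(__m128i v) {
      return _mm_xor_si128(v, _mm_set1_epi8(-1));
    }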
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ paddd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ paddq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ addps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ addpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psubd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psubq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ subps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ subpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pmullw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pmulld(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ mulps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ mulpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ divps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ divpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pand(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ andps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ andpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pandn(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ andnps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ andnpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
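Note the operand order of pandn: it computes (~dst) & src, so with the output tied to the first input the result is ~in0 & in1 (my reading of the lowering above, stated as an assumption rather than a spec of HVecAndNot). SSE2 sketch:

    #include <emmintrin.h>

    __m128i AndNot(__m128i a, __m128i b) {
      return _mm_andnot_si128(a, b);  // (~a) & b, same operand order as pandn
    }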
+
+void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ por(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ orps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ orpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pxor(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ xorps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ xorpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
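Shr lowers to the arithmetic right shifts psraw/psrad, while UShr below uses the logical psrlw/psrld/psrlq; the missing long case here presumably reflects that SSE2 has no packed arithmetic 64-bit right shift. A sketch of the distinction (shift count picked arbitrarily):

    #include <emmintrin.h>

    __m128i ShrInts(__m128i v)  { return _mm_srai_epi32(v, 3); }  // psrad: sign-extending
    __m128i UShrInts(__m128i v) { return _mm_srli_epi32(v, 3); }  // psrld: zero-filling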
+
+void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+ HVecMemoryOperation* instruction,
+ bool is_load) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (is_load) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up registers and address for vector memory operations.
+static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
+ Location* reg_loc,
+ bool is_load) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location base = locations->InAt(0);
+ Location index = locations->InAt(1);
+ *reg_loc = is_load ? locations->Out() : locations->InAt(2);
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
+ ScaleFactor scale = TIMES_1;
+ switch (size) {
+ case 2: scale = TIMES_2; break;
+ case 4: scale = TIMES_4; break;
+ case 8: scale = TIMES_8; break;
+ default: break;
+ }
+ return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
+}
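The helper computes base + DataOffset(size) + index * size, folding the element size into the x86 SIB scale when the index is in a register. Illustrative arithmetic only (not ART code):

    #include <cstddef>
    #include <cstdint>

    uintptr_t VecElementAddress(uintptr_t array_base,
                                size_t index,
                                size_t elem_size,        // 1, 2, 4 or 8 bytes
                                uint32_t data_offset) {  // mirror::Array::DataOffset(elem_size)
      return array_base + data_offset + index * elem_size;
    }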
+
+void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+}
+
+void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
+ Location reg_loc = Location::NoLocation();
+ Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
+ XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
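The load uses the aligned encodings (movdqa/movaps/movapd) only when the access is statically known to be 16-byte aligned; the unaligned forms are the safe default. Equivalent choice expressed with intrinsics (sketch, names are mine):

    #include <emmintrin.h>

    __m128i LoadPacked(const void* addr, bool is_aligned16) {
      return is_aligned16
          ? _mm_load_si128(static_cast<const __m128i*>(addr))    // movdqa
          : _mm_loadu_si128(static_cast<const __m128i*>(addr));  // movdqu
    }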
+
+void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
+}
+
+void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
+ Location reg_loc = Location::NoLocation();
+ Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
+ XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+#undef __
+
+} // namespace x86
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
new file mode 100644
index 0000000000..e95608839b
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -0,0 +1,800 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_x86_64.h"
+#include "mirror/array-inl.h"
+
+namespace art {
+namespace x86_64 {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
+
+void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklbw(reg, reg);
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
+ __ punpcklqdq(reg, reg);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ shufps(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ shufpd(reg, reg, Immediate(0));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
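The byte broadcast above widens a scalar into all 16 lanes with two interleaves followed by a dword shuffle. The same computation expressed with SSE2 intrinsics (sketch, not ART code):

    #include <emmintrin.h>

    __m128i BroadcastByte(int value) {
      __m128i v = _mm_cvtsi32_si128(value);  // movd: byte in lane 0
      v = _mm_unpacklo_epi8(v, v);           // punpcklbw: duplicate to 16 bits
      v = _mm_unpacklo_epi16(v, v);          // punpcklwd: duplicate to 32 bits
      return _mm_shuffle_epi32(v, 0);        // pshufd 0: broadcast to all dwords
    }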
+
+void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ Primitive::Type from = instruction->GetInputType();
+ Primitive::Type to = instruction->GetResultType();
+ if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ cvtdq2ps(dst, src);
+ } else {
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ xorps(dst, dst);
+ __ subps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ xorpd(dst, dst);
+ __ subpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ if (instruction->GetPackedType() == Primitive::kPrimInt) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(dst, src);
+ __ pxor(tmp, tmp);
+ __ pcmpgtd(tmp, dst);
+ __ pxor(dst, tmp);
+ __ psubd(dst, tmp);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ psrld(dst, Immediate(1));
+ __ andps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ psrlq(dst, Immediate(1));
+ __ andpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ // Boolean-not requires a temporary to construct the constant vector of 16 x one.
+ if (instruction->GetPackedType() == Primitive::kPrimBoolean) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean: { // special case boolean-not
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ pxor(dst, dst);
+ __ pcmpeqb(tmp, tmp); // all ones
+ __ psubb(dst, tmp); // 16 x one
+ __ pxor(dst, src);
+ break;
+ }
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pcmpeqb(dst, dst); // all ones
+ __ pxor(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ xorps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ xorpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ paddd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ paddq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ addps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ addpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psubd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psubq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ subps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ subpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pmullw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pmulld(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ mulps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ mulpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ divps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ divpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pand(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ andps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ andpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pandn(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ andnps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ andnpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ por(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ orps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ orpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pxor(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ xorps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ xorpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psllw(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pslld(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psllq(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psraw(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psrad(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psrlw(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psrld(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psrlq(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+ HVecMemoryOperation* instruction,
+ bool is_load) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (is_load) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up registers and address for vector memory operations.
+static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
+ Location* reg_loc,
+ bool is_load) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location base = locations->InAt(0);
+ Location index = locations->InAt(1);
+ *reg_loc = is_load ? locations->Out() : locations->InAt(2);
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
+ ScaleFactor scale = TIMES_1;
+ switch (size) {
+ case 2: scale = TIMES_2; break;
+ case 4: scale = TIMES_4; break;
+ case 8: scale = TIMES_8; break;
+ default: break;
+ }
+ return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
+}
+
+void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
+ Location reg_loc = Location::NoLocation();
+ Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
+ XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
+ Location reg_loc = Location::NoLocation();
+ Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
+ XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+#undef __
+
+} // namespace x86_64
+} // namespace art
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index b779aed763..08a752f1d2 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -183,10 +183,13 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
: SlowPathCode(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -720,7 +723,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
@@ -963,12 +966,20 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
}
size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+ if (GetGraph()->HasSIMD()) {
+ __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
+ } else {
+ __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+ }
return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+ if (GetGraph()->HasSIMD()) {
+ __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
+ } else {
+ __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+ }
return GetFloatingPointSpillSlotSize();
}
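The switch between movsd and movups is what makes SIMD values survive spills: movsd writes only the low 64 bits of an XMM register, so a live 128-bit vector spilled that way would lose its upper lanes, while movups writes all 16 bytes (and the spill slot size grows to match). A minimal sketch of the same choice, with illustrative names that are not ART code:

    #include <emmintrin.h>

    void SpillXmm(void* slot, __m128d value, bool graph_has_simd) {
      if (graph_has_simd) {
        _mm_storeu_pd(static_cast<double*>(slot), value);  // 16 bytes, like movups
      } else {
        _mm_store_sd(static_cast<double*>(slot), value);   // low 8 bytes, like movsd
      }
    }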
@@ -1015,7 +1026,6 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
assembler_(graph->GetArena()),
isa_features_(isa_features),
pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -4603,13 +4613,6 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp
temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
}
-void CodeGeneratorX86::RecordSimplePatch() {
- if (GetCompilerOptions().GetIncludePatchInformation()) {
- simple_patches_.emplace_back();
- __ Bind(&simple_patches_.back());
- }
-}
-
void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
DCHECK(GetCompilerOptions().IsBootImage());
HX86ComputeBaseMethodAddress* address = nullptr;
@@ -4682,17 +4685,12 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
DCHECK(linker_patches->empty());
size_t size =
pc_relative_dex_cache_patches_.size() +
- simple_patches_.size() +
string_patches_.size() +
boot_image_type_patches_.size() +
type_bss_entry_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
- for (const Label& label : simple_patches_) {
- uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
- }
if (!GetCompilerOptions().IsBootImage()) {
DCHECK(boot_image_type_patches_.empty());
EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
@@ -5712,7 +5710,11 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction)
void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In the suspend check slow path, there are usually no caller-save registers at all.
+ // If SIMD instructions are present, however, we force all live SIMD registers to be
+ // spilled in full width (since the runtime only saves/restores the lower part).
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5815,9 +5817,11 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
__ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
} else if (destination.IsStackSlot()) {
__ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
- } else {
- DCHECK(destination.IsDoubleStackSlot());
+ } else if (destination.IsDoubleStackSlot()) {
__ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
+ } else {
+ DCHECK(destination.IsSIMDStackSlot());
+ __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
}
} else if (source.IsStackSlot()) {
if (destination.IsRegister()) {
@@ -5839,6 +5843,9 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
DCHECK(destination.IsDoubleStackSlot()) << destination;
MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex());
}
+ } else if (source.IsSIMDStackSlot()) {
+ DCHECK(destination.IsFpuRegister());
+ __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -6154,7 +6161,6 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
DCHECK_NE(address, 0u);
__ movl(out, Immediate(address));
- codegen_->RecordSimplePatch();
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -6311,7 +6317,6 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
reinterpret_cast<uintptr_t>(load->GetString().Get()));
DCHECK_NE(address, 0u);
__ movl(out, Immediate(address));
- codegen_->RecordSimplePatch();
return; // No dex cache slow path.
}
case HLoadString::LoadKind::kBssEntry: {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 5360dc9209..ca3a9eadd2 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -348,8 +348,9 @@ class CodeGeneratorX86 : public CodeGenerator {
}
size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
- // 8 bytes == 2 words for each spill.
- return 2 * kX86WordSize;
+ return GetGraph()->HasSIMD()
+ ? 4 * kX86WordSize // 16 bytes == 4 words for each spill
+ : 2 * kX86WordSize; // 8 bytes == 2 words for each spill
}
HGraphVisitor* GetLocationBuilder() OVERRIDE {
@@ -412,7 +413,6 @@ class CodeGeneratorX86 : public CodeGenerator {
// Generate a call to a virtual method.
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
- void RecordSimplePatch();
void RecordBootStringPatch(HLoadString* load_string);
void RecordBootTypePatch(HLoadClass* load_class);
Label* NewTypeBssEntryPatch(HLoadClass* load_class);
@@ -633,8 +633,6 @@ class CodeGeneratorX86 : public CodeGenerator {
// PC-relative DexCache access info.
ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_;
- // Patch locations for patchoat where the linker doesn't do any other work.
- ArenaDeque<Label> simple_patches_;
// String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC).
ArenaDeque<X86PcRelativePatchInfo> string_patches_;
// Type patch locations for boot image; type depends on configuration (boot image PIC/non-PIC).
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 179bf6d3d1..ff6e099d12 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -140,10 +140,13 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
: SlowPathCode(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -741,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
@@ -983,7 +986,7 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
- __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
+ Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
__ movq(temp.AsRegister<CpuRegister>(),
@@ -1070,13 +1073,6 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t
kX86_64PointerSize).SizeValue()));
}
-void CodeGeneratorX86_64::RecordSimplePatch() {
- if (GetCompilerOptions().GetIncludePatchInformation()) {
- simple_patches_.emplace_back();
- __ Bind(&simple_patches_.back());
- }
-}
-
void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
DCHECK(GetCompilerOptions().IsBootImage());
string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
@@ -1126,17 +1122,12 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
DCHECK(linker_patches->empty());
size_t size =
pc_relative_dex_cache_patches_.size() +
- simple_patches_.size() +
string_patches_.size() +
boot_image_type_patches_.size() +
type_bss_entry_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
- for (const Label& label : simple_patches_) {
- uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
- linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
- }
if (!GetCompilerOptions().IsBootImage()) {
DCHECK(boot_image_type_patches_.empty());
EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
@@ -1170,13 +1161,21 @@ size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg
}
size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
- return kX86_64WordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ } else {
+ __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ }
+ return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
- return kX86_64WordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ } else {
+ __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ }
+ return GetFloatingPointSpillSlotSize();
}
void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -1227,7 +1226,6 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
isa_features_(isa_features),
constant_area_start_(0),
pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -3662,7 +3660,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat
void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
Primitive::Type type = instruction->GetResultType();
- DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
bool is_div = instruction->IsDiv();
LocationSummary* locations = instruction->GetLocations();
@@ -5165,7 +5163,11 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio
void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In the suspend check slow path, there are usually no caller-save registers at all.
+ // If SIMD instructions are present, however, we force spilling all live SIMD
+ // registers in full width (since the runtime only saves/restores the lower part).
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5254,6 +5256,10 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
+ } else if (source.IsSIMDStackSlot()) {
+ DCHECK(destination.IsFpuRegister());
+ __ movups(destination.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), source.GetStackIndex()));
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -5304,10 +5310,13 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
} else if (destination.IsStackSlot()) {
__ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
source.AsFpuRegister<XmmRegister>());
- } else {
- DCHECK(destination.IsDoubleStackSlot()) << destination;
+ } else if (destination.IsDoubleStackSlot()) {
__ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
source.AsFpuRegister<XmmRegister>());
+ } else {
+ DCHECK(destination.IsSIMDStackSlot());
+ __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
+ source.AsFpuRegister<XmmRegister>());
}
}
}
@@ -5544,8 +5553,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
uint32_t address = dchecked_integral_cast<uint32_t>(
reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
DCHECK_NE(address, 0u);
- __ movl(out, Immediate(address)); // Zero-extended.
- codegen_->RecordSimplePatch();
+ __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -5680,8 +5688,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
uint32_t address = dchecked_integral_cast<uint32_t>(
reinterpret_cast<uintptr_t>(load->GetString().Get()));
DCHECK_NE(address, 0u);
- __ movl(out, Immediate(address)); // Zero-extended.
- codegen_->RecordSimplePatch();
+ __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
return; // No dex cache slow path.
}
case HLoadString::LoadKind::kBssEntry: {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 3a83731b3f..c8336dabd9 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -326,7 +326,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
}
size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
- return kX86_64WordSize;
+ return GetGraph()->HasSIMD()
+ ? 2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill
+ : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 word for each spill
}
HGraphVisitor* GetLocationBuilder() OVERRIDE {
@@ -406,7 +408,6 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
- void RecordSimplePatch();
void RecordBootStringPatch(HLoadString* load_string);
void RecordBootTypePatch(HLoadClass* load_class);
Label* NewTypeBssEntryPatch(HLoadClass* load_class);
@@ -602,8 +603,6 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// PC-relative DexCache access info.
ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_;
- // Patch locations for patchoat where the linker doesn't do any other work.
- ArenaDeque<Label> simple_patches_;
// String patch locations; type depends on configuration (app .bss or boot image PIC).
ArenaDeque<PatchInfo<Label>> string_patches_;
// Type patch locations for boot image (always PIC).
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
new file mode 100644
index 0000000000..dc3d378e75
--- /dev/null
+++ b/compiler/optimizing/code_sinking.cc
@@ -0,0 +1,403 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_sinking.h"
+
+#include "common_dominator.h"
+#include "nodes.h"
+
+namespace art {
+
+void CodeSinking::Run() {
+ HBasicBlock* exit = graph_->GetExitBlock();
+ if (exit == nullptr) {
+ // Infinite loop, just bail.
+ return;
+ }
+ // TODO(ngeoffray): we do not profile branches yet, so use throw instructions
+ // as an indicator of an uncommon branch.
+ for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) {
+ if (exit_predecessor->GetLastInstruction()->IsThrow()) {
+ SinkCodeToUncommonBranch(exit_predecessor);
+ }
+ }
+}
+
+static bool IsInterestingInstruction(HInstruction* instruction) {
+ // Instructions from the entry block (for example constants) are never interesting to move.
+ if (instruction->GetBlock() == instruction->GetBlock()->GetGraph()->GetEntryBlock()) {
+ return false;
+ }
+ // We want to move moveable instructions that cannot throw, as well as
+ // heap stores and allocations.
+
+ // Volatile stores cannot be moved.
+ if (instruction->IsInstanceFieldSet()) {
+ if (instruction->AsInstanceFieldSet()->IsVolatile()) {
+ return false;
+ }
+ }
+
+ // Check allocations first, as they can throw, but it is safe to move them.
+ if (instruction->IsNewInstance() || instruction->IsNewArray()) {
+ return true;
+ }
+
+ // All other instructions that can throw cannot be moved.
+ if (instruction->CanThrow()) {
+ return false;
+ }
+
+ // We can only sink stores into local allocations. Other heap references may
+ // be escaping. Note that allocations can escape too, but we only move
+ // allocations if their users can move too, or are in the list of
+ // post dominated blocks.
+ if (instruction->IsInstanceFieldSet()) {
+ if (!instruction->InputAt(0)->IsNewInstance()) {
+ return false;
+ }
+ }
+
+ if (instruction->IsArraySet()) {
+ if (!instruction->InputAt(0)->IsNewArray()) {
+ return false;
+ }
+ }
+
+ // Heap accesses cannot go past instructions that have memory side effects, which
+ // we are not tracking here. Note that the load/store elimination optimization
+ // runs before this optimization, and should have removed the interesting ones.
+ // In theory, we could handle loads of local allocations, but this is currently
+ // hard to test, as LSE removes them.
+ if (instruction->IsStaticFieldGet() ||
+ instruction->IsInstanceFieldGet() ||
+ instruction->IsArrayGet()) {
+ return false;
+ }
+
+ if (instruction->IsInstanceFieldSet() ||
+ instruction->IsArraySet() ||
+ instruction->CanBeMoved()) {
+ return true;
+ }
+ return false;
+}
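For intuition, here is a minimal C++ analogue of the pattern the predicate above is aimed at (an assumed example, not part of the patch): an allocation and the stores into it feed only an uncommon, throwing branch, so they are candidates for sinking into that branch, keeping the hot path free of the allocation.

#include <stdexcept>
#include <string>

// Illustrative only: the analogous Java code would surface in HIR as an allocation
// plus stores whose only consumer is the throwing (uncommon) exit.
int Divide(int a, int b) {
  if (b == 0) {                             // uncommon branch ending in a throw
    std::string msg = "division by zero";   // allocation + stores: sinking candidates
    throw std::runtime_error(msg);
  }
  return a / b;                             // hot path no longer pays for the allocation
}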
+
+static void AddInstruction(HInstruction* instruction,
+ const ArenaBitVector& processed_instructions,
+ const ArenaBitVector& discard_blocks,
+ ArenaVector<HInstruction*>* worklist) {
+ // Add to the work list if the instruction is not in the list of blocks
+ // to discard, hasn't been already processed and is of interest.
+ if (!discard_blocks.IsBitSet(instruction->GetBlock()->GetBlockId()) &&
+ !processed_instructions.IsBitSet(instruction->GetId()) &&
+ IsInterestingInstruction(instruction)) {
+ worklist->push_back(instruction);
+ }
+}
+
+static void AddInputs(HInstruction* instruction,
+ const ArenaBitVector& processed_instructions,
+ const ArenaBitVector& discard_blocks,
+ ArenaVector<HInstruction*>* worklist) {
+ for (HInstruction* input : instruction->GetInputs()) {
+ AddInstruction(input, processed_instructions, discard_blocks, worklist);
+ }
+}
+
+static void AddInputs(HBasicBlock* block,
+ const ArenaBitVector& processed_instructions,
+ const ArenaBitVector& discard_blocks,
+ ArenaVector<HInstruction*>* worklist) {
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
+ }
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
+ }
+}
+
+static bool ShouldFilterUse(HInstruction* instruction,
+ HInstruction* user,
+ const ArenaBitVector& post_dominated) {
+ if (instruction->IsNewInstance()) {
+ return user->IsInstanceFieldSet() &&
+ (user->InputAt(0) == instruction) &&
+ !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
+ } else if (instruction->IsNewArray()) {
+ return user->IsArraySet() &&
+ (user->InputAt(0) == instruction) &&
+ !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
+ }
+ return false;
+}
+
+
+// Find the ideal position for moving `instruction`. If `filter` is true,
+// we filter out store instructions into that instruction, which are processed
+// first in step (3) of the sinking algorithm.
+// This method is tailored to the sinking algorithm, unlike
+// the generic HInstruction::MoveBeforeFirstUserAndOutOfLoops.
+static HInstruction* FindIdealPosition(HInstruction* instruction,
+ const ArenaBitVector& post_dominated,
+ bool filter = false) {
+ DCHECK(!instruction->IsPhi()); // Makes no sense for Phi.
+
+ // Find the target block.
+ CommonDominator finder(/* start_block */ nullptr);
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) {
+ finder.Update(user->IsPhi()
+ ? user->GetBlock()->GetPredecessors()[use.GetIndex()]
+ : user->GetBlock());
+ }
+ }
+ for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+ DCHECK(!use.GetUser()->GetHolder()->IsPhi());
+ DCHECK(!filter || !ShouldFilterUse(instruction, use.GetUser()->GetHolder(), post_dominated));
+ finder.Update(use.GetUser()->GetHolder()->GetBlock());
+ }
+ HBasicBlock* target_block = finder.Get();
+ if (target_block == nullptr) {
+ // No user we can go next to? Likely an LSE or DCE limitation.
+ return nullptr;
+ }
+
+ // Move to the first dominator not in a loop, if we can.
+ while (target_block->IsInLoop()) {
+ if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) {
+ break;
+ }
+ target_block = target_block->GetDominator();
+ DCHECK(target_block != nullptr);
+ }
+
+ // Find insertion position. No need to filter anymore, as we have found a
+ // target block.
+ HInstruction* insert_pos = nullptr;
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ if (use.GetUser()->GetBlock() == target_block &&
+ (insert_pos == nullptr || use.GetUser()->StrictlyDominates(insert_pos))) {
+ insert_pos = use.GetUser();
+ }
+ }
+ for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+ HInstruction* user = use.GetUser()->GetHolder();
+ if (user->GetBlock() == target_block &&
+ (insert_pos == nullptr || user->StrictlyDominates(insert_pos))) {
+ insert_pos = user;
+ }
+ }
+ if (insert_pos == nullptr) {
+ // No user in `target_block`, insert before the control flow instruction.
+ insert_pos = target_block->GetLastInstruction();
+ DCHECK(insert_pos->IsControlFlow());
+ // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+ if (insert_pos->IsIf()) {
+ HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+ if (if_input == insert_pos->GetPrevious()) {
+ insert_pos = if_input;
+ }
+ }
+ }
+ DCHECK(!insert_pos->IsPhi());
+ return insert_pos;
+}
+
+
+void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
+ // Local allocator to discard data structures created below at the end of
+ // this optimization.
+ ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());
+
+ size_t number_of_instructions = graph_->GetCurrentInstructionId();
+ ArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc));
+ ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false);
+ ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false);
+ ArenaBitVector instructions_that_can_move(
+ &allocator, number_of_instructions, /* expandable */ false);
+ ArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));
+
+ // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`.
+ // TODO(ngeoffray): Getting the full set of post-dominated blocks should be done by
+ // computing the post dominator tree, but that could be too time consuming. Also,
+ // we should start the analysis from blocks dominated by an uncommon branch, but we
+ // don't profile branches yet.
+ bool found_block = false;
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ if (block == end_block) {
+ found_block = true;
+ post_dominated.SetBit(block->GetBlockId());
+ } else if (found_block) {
+ bool is_post_dominated = true;
+ if (block->GetSuccessors().empty()) {
+ // We currently bail for loops.
+ is_post_dominated = false;
+ } else {
+ for (HBasicBlock* successor : block->GetSuccessors()) {
+ if (!post_dominated.IsBitSet(successor->GetBlockId())) {
+ is_post_dominated = false;
+ break;
+ }
+ }
+ }
+ if (is_post_dominated) {
+ post_dominated.SetBit(block->GetBlockId());
+ }
+ }
+ }
+
+ // Now that we have found a subset of post-dominated blocks, add to the worklist all inputs
+ // of instructions in these blocks that are not themselves in these blocks.
+ // Also find the common dominator of the found post dominated blocks, to help filtering
+ // out un-movable uses in step (2).
+ CommonDominator finder(end_block);
+ for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) {
+ if (post_dominated.IsBitSet(i)) {
+ finder.Update(graph_->GetBlocks()[i]);
+ AddInputs(graph_->GetBlocks()[i], processed_instructions, post_dominated, &worklist);
+ }
+ }
+ HBasicBlock* common_dominator = finder.Get();
+
+ // Step (2): iterate over the worklist to find sinking candidates.
+ while (!worklist.empty()) {
+ HInstruction* instruction = worklist.back();
+ if (processed_instructions.IsBitSet(instruction->GetId())) {
+ // The instruction has already been processed, continue. This happens
+ // when the instruction is the input/user of multiple instructions.
+ worklist.pop_back();
+ continue;
+ }
+ bool all_users_in_post_dominated_blocks = true;
+ bool can_move = true;
+ // Check users of the instruction.
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId()) &&
+ !instructions_that_can_move.IsBitSet(user->GetId())) {
+ all_users_in_post_dominated_blocks = false;
+ // If we've already processed this user, or the user cannot be moved, or
+ // is not dominating the post dominated blocks, bail.
+ // TODO(ngeoffray): The domination check is an approximation. We should
+ // instead check if the dominated blocks post dominate the user's block,
+ // but we do not have post dominance information here.
+ if (processed_instructions.IsBitSet(user->GetId()) ||
+ !IsInterestingInstruction(user) ||
+ !user->GetBlock()->Dominates(common_dominator)) {
+ can_move = false;
+ break;
+ }
+ }
+ }
+
+ // Check environment users of the instruction. Some of these users require
+ // the instruction not to move.
+ if (all_users_in_post_dominated_blocks) {
+ for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+ HEnvironment* environment = use.GetUser();
+ HInstruction* user = environment->GetHolder();
+ if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
+ if (graph_->IsDebuggable() ||
+ user->IsDeoptimize() ||
+ user->CanThrowIntoCatchBlock() ||
+ (user->IsSuspendCheck() && graph_->IsCompilingOsr())) {
+ can_move = false;
+ break;
+ }
+ }
+ }
+ }
+ if (!can_move) {
+ // Instruction cannot be moved, mark it as processed and remove it from the work
+ // list.
+ processed_instructions.SetBit(instruction->GetId());
+ worklist.pop_back();
+ } else if (all_users_in_post_dominated_blocks) {
+ // Instruction is a candidate for being sunk. Mark it as such, remove it from the
+ // work list, and add its inputs to the work list.
+ instructions_that_can_move.SetBit(instruction->GetId());
+ move_in_order.push_back(instruction);
+ processed_instructions.SetBit(instruction->GetId());
+ worklist.pop_back();
+ AddInputs(instruction, processed_instructions, post_dominated, &worklist);
+ // Drop the environment uses not in the list of post-dominated blocks. This is
+ // to help step (3) of this optimization, when we start moving instructions
+ // closer to their use.
+ for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+ HEnvironment* environment = use.GetUser();
+ HInstruction* user = environment->GetHolder();
+ if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
+ environment->RemoveAsUserOfInput(use.GetIndex());
+ environment->SetRawEnvAt(use.GetIndex(), nullptr);
+ }
+ }
+ } else {
+ // The information we have on the users was not enough to decide whether the
+ // instruction could be moved.
+ // Add the users to the work list, and keep the instruction in the work list
+ // to process it again once all users have been processed.
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ AddInstruction(use.GetUser(), processed_instructions, post_dominated, &worklist);
+ }
+ }
+ }
+
+ // Make sure we process instructions in dominated order. This is required for heap
+ // stores.
+ std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) {
+ return b->StrictlyDominates(a);
+ });
+
+ // Step (3): Try to move sinking candidates.
+ for (HInstruction* instruction : move_in_order) {
+ HInstruction* position = nullptr;
+ if (instruction->IsArraySet() || instruction->IsInstanceFieldSet()) {
+ if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) {
+ // A store can trivially move, but it can safely do so only if the heap
+ // location it stores to can also move.
+ // TODO(ngeoffray): Handle allocation/store cycles by pruning these instructions
+ // from the set and all their inputs.
+ continue;
+ }
+ // Find the position of the instruction we're storing into, filtering out this
+ // store and all other stores to that instruction.
+ position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter */ true);
+
+ // The position needs to be dominated by the store, in order for the store to move there.
+ if (position == nullptr || !instruction->GetBlock()->Dominates(position->GetBlock())) {
+ continue;
+ }
+ } else {
+ // Find the ideal position within the post dominated blocks.
+ position = FindIdealPosition(instruction, post_dominated);
+ if (position == nullptr) {
+ continue;
+ }
+ }
+ // Bail if we could not find a position in the post dominated blocks (for example,
+ // if there are multiple users whose common dominator is not in the list of
+ // post dominated blocks).
+ if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) {
+ continue;
+ }
+ MaybeRecordStat(MethodCompilationStat::kInstructionSunk);
+ instruction->MoveBefore(position, /* ensure_safety */ false);
+ }
+}
+
+} // namespace art
diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h
new file mode 100644
index 0000000000..59cda52a8c
--- /dev/null
+++ b/compiler/optimizing/code_sinking.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+#define ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+/**
+ * Optimization pass to move instructions into uncommon branches,
+ * when it is safe to do so.
+ */
+class CodeSinking : public HOptimization {
+ public:
+ CodeSinking(HGraph* graph, OptimizingCompilerStats* stats)
+ : HOptimization(graph, kCodeSinkingPassName, stats) {}
+
+ void Run() OVERRIDE;
+
+ static constexpr const char* kCodeSinkingPassName = "code_sinking";
+
+ private:
+ // Try to move code used only by `end_block` and the blocks it post-dominates /
+ // dominates, into those blocks.
+ void SinkCodeToUncommonBranch(HBasicBlock* end_block);
+
+ DISALLOW_COPY_AND_ASSIGN(CodeSinking);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index cd954043f5..31cd204c9f 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -74,7 +74,6 @@ class CodegenTargetConfig {
}
private:
- CodegenTargetConfig() {}
InstructionSet isa_;
CreateCodegenFn create_codegen_;
};
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index e184745520..01304ac35b 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -66,6 +66,11 @@ inline vixl::aarch32::SRegister LowSRegisterFrom(Location location) {
return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl::aarch32::SRegister>());
}
+inline vixl::aarch32::SRegister HighSRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegisterPair()) << location;
+ return vixl::aarch32::SRegister(location.AsFpuRegisterPairHigh<vixl::aarch32::SRegister>());
+}
+
inline vixl::aarch32::Register RegisterFrom(Location location) {
DCHECK(location.IsRegister()) << location;
return vixl::aarch32::Register(location.reg());
diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h
index b459d24d7c..9f012cfbb2 100644
--- a/compiler/optimizing/common_dominator.h
+++ b/compiler/optimizing/common_dominator.h
@@ -36,12 +36,16 @@ class CommonDominator {
// Create a finder starting with a given block.
explicit CommonDominator(HBasicBlock* block)
: dominator_(block), chain_length_(ChainLength(block)) {
- DCHECK(block != nullptr);
}
// Update the common dominator with another block.
void Update(HBasicBlock* block) {
DCHECK(block != nullptr);
+ if (dominator_ == nullptr) {
+ dominator_ = block;
+ chain_length_ = ChainLength(block);
+ return;
+ }
HBasicBlock* block2 = dominator_;
DCHECK(block2 != nullptr);
if (block == block2) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 2bf5c53e17..cc3c143b15 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -322,9 +322,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
codegen_.DumpCoreRegister(stream, location.high());
} else if (location.IsUnallocated()) {
stream << "unallocated";
- } else {
- DCHECK(location.IsDoubleStackSlot());
+ } else if (location.IsDoubleStackSlot()) {
stream << "2x" << location.GetStackIndex() << "(sp)";
+ } else {
+ DCHECK(location.IsSIMDStackSlot());
+ stream << "4x" << location.GetStackIndex() << "(sp)";
}
}
@@ -503,6 +505,10 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
}
+ void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+ StartAttributeStream("kind") << deoptimize->GetKind();
+ }
+
#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetOpKind();
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 82ee93d5c2..9516ccb385 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -29,7 +29,21 @@ namespace art {
*/
class InductionVarAnalysisTest : public CommonCompilerTest {
public:
- InductionVarAnalysisTest() : pool_(), allocator_(&pool_) {
+ InductionVarAnalysisTest()
+ : pool_(),
+ allocator_(&pool_),
+ iva_(nullptr),
+ entry_(nullptr),
+ return_(nullptr),
+ exit_(nullptr),
+ parameter_(nullptr),
+ constant0_(nullptr),
+ constant1_(nullptr),
+ constant2_(nullptr),
+ constant7_(nullptr),
+ constant100_(nullptr),
+ constantm1_(nullptr),
+ float_constant0_(nullptr) {
graph_ = CreateGraph(&allocator_);
}
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 5539413aad..d6513c8e34 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -57,21 +57,27 @@ static bool IsIntAndGet(HInstruction* instruction, int64_t* value) {
return false;
}
-/** Returns b^e for b,e >= 1. Sets overflow if arithmetic wrap-around occurred. */
+/** Computes a * b for a,b > 0 (at least until first overflow happens). */
+static int64_t SafeMul(int64_t a, int64_t b, /*out*/ bool* overflow) {
+ if (a > 0 && b > 0 && a > (std::numeric_limits<int64_t>::max() / b)) {
+ *overflow = true;
+ }
+ return a * b;
+}
+
+/** Returns b^e for b,e > 0. Sets overflow if arithmetic wrap-around occurred. */
static int64_t IntPow(int64_t b, int64_t e, /*out*/ bool* overflow) {
- DCHECK_GE(b, 1);
- DCHECK_GE(e, 1);
+ DCHECK_LT(0, b);
+ DCHECK_LT(0, e);
int64_t pow = 1;
while (e) {
if (e & 1) {
- int64_t oldpow = pow;
- pow *= b;
- if (pow < oldpow) {
- *overflow = true;
- }
+ pow = SafeMul(pow, b, overflow);
}
e >>= 1;
- b *= b;
+ if (e) {
+ b = SafeMul(b, b, overflow);
+ }
}
return pow;
}
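A self-contained sketch of the same overflow-checked exponentiation by squaring (the names CheckedMul/CheckedPow and the driver below are illustrative assumptions, not part of the patch). Note that the base is only squared again while exponent bits remain, so the final squaring cannot spuriously flag an overflow.

#include <cstdint>
#include <iostream>
#include <limits>

// Overflow-checked multiply for positive operands, mirroring SafeMul above.
static int64_t CheckedMul(int64_t a, int64_t b, bool* overflow) {
  if (a > 0 && b > 0 && a > std::numeric_limits<int64_t>::max() / b) {
    *overflow = true;
  }
  return a * b;
}

// b^e for b,e > 0 via exponentiation by squaring, mirroring IntPow above.
static int64_t CheckedPow(int64_t b, int64_t e, bool* overflow) {
  int64_t pow = 1;
  while (e) {
    if (e & 1) {
      pow = CheckedMul(pow, b, overflow);
    }
    e >>= 1;
    if (e) {
      b = CheckedMul(b, b, overflow);  // only square while exponent bits remain
    }
  }
  return pow;
}

int main() {
  bool overflow = false;
  std::cout << CheckedPow(3, 4, &overflow) << " overflow=" << overflow << "\n";  // 81 overflow=0
  overflow = false;
  CheckedPow(10, 19, &overflow);  // 10^19 exceeds int64_t
  std::cout << "overflow=" << overflow << "\n";  // overflow=1
}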
@@ -377,6 +383,54 @@ bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) co
return false;
}
+bool InductionVarRange::IsUnitStride(HInstruction* instruction,
+ /*out*/ HInstruction** offset) const {
+ HLoopInformation* loop = nullptr;
+ HInductionVarAnalysis::InductionInfo* info = nullptr;
+ HInductionVarAnalysis::InductionInfo* trip = nullptr;
+ if (HasInductionInfo(instruction, instruction, &loop, &info, &trip)) {
+ if (info->induction_class == HInductionVarAnalysis::kLinear &&
+ info->op_b->operation == HInductionVarAnalysis::kFetch &&
+ !HInductionVarAnalysis::IsNarrowingLinear(info)) {
+ int64_t stride_value = 0;
+ if (IsConstant(info->op_a, kExact, &stride_value) && stride_value == 1) {
+ int64_t off_value = 0;
+ if (IsConstant(info->op_b, kExact, &off_value) && off_value == 0) {
+ *offset = nullptr;
+ } else {
+ *offset = info->op_b->fetch;
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop,
+ HGraph* graph,
+ HBasicBlock* block) {
+ HInductionVarAnalysis::InductionInfo *trip =
+ induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
+ if (trip != nullptr && !IsUnsafeTripCount(trip)) {
+ HInstruction* taken_test = nullptr;
+ HInstruction* trip_expr = nullptr;
+ if (IsBodyTripCount(trip)) {
+ if (!GenerateCode(trip->op_b, nullptr, graph, block, &taken_test, false, false)) {
+ return nullptr;
+ }
+ }
+ if (GenerateCode(trip->op_a, nullptr, graph, block, &trip_expr, false, false)) {
+ if (taken_test != nullptr) {
+ HInstruction* zero = graph->GetConstant(trip->type, 0);
+ trip_expr = Insert(block, new (graph->GetArena()) HSelect(taken_test, trip_expr, zero, kNoDexPc));
+ }
+ return trip_expr;
+ }
+ }
+ return nullptr;
+}
+
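As a concrete (assumed) illustration of the two new helpers: for a counted loop shaped like the C++ below, IsUnitStride on the loop phi succeeds with a null offset, and GenerateTripCount materializes n in the preheader, guarded by the taken test when needed, i.e. roughly 0 < n ? n : 0. The test changes further down exercise exactly this behavior.

// Illustrative source shape only (not part of the patch).
int Sum(const int* a, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i) {  // `i` is a linear induction: stride 1, offset 0
    s += a[i];
  }
  return s;
}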
//
// Private class methods.
//
@@ -1157,12 +1211,15 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
HInstruction* opb = nullptr;
switch (info->induction_class) {
case HInductionVarAnalysis::kInvariant:
- // Invariants (note that even though is_min does not impact code generation for
- // invariants, some effort is made to keep this parameter consistent).
+ // Invariants (note that since invariants only have other invariants as
+ // sub expressions, viz. no induction, there is no need to adjust is_min).
switch (info->operation) {
case HInductionVarAnalysis::kAdd:
- case HInductionVarAnalysis::kRem: // no proper is_min for second arg
- case HInductionVarAnalysis::kXor: // no proper is_min for second arg
+ case HInductionVarAnalysis::kSub:
+ case HInductionVarAnalysis::kMul:
+ case HInductionVarAnalysis::kDiv:
+ case HInductionVarAnalysis::kRem:
+ case HInductionVarAnalysis::kXor:
case HInductionVarAnalysis::kLT:
case HInductionVarAnalysis::kLE:
case HInductionVarAnalysis::kGT:
@@ -1174,6 +1231,12 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
switch (info->operation) {
case HInductionVarAnalysis::kAdd:
operation = new (graph->GetArena()) HAdd(type, opa, opb); break;
+ case HInductionVarAnalysis::kSub:
+ operation = new (graph->GetArena()) HSub(type, opa, opb); break;
+ case HInductionVarAnalysis::kMul:
+ operation = new (graph->GetArena()) HMul(type, opa, opb, kNoDexPc); break;
+ case HInductionVarAnalysis::kDiv:
+ operation = new (graph->GetArena()) HDiv(type, opa, opb, kNoDexPc); break;
case HInductionVarAnalysis::kRem:
operation = new (graph->GetArena()) HRem(type, opa, opb, kNoDexPc); break;
case HInductionVarAnalysis::kXor:
@@ -1194,16 +1257,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
return true;
}
break;
- case HInductionVarAnalysis::kSub: // second reversed!
- if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) &&
- GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) {
- if (graph != nullptr) {
- *result = Insert(block, new (graph->GetArena()) HSub(type, opa, opb));
- }
- return true;
- }
- break;
- case HInductionVarAnalysis::kNeg: // reversed!
+ case HInductionVarAnalysis::kNeg:
if (GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) {
if (graph != nullptr) {
*result = Insert(block, new (graph->GetArena()) HNeg(type, opb));
@@ -1240,9 +1294,9 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
}
}
break;
- default:
- break;
- }
+ case HInductionVarAnalysis::kNop:
+ LOG(FATAL) << "unexpected invariant nop";
+ } // switch invariant operation
break;
case HInductionVarAnalysis::kLinear: {
// Linear induction a * i + b, for normalized 0 <= i < TC. For ranges, this should
@@ -1293,7 +1347,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
}
break;
}
- }
+ } // switch induction class
}
return false;
}
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 6c424b78b9..0858d73982 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -24,7 +24,8 @@ namespace art {
/**
* This class implements range analysis on expressions within loops. It takes the results
* of induction variable analysis in the constructor and provides a public API to obtain
- * a conservative lower and upper bound value on each instruction in the HIR.
+ * a conservative lower and upper bound value or last value on each instruction in the HIR.
+ * The public API also provides a few general-purpose utility methods related to induction.
*
* The range analysis is done with a combination of symbolic and partial integral evaluation
* of expressions. The analysis avoids complications with wrap-around arithmetic on the integral
@@ -154,6 +155,19 @@ class InductionVarRange {
*/
bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
+ /**
+ * Checks if instruction is a unit stride induction inside the closest enveloping loop.
+ * Returns invariant offset on success.
+ */
+ bool IsUnitStride(HInstruction* instruction, /*out*/ HInstruction** offset) const;
+
+ /**
+ * Generates the trip count expression for the given loop. Code is generated in the given block
+ * and graph. The expression is guarded by a taken test if needed. Returns the trip count
+ * expression on success or null otherwise.
+ */
+ HInstruction* GenerateTripCount(HLoopInformation* loop, HGraph* graph, HBasicBlock* block);
+
private:
/*
* Enum used in IsConstant() request.
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index d81817fb09..fcdf8eb7dc 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -48,6 +48,11 @@ class InductionVarRangeTest : public CommonCompilerTest {
EXPECT_EQ(v1.is_known, v2.is_known);
}
+ void ExpectInt(int32_t value, HInstruction* i) {
+ ASSERT_TRUE(i->IsIntConstant());
+ EXPECT_EQ(value, i->AsIntConstant()->GetValue());
+ }
+
//
// Construction methods.
//
@@ -757,10 +762,20 @@ TEST_F(InductionVarRangeTest, ConstantTripCountUp) {
// Last value (unsimplified).
HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
ASSERT_TRUE(last->IsAdd());
- ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue());
- ASSERT_TRUE(last->InputAt(1)->IsIntConstant());
- EXPECT_EQ(0, last->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(1000, last->InputAt(0));
+ ExpectInt(0, last->InputAt(1));
+
+ // Loop logic.
+ int64_t tc = 0;
+ EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+ EXPECT_EQ(1000, tc);
+ HInstruction* offset = nullptr;
+ EXPECT_TRUE(range_.IsUnitStride(phi, &offset));
+ EXPECT_TRUE(offset == nullptr);
+ HInstruction* tce = range_.GenerateTripCount(
+ loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+ ASSERT_TRUE(tce != nullptr);
+ ExpectInt(1000, tce);
}
TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
@@ -799,15 +814,27 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
// Last value (unsimplified).
HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
ASSERT_TRUE(last->IsSub());
- ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue());
+ ExpectInt(1000, last->InputAt(0));
ASSERT_TRUE(last->InputAt(1)->IsNeg());
last = last->InputAt(1)->InputAt(0);
ASSERT_TRUE(last->IsSub());
- ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
- EXPECT_EQ(0, last->InputAt(0)->AsIntConstant()->GetValue());
- ASSERT_TRUE(last->InputAt(1)->IsIntConstant());
- EXPECT_EQ(1000, last->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(0, last->InputAt(0));
+ ExpectInt(1000, last->InputAt(1));
+
+ // Loop logic.
+ int64_t tc = 0;
+ EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+ EXPECT_EQ(1000, tc);
+ HInstruction* offset = nullptr;
+ EXPECT_FALSE(range_.IsUnitStride(phi, &offset));
+ HInstruction* tce = range_.GenerateTripCount(
+ loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+ ASSERT_TRUE(tce != nullptr);
+ ASSERT_TRUE(tce->IsNeg());
+ last = tce->InputAt(0);
+ EXPECT_TRUE(last->IsSub());
+ ExpectInt(0, last->InputAt(0));
+ ExpectInt(1000, last->InputAt(1));
}
TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
@@ -851,27 +878,22 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
// Verify lower is 0+0.
ASSERT_TRUE(lower != nullptr);
ASSERT_TRUE(lower->IsAdd());
- ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
- EXPECT_EQ(0, lower->InputAt(0)->AsIntConstant()->GetValue());
- ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
- EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(0, lower->InputAt(0));
+ ExpectInt(0, lower->InputAt(1));
// Verify upper is (V-1)+0.
ASSERT_TRUE(upper != nullptr);
ASSERT_TRUE(upper->IsAdd());
ASSERT_TRUE(upper->InputAt(0)->IsSub());
EXPECT_TRUE(upper->InputAt(0)->InputAt(0)->IsParameterValue());
- ASSERT_TRUE(upper->InputAt(0)->InputAt(1)->IsIntConstant());
- EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue());
- ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
- EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(1, upper->InputAt(0)->InputAt(1));
+ ExpectInt(0, upper->InputAt(1));
// Verify taken-test is 0<V.
HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_);
ASSERT_TRUE(taken != nullptr);
ASSERT_TRUE(taken->IsLessThan());
- ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
- EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue());
+ ExpectInt(0, taken->InputAt(0));
EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
// Replacement.
@@ -880,6 +902,21 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(1), v1);
ExpectEqual(Value(y_, 1, 0), v2);
+
+ // Loop logic.
+ int64_t tc = 0;
+ EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+ EXPECT_EQ(0, tc); // unknown
+ HInstruction* offset = nullptr;
+ EXPECT_TRUE(range_.IsUnitStride(phi, &offset));
+ EXPECT_TRUE(offset == nullptr);
+ HInstruction* tce = range_.GenerateTripCount(
+ loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+ ASSERT_TRUE(tce != nullptr);
+ EXPECT_TRUE(tce->IsSelect()); // guarded by taken-test
+ ExpectInt(0, tce->InputAt(0));
+ EXPECT_TRUE(tce->InputAt(1)->IsParameterValue());
+ EXPECT_TRUE(tce->InputAt(2)->IsLessThan());
}
TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
@@ -923,32 +960,26 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
// Verify lower is 1000-((1000-V)-1).
ASSERT_TRUE(lower != nullptr);
ASSERT_TRUE(lower->IsSub());
- ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+ ExpectInt(1000, lower->InputAt(0));
lower = lower->InputAt(1);
ASSERT_TRUE(lower->IsSub());
- ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
- EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(1, lower->InputAt(1));
lower = lower->InputAt(0);
ASSERT_TRUE(lower->IsSub());
- ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+ ExpectInt(1000, lower->InputAt(0));
EXPECT_TRUE(lower->InputAt(1)->IsParameterValue());
// Verify upper is 1000-0.
ASSERT_TRUE(upper != nullptr);
ASSERT_TRUE(upper->IsSub());
- ASSERT_TRUE(upper->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue());
- ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
- EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(1000, upper->InputAt(0));
+ ExpectInt(0, upper->InputAt(1));
// Verify taken-test is 1000>V.
HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_);
ASSERT_TRUE(taken != nullptr);
ASSERT_TRUE(taken->IsGreaterThan());
- ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue());
+ ExpectInt(1000, taken->InputAt(0));
EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
// Replacement.
@@ -957,6 +988,23 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(y_, 1, 0), v1);
ExpectEqual(Value(999), v2);
+
+ // Loop logic.
+ int64_t tc = 0;
+ EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+ EXPECT_EQ(0, tc); // unknown
+ HInstruction* offset = nullptr;
+ EXPECT_FALSE(range_.IsUnitStride(phi, &offset));
+ HInstruction* tce = range_.GenerateTripCount(
+ loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+ ASSERT_TRUE(tce != nullptr);
+ EXPECT_TRUE(tce->IsSelect()); // guarded by taken-test
+ ExpectInt(0, tce->InputAt(0));
+ EXPECT_TRUE(tce->InputAt(1)->IsSub());
+ EXPECT_TRUE(tce->InputAt(2)->IsGreaterThan());
+ tce = tce->InputAt(1);
+ ExpectInt(1000, tce->InputAt(0));
+ EXPECT_TRUE(tce->InputAt(1)->IsParameterValue());
}
} // namespace art
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 3e340908bf..298ae5c847 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -46,29 +46,100 @@
namespace art {
-static constexpr size_t kMaximumNumberOfHInstructions = 32;
+// Instruction limit to control memory.
+static constexpr size_t kMaximumNumberOfTotalInstructions = 1024;
+
+// Maximum number of instructions for considering a method small,
+// which we will always try to inline if the other non-instruction limits
+// are not reached.
+static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3;
// Limit the number of dex registers that we accumulate while inlining
// to avoid creating large amount of nested environments.
static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
-// Avoid inlining within a huge method due to memory pressure.
-static constexpr size_t kMaximumCodeUnitSize = 4096;
+// Limit recursive call inlining, which does not benefit much from
+// additional inlining compared to the cost in code locality.
+static constexpr size_t kMaximumNumberOfRecursiveCalls = 4;
+
+// Controls the use of inline caches in AOT mode.
+static constexpr bool kUseAOTInlineCaches = true;
+
+// We check for line numbers to make sure the DepthString implementation
+// aligns the output nicely.
+#define LOG_INTERNAL(msg) \
+ static_assert(__LINE__ > 10, "Unhandled line number"); \
+ static_assert(__LINE__ < 10000, "Unhandled line number"); \
+ VLOG(compiler) << DepthString(__LINE__) << msg
+
+#define LOG_TRY() LOG_INTERNAL("Try inlining call: ")
+#define LOG_NOTE() LOG_INTERNAL("Note: ")
+#define LOG_SUCCESS() LOG_INTERNAL("Success: ")
+#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ")
+#define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ")
+
+std::string HInliner::DepthString(int line) const {
+ std::string value;
+ // Indent according to the inlining depth.
+ size_t count = depth_;
+ // Line numbers get printed in the log, so add one space if the line is below
+ // 1000, and two if below 100. Lines below 10 cannot be reached: they hold the copyright.
+ if (!kIsTargetBuild) {
+ if (line < 100) {
+ value += " ";
+ }
+ if (line < 1000) {
+ value += " ";
+ }
+ // Safeguard if this file reaches more than 10000 lines.
+ DCHECK_LT(line, 10000);
+ }
+ for (size_t i = 0; i < count; ++i) {
+ value += " ";
+ }
+ return value;
+}
-void HInliner::Run() {
- const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
- if ((compiler_options.GetInlineDepthLimit() == 0)
- || (compiler_options.GetInlineMaxCodeUnits() == 0)) {
- return;
+static size_t CountNumberOfInstructions(HGraph* graph) {
+ size_t number_of_instructions = 0;
+ for (HBasicBlock* block : graph->GetReversePostOrderSkipEntryBlock()) {
+ for (HInstructionIterator instr_it(block->GetInstructions());
+ !instr_it.Done();
+ instr_it.Advance()) {
+ ++number_of_instructions;
+ }
}
- if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) {
- return;
+ return number_of_instructions;
+}
+
+void HInliner::UpdateInliningBudget() {
+ if (total_number_of_instructions_ >= kMaximumNumberOfTotalInstructions) {
+ // Always try to inline small methods.
+ inlining_budget_ = kMaximumNumberOfInstructionsForSmallMethod;
+ } else {
+ inlining_budget_ = std::max(
+ kMaximumNumberOfInstructionsForSmallMethod,
+ kMaximumNumberOfTotalInstructions - total_number_of_instructions_);
}
+}
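A minimal standalone sketch of the budget rule above (the constants mirror the patch; the Budget helper and the sample totals are illustrative assumptions):

#include <algorithm>
#include <cstddef>
#include <iostream>

// Constants copied from the patch.
constexpr size_t kMaxTotal = 1024;    // kMaximumNumberOfTotalInstructions
constexpr size_t kSmallMethod = 3;    // kMaximumNumberOfInstructionsForSmallMethod

size_t Budget(size_t total_instructions) {
  if (total_instructions >= kMaxTotal) {
    return kSmallMethod;  // only "small" methods may still be inlined
  }
  return std::max(kSmallMethod, kMaxTotal - total_instructions);
}

int main() {
  std::cout << Budget(200) << "\n";   // 824: plenty of budget left
  std::cout << Budget(1022) << "\n";  // 3: max(3, 2) clamps to the small-method limit
  std::cout << Budget(1500) << "\n";  // 3: caller already over the total limit
}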
+
+void HInliner::Run() {
if (graph_->IsDebuggable()) {
// For simplicity, we currently never inline when the graph is debuggable. This avoids
// doing some logic in the runtime to discover if a method could have been inlined.
return;
}
+
+ // Initialize the number of instructions for the method being compiled. Recursive calls
+ // to HInliner::Run have already updated the instruction count.
+ if (outermost_graph_ == graph_) {
+ total_number_of_instructions_ = CountNumberOfInstructions(graph_);
+ }
+
+ UpdateInliningBudget();
+ DCHECK_NE(total_number_of_instructions_, 0u);
+ DCHECK_NE(inlining_budget_, 0u);
+
// Keep a copy of all blocks when starting the visit.
ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
DCHECK(!blocks.empty());
@@ -249,20 +320,25 @@ class ScopedProfilingInfoInlineUse {
ProfilingInfo* const profiling_info_;
};
-static bool IsMonomorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- DCHECK_GE(InlineCache::kIndividualCacheSize, 2);
- return classes->Get(0) != nullptr && classes->Get(1) == nullptr;
-}
-
-static bool IsMegamorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
- if (classes->Get(i) == nullptr) {
- return false;
+HInliner::InlineCacheType HInliner::GetInlineCacheType(
+ const Handle<mirror::ObjectArray<mirror::Class>>& classes)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ uint8_t number_of_types = 0;
+ for (; number_of_types < InlineCache::kIndividualCacheSize; ++number_of_types) {
+ if (classes->Get(number_of_types) == nullptr) {
+ break;
}
}
- return true;
+
+ if (number_of_types == 0) {
+ return kInlineCacheUninitialized;
+ } else if (number_of_types == 1) {
+ return kInlineCacheMonomorphic;
+ } else if (number_of_types == InlineCache::kIndividualCacheSize) {
+ return kInlineCacheMegamorphic;
+ } else {
+ return kInlineCachePolymorphic;
+ }
}
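The classification simply counts the leading non-null entries of the cache. A small standalone sketch, assuming a cache of five entries as in the runtime's InlineCache::kIndividualCacheSize and using shortened enum names:

#include <array>
#include <cstddef>

enum InlineCacheType { kUninitialized, kMonomorphic, kPolymorphic, kMegamorphic };

// Mirrors GetInlineCacheType above: count leading non-null classes, then classify.
InlineCacheType Classify(const std::array<const void*, 5>& classes) {
  size_t number_of_types = 0;
  while (number_of_types < classes.size() && classes[number_of_types] != nullptr) {
    ++number_of_types;
  }
  if (number_of_types == 0) return kUninitialized;              // no receiver type recorded yet
  if (number_of_types == 1) return kMonomorphic;                // exactly one receiver type
  if (number_of_types == classes.size()) return kMegamorphic;   // cache is full
  return kPolymorphic;                                          // a handful of receiver types
}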
static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes)
@@ -271,18 +347,6 @@ static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Clas
return classes->Get(0);
}
-static bool IsUninitialized(Handle<mirror::ObjectArray<mirror::Class>> classes)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- return classes->Get(0) == nullptr;
-}
-
-static bool IsPolymorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- DCHECK_GE(InlineCache::kIndividualCacheSize, 3);
- return classes->Get(1) != nullptr &&
- classes->Get(InlineCache::kIndividualCacheSize - 1) == nullptr;
-}
-
ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
if (!resolved_method->HasSingleImplementation()) {
return nullptr;
@@ -296,7 +360,24 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
return nullptr;
}
PointerSize pointer_size = caller_compilation_unit_.GetClassLinker()->GetImagePointerSize();
- return resolved_method->GetSingleImplementation(pointer_size);
+ ArtMethod* single_impl = resolved_method->GetSingleImplementation(pointer_size);
+ if (single_impl == nullptr) {
+ return nullptr;
+ }
+ if (single_impl->IsProxyMethod()) {
+ // A proxy method is a generic invoker that's not worth
+ // devirtualizing/inlining. It also causes issues when the proxy
+ // method is in another dex file and we try to rewrite invoke-interface to
+ // invoke-virtual, because a proxy method doesn't have a real dex file.
+ return nullptr;
+ }
+ if (!single_impl->GetDeclaringClass()->IsResolved()) {
+ // There's a race with the class loading, which updates the CHA info
+ // before setting the class to resolved. So we just bail for this
+ // rare occurrence.
+ return nullptr;
+ }
+ return single_impl;
}
bool HInliner::TryInline(HInvoke* invoke_instruction) {
@@ -309,17 +390,18 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
ScopedObjectAccess soa(Thread::Current());
uint32_t method_index = invoke_instruction->GetDexMethodIndex();
const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
- VLOG(compiler) << "Try inlining " << caller_dex_file.PrettyMethod(method_index);
+ LOG_TRY() << caller_dex_file.PrettyMethod(method_index);
- // We can query the dex cache directly. The verifier has populated it already.
ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod();
- ArtMethod* actual_method = nullptr;
if (resolved_method == nullptr) {
DCHECK(invoke_instruction->IsInvokeStaticOrDirect());
DCHECK(invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit());
- VLOG(compiler) << "Not inlining a String.<init> method";
+ LOG_FAIL_NO_STAT() << "Not inlining a String.<init> method";
return false;
- } else if (invoke_instruction->IsInvokeStaticOrDirect()) {
+ }
+ ArtMethod* actual_method = nullptr;
+
+ if (invoke_instruction->IsInvokeStaticOrDirect()) {
actual_method = resolved_method;
} else {
// Check if we can statically find the method.
@@ -332,6 +414,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
if (method != nullptr) {
cha_devirtualize = true;
actual_method = method;
+ LOG_NOTE() << "Try CHA-based inlining of " << actual_method->PrettyMethod();
}
}
@@ -353,67 +436,226 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
}
return result;
}
-
DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
- // Check if we can use an inline cache.
- ArtMethod* caller = graph_->GetArtMethod();
- if (Runtime::Current()->UseJitCompilation()) {
- // Under JIT, we should always know the caller.
- DCHECK(caller != nullptr);
- ScopedProfilingInfoInlineUse spiis(caller, soa.Self());
- ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
- if (profiling_info != nullptr) {
- StackHandleScope<1> hs(soa.Self());
- ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
- Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs.NewHandle(
- mirror::ObjectArray<mirror::Class>::Alloc(
- soa.Self(),
- class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
- InlineCache::kIndividualCacheSize));
- if (inline_cache == nullptr) {
- // We got an OOME. Just clear the exception, and don't inline.
- DCHECK(soa.Self()->IsExceptionPending());
- soa.Self()->ClearException();
- VLOG(compiler) << "Out of memory in the compiler when trying to inline";
- return false;
+ // Try using inline caches.
+ return TryInlineFromInlineCache(caller_dex_file, invoke_instruction, resolved_method);
+}
+
+static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder(
+ const DexCompilationUnit& compilation_unit,
+ StackHandleScope<1>* hs)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ Thread* self = Thread::Current();
+ ClassLinker* class_linker = compilation_unit.GetClassLinker();
+ Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs->NewHandle(
+ mirror::ObjectArray<mirror::Class>::Alloc(
+ self,
+ class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
+ InlineCache::kIndividualCacheSize));
+ if (inline_cache == nullptr) {
+ // We got an OOME. Just clear the exception, and don't inline.
+ DCHECK(self->IsExceptionPending());
+ self->ClearException();
+ VLOG(compiler) << "Out of memory in the compiler when trying to inline";
+ }
+ return inline_cache;
+}
+
+bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
+ HInvoke* invoke_instruction,
+ ArtMethod* resolved_method)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ if (Runtime::Current()->IsAotCompiler() && !kUseAOTInlineCaches) {
+ return false;
+ }
+
+ StackHandleScope<1> hs(Thread::Current());
+ Handle<mirror::ObjectArray<mirror::Class>> inline_cache;
+ InlineCacheType inline_cache_type = Runtime::Current()->IsAotCompiler()
+ ? GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache)
+ : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache);
+
+ switch (inline_cache_type) {
+ case kInlineCacheNoData: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " could not be statically determined";
+ return false;
+ }
+
+ case kInlineCacheUninitialized: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " is not hit and not inlined";
+ return false;
+ }
+
+ case kInlineCacheMonomorphic: {
+ MaybeRecordStat(kMonomorphicCall);
+ if (outermost_graph_->IsCompilingOsr()) {
+ // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
+ // interpreter and it may have seen different receiver types.
+ return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
} else {
- Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(
- *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()),
- inline_cache);
- if (IsUninitialized(inline_cache)) {
- VLOG(compiler) << "Interface or virtual call to "
- << caller_dex_file.PrettyMethod(method_index)
- << " is not hit and not inlined";
- return false;
- } else if (IsMonomorphic(inline_cache)) {
- MaybeRecordStat(kMonomorphicCall);
- if (outermost_graph_->IsCompilingOsr()) {
- // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
- // interpreter and it may have seen different receiver types.
- return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
- } else {
- return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
- }
- } else if (IsPolymorphic(inline_cache)) {
- MaybeRecordStat(kPolymorphicCall);
- return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
- } else {
- DCHECK(IsMegamorphic(inline_cache));
- VLOG(compiler) << "Interface or virtual call to "
- << caller_dex_file.PrettyMethod(method_index)
- << " is megamorphic and not inlined";
- MaybeRecordStat(kMegamorphicCall);
- return false;
- }
+ return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
}
}
+
+ case kInlineCachePolymorphic: {
+ MaybeRecordStat(kPolymorphicCall);
+ return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
+ }
+
+ case kInlineCacheMegamorphic: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " is megamorphic and not inlined";
+ MaybeRecordStat(kMegamorphicCall);
+ return false;
+ }
+
+ case kInlineCacheMissingTypes: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " is missing types and not inlined";
+ return false;
+ }
}
+ UNREACHABLE();
+}
- VLOG(compiler) << "Interface or virtual call to "
- << caller_dex_file.PrettyMethod(method_index)
- << " could not be statically determined";
- return false;
+HInliner::InlineCacheType HInliner::GetInlineCacheJIT(
+ HInvoke* invoke_instruction,
+ StackHandleScope<1>* hs,
+ /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(Runtime::Current()->UseJitCompilation());
+
+ ArtMethod* caller = graph_->GetArtMethod();
+ // Under JIT, we should always know the caller.
+ DCHECK(caller != nullptr);
+ ScopedProfilingInfoInlineUse spiis(caller, Thread::Current());
+ ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
+
+ if (profiling_info == nullptr) {
+ return kInlineCacheNoData;
+ }
+
+ *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs);
+ if (inline_cache->Get() == nullptr) {
+ // We can't extract any data if we failed to allocate.
+ return kInlineCacheNoData;
+ } else {
+ Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(
+ *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()),
+ *inline_cache);
+ return GetInlineCacheType(*inline_cache);
+ }
+}
+
+HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
+ const DexFile& caller_dex_file,
+ HInvoke* invoke_instruction,
+ StackHandleScope<1>* hs,
+ /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(Runtime::Current()->IsAotCompiler());
+ const ProfileCompilationInfo* pci = compiler_driver_->GetProfileCompilationInfo();
+ if (pci == nullptr) {
+ return kInlineCacheNoData;
+ }
+
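+ // Look up the profile entry for the caller method by dex location, checksum and method index.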
+ ProfileCompilationInfo::OfflineProfileMethodInfo offline_profile;
+ bool found = pci->GetMethod(caller_dex_file.GetLocation(),
+ caller_dex_file.GetLocationChecksum(),
+ caller_compilation_unit_.GetDexMethodIndex(),
+ &offline_profile);
+ if (!found) {
+ return kInlineCacheNoData; // no profile information for this invocation.
+ }
+
+ *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs);
+ if (inline_cache->Get() == nullptr) {
+ // We can't extract any data if we failed to allocate.
+ return kInlineCacheNoData;
+ } else {
+ return ExtractClassesFromOfflineProfile(invoke_instruction,
+ offline_profile,
+ *inline_cache);
+ }
+}
+
+HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile(
+ const HInvoke* invoke_instruction,
+ const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
+ /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ const auto it = offline_profile.inline_caches.find(invoke_instruction->GetDexPc());
+ if (it == offline_profile.inline_caches.end()) {
+ return kInlineCacheUninitialized;
+ }
+
+ const ProfileCompilationInfo::DexPcData& dex_pc_data = it->second;
+
+ if (dex_pc_data.is_missing_types) {
+ return kInlineCacheMissingTypes;
+ }
+ if (dex_pc_data.is_megamorphic) {
+ return kInlineCacheMegamorphic;
+ }
+
+ DCHECK_LE(dex_pc_data.classes.size(), InlineCache::kIndividualCacheSize);
+ Thread* self = Thread::Current();
+ // We need to resolve the class relative to the containing dex file.
+ // So first, build a mapping from the index of the dex file in the profile to
+ // its dex cache. This will avoid repeating the lookup when walking over
+ // the inline cache types.
+ std::vector<ObjPtr<mirror::DexCache>> dex_profile_index_to_dex_cache(
+ offline_profile.dex_references.size());
+ for (size_t i = 0; i < offline_profile.dex_references.size(); i++) {
+ bool found = false;
+ for (const DexFile* dex_file : compiler_driver_->GetDexFilesForOatFile()) {
+ if (offline_profile.dex_references[i].MatchesDex(dex_file)) {
+ dex_profile_index_to_dex_cache[i] =
+ caller_compilation_unit_.GetClassLinker()->FindDexCache(self, *dex_file);
+ found = true;
+ }
+ }
+ if (!found) {
+ VLOG(compiler) << "Could not find profiled dex file: "
+ << offline_profile.dex_references[i].dex_location;
+ return kInlineCacheMissingTypes;
+ }
+ }
+
+ // Walk over the classes and resolve them. If we cannot find a type we return
+ // kInlineCacheMissingTypes.
+ int ic_index = 0;
+ for (const ProfileCompilationInfo::ClassReference& class_ref : dex_pc_data.classes) {
+ ObjPtr<mirror::DexCache> dex_cache =
+ dex_profile_index_to_dex_cache[class_ref.dex_profile_index];
+ DCHECK(dex_cache != nullptr);
+ ObjPtr<mirror::Class> clazz = ClassLinker::LookupResolvedType(
+ class_ref.type_index,
+ dex_cache,
+ caller_compilation_unit_.GetClassLoader().Get());
+ if (clazz != nullptr) {
+ inline_cache->Set(ic_index++, clazz);
+ } else {
+ VLOG(compiler) << "Could not resolve class from inline cache in AOT mode "
+ << caller_compilation_unit_.GetDexFile()->PrettyMethod(
+ invoke_instruction->GetDexMethodIndex()) << " : "
+ << caller_compilation_unit_
+ .GetDexFile()->StringByTypeIdx(class_ref.type_index);
+ return kInlineCacheMissingTypes;
+ }
+ }
+ return GetInlineCacheType(inline_cache);
}
HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
@@ -436,6 +678,32 @@ HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
return result;
}
+static ArtMethod* ResolveMethodFromInlineCache(Handle<mirror::Class> klass,
+ ArtMethod* resolved_method,
+ HInstruction* invoke_instruction,
+ PointerSize pointer_size)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ if (Runtime::Current()->IsAotCompiler()) {
+ // We can get unrelated types when working with profiles (corruption,
+ // system updates, or anyone can write to it). So first check if the class
+ // actually implements the declaring class of the method that is being
+ // called in bytecode.
+ // Note: the lookup methods used below require to have assignable types.
+ if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(klass.Get())) {
+ return nullptr;
+ }
+ }
+
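+ // Devirtualize: find the implementation of `resolved_method` that `klass` would actually invoke.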
+ if (invoke_instruction->IsInvokeInterface()) {
+ resolved_method = klass->FindVirtualMethodForInterface(resolved_method, pointer_size);
+ } else {
+ DCHECK(invoke_instruction->IsInvokeVirtual());
+ resolved_method = klass->FindVirtualMethodForVirtual(resolved_method, pointer_size);
+ }
+ DCHECK(resolved_method != nullptr);
+ return resolved_method;
+}
+
bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
Handle<mirror::ObjectArray<mirror::Class>> classes) {
@@ -445,27 +713,29 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
dex::TypeIndex class_index = FindClassIndexIn(
GetMonomorphicType(classes), caller_compilation_unit_);
if (!class_index.IsValid()) {
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
- << " from inline cache is not inlined because its class is not"
- << " accessible to the caller";
+ LOG_FAIL(kNotInlinedDexCache)
+ << "Call to " << ArtMethod::PrettyMethod(resolved_method)
+ << " from inline cache is not inlined because its class is not"
+ << " accessible to the caller";
return false;
}
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
PointerSize pointer_size = class_linker->GetImagePointerSize();
- if (invoke_instruction->IsInvokeInterface()) {
- resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForInterface(
- resolved_method, pointer_size);
- } else {
- DCHECK(invoke_instruction->IsInvokeVirtual());
- resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForVirtual(
- resolved_method, pointer_size);
+ Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes));
+ resolved_method = ResolveMethodFromInlineCache(
+ monomorphic_type, resolved_method, invoke_instruction, pointer_size);
+
+ LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod();
+ if (resolved_method == nullptr) {
+ // Bogus AOT profile, bail.
+ DCHECK(Runtime::Current()->IsAotCompiler());
+ return false;
}
- DCHECK(resolved_method != nullptr);
+
HInstruction* receiver = invoke_instruction->InputAt(0);
HInstruction* cursor = invoke_instruction->GetPrevious();
HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
- Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes));
if (!TryInlineAndReplace(invoke_instruction,
resolved_method,
ReferenceTypeInfo::Create(monomorphic_type, /* is_exact */ true),
@@ -504,7 +774,8 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction,
HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc);
HInstruction* compare = new (graph_->GetArena()) HNotEqual(
deopt_flag, graph_->GetIntConstant(0, dex_pc));
- HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc);
+ HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(
+ graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc);
if (cursor != nullptr) {
bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
@@ -549,21 +820,35 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
is_referrer,
invoke_instruction->GetDexPc(),
/* needs_access_check */ false);
- HLoadClass::LoadKind kind = HSharpening::SharpenClass(
+ HLoadClass::LoadKind kind = HSharpening::ComputeLoadClassKind(
load_class, codegen_, compiler_driver_, caller_compilation_unit_);
DCHECK(kind != HLoadClass::LoadKind::kInvalid)
<< "We should always be able to reference a class for inline caches";
// Insert before setting the kind, as setting the kind affects the inputs.
bb_cursor->InsertInstructionAfter(load_class, receiver_class);
load_class->SetLoadKind(kind);
+ // In AOT mode, we will most likely load the class from BSS, which will involve a call
+ // to the runtime. In this case, the load instruction will need an environment, so copy
+ // it from the invoke instruction.
+ if (load_class->NeedsEnvironment()) {
+ DCHECK(Runtime::Current()->IsAotCompiler());
+ load_class->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+ }
HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
bb_cursor->InsertInstructionAfter(compare, load_class);
if (with_deoptimization) {
HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
- compare, invoke_instruction->GetDexPc());
+ graph_->GetArena(),
+ compare,
+ receiver,
+ HDeoptimize::Kind::kInline,
+ invoke_instruction->GetDexPc());
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+ DCHECK_EQ(invoke_instruction->InputAt(0), receiver);
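+ // Redirect receiver uses dominated by the guard to the deoptimize, which stands in for the
+ // receiver from here on and carries its reference type info.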
+ receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
+ deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
}
return compare;
}
@@ -590,11 +875,14 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
ArtMethod* method = nullptr;
Handle<mirror::Class> handle = handles_->NewHandle(classes->Get(i));
- if (invoke_instruction->IsInvokeInterface()) {
- method = handle->FindVirtualMethodForInterface(resolved_method, pointer_size);
- } else {
- DCHECK(invoke_instruction->IsInvokeVirtual());
- method = handle->FindVirtualMethodForVirtual(resolved_method, pointer_size);
+ method = ResolveMethodFromInlineCache(
+ handle, resolved_method, invoke_instruction, pointer_size);
+ if (method == nullptr) {
+ DCHECK(Runtime::Current()->IsAotCompiler());
+ // AOT profile is bogus. This loop expects to iterate over all entries,
+ // so just continue.
+ all_targets_inlined = false;
+ continue;
}
HInstruction* receiver = invoke_instruction->InputAt(0);
@@ -603,6 +891,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
dex::TypeIndex class_index = FindClassIndexIn(handle.Get(), caller_compilation_unit_);
HInstruction* return_replacement = nullptr;
+ LOG_NOTE() << "Try inline polymorphic call to " << method->PrettyMethod();
if (!class_index.IsValid() ||
!TryBuildAndInline(invoke_instruction,
method,
@@ -612,8 +901,8 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
} else {
one_target_inlined = true;
- VLOG(compiler) << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method)
- << " has inlined " << ArtMethod::PrettyMethod(method);
+ LOG_SUCCESS() << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method)
+ << " has inlined " << ArtMethod::PrettyMethod(method);
// If we have inlined all targets before, and this receiver is the last seen,
// we deoptimize instead of keeping the original invoke instruction.
@@ -638,7 +927,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
}
invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
// Because the inline cache data can be populated concurrently, we force the end of the
- // iteration. Otherhwise, we could see a new receiver type.
+ // iteration. Otherwise, we could see a new receiver type.
break;
} else {
CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
@@ -647,9 +936,10 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
}
if (!one_target_inlined) {
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
- << " from inline cache is not inlined because none"
- << " of its targets could be inlined";
+ LOG_FAIL_NO_STAT()
+ << "Call to " << ArtMethod::PrettyMethod(resolved_method)
+ << " from inline cache is not inlined because none"
+ << " of its targets could be inlined";
return false;
}
@@ -746,11 +1036,10 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
ArtMethod* resolved_method,
Handle<mirror::ObjectArray<mirror::Class>> classes) {
// This optimization only works under JIT for now.
- DCHECK(Runtime::Current()->UseJitCompilation());
- if (graph_->GetInstructionSet() == kMips64) {
- // TODO: Support HClassTableGet for mips64.
+ if (!Runtime::Current()->UseJitCompilation()) {
return false;
}
+
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
PointerSize pointer_size = class_linker->GetImagePointerSize();
@@ -784,9 +1073,6 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
actual_method = new_method;
} else if (actual_method != new_method) {
// Different methods, bailout.
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
- << " from inline cache is not inlined because it resolves"
- << " to different methods";
return false;
}
}
@@ -840,13 +1126,19 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
} else {
HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
- compare, invoke_instruction->GetDexPc());
+ graph_->GetArena(),
+ compare,
+ receiver,
+ HDeoptimize::Kind::kInline,
+ invoke_instruction->GetDexPc());
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
if (return_replacement != nullptr) {
invoke_instruction->ReplaceWith(return_replacement);
}
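+ // As in AddTypeGuard, let the deoptimize stand in for the receiver in dominated code and
+ // carry its reference type info.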
+ receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+ deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
}
// Run type propagation to get the guard typed.
@@ -859,6 +1151,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
MaybeRecordStat(kInlinedPolymorphicCall);
+ LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod();
return true;
}
@@ -873,11 +1166,23 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) {
if (invoke_instruction->IsInvokeInterface()) {
+ DCHECK(!method->IsProxyMethod());
// Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
// better than an invoke-interface because:
// 1) In the best case, the interface call has one more indirection (to fetch the IMT).
// 2) We will not go to the conflict trampoline with an invoke-virtual.
// TODO: Consider sharpening once it is not dependent on the compiler driver.
+
+ if (method->IsDefault() && !method->IsCopied()) {
+ // Changing to invoke-virtual cannot be done on an original default method
+ // since it's not in any vtable. Devirtualization by exact type/inline-cache
+ // always uses a method in the iftable which is never an original default
+ // method.
+ // On the other hand, inlining an original default method by CHA is fine.
+ DCHECK(cha_devirtualize);
+ return false;
+ }
+
const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
uint32_t dex_method_index = FindMethodIndexIn(
method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
@@ -928,13 +1233,34 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
return true;
}
+size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const {
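+ // Walk up the chain of inliners (this one and its parents) and count how many of the
+ // graphs being inlined belong to `method`.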
+ const HInliner* current = this;
+ size_t count = 0;
+ do {
+ if (current->graph_->GetArtMethod() == method) {
+ ++count;
+ }
+ current = current->parent_;
+ } while (current != nullptr);
+ return count;
+}
+
bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* method,
ReferenceTypeInfo receiver_type,
HInstruction** return_replacement) {
if (method->IsProxyMethod()) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because of unimplemented inline support for proxy methods.";
+ LOG_FAIL(kNotInlinedProxy)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because of unimplemented inline support for proxy methods.";
+ return false;
+ }
+
+ if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) {
+ LOG_FAIL(kNotInlinedRecursiveBudget)
+ << "Method "
+ << method->PrettyMethod()
+ << " is not inlined because it has reached its recursive call budget.";
return false;
}
@@ -943,15 +1269,16 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
if (!compiler_driver_->MayInline(method->GetDexFile(),
outer_compilation_unit_.GetDexFile())) {
if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
- VLOG(compiler) << "Successfully replaced pattern of invoke "
- << method->PrettyMethod();
+ LOG_SUCCESS() << "Successfully replaced pattern of invoke "
+ << method->PrettyMethod();
MaybeRecordStat(kReplacedInvokeWithSimplePattern);
return true;
}
- VLOG(compiler) << "Won't inline " << method->PrettyMethod() << " in "
- << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
- << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
- << method->GetDexFile()->GetLocation();
+ LOG_FAIL(kNotInlinedWont)
+ << "Won't inline " << method->PrettyMethod() << " in "
+ << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
+ << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
+ << method->GetDexFile()->GetLocation();
return false;
}
@@ -960,30 +1287,32 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
const DexFile::CodeItem* code_item = method->GetCodeItem();
if (code_item == nullptr) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because it is native";
+ LOG_FAIL_NO_STAT()
+ << "Method " << method->PrettyMethod() << " is not inlined because it is native";
return false;
}
size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is too big to inline: "
- << code_item->insns_size_in_code_units_
- << " > "
- << inline_max_code_units;
+ LOG_FAIL(kNotInlinedCodeItem)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because its code item is too big: "
+ << code_item->insns_size_in_code_units_
+ << " > "
+ << inline_max_code_units;
return false;
}
if (code_item->tries_size_ != 0) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because of try block";
+ LOG_FAIL(kNotInlinedTryCatch)
+ << "Method " << method->PrettyMethod() << " is not inlined because of try block";
return false;
}
if (!method->IsCompilable()) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " has soft failures un-handled by the compiler, so it cannot be inlined";
+ LOG_FAIL(kNotInlinedNotVerified)
+ << "Method " << method->PrettyMethod()
+ << " has soft failures un-handled by the compiler, so it cannot be inlined";
}
if (!method->GetDeclaringClass()->IsVerified()) {
@@ -991,8 +1320,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
if (Runtime::Current()->UseJitCompilation() ||
!compiler_driver_->IsMethodVerifiedWithoutFailures(
method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " couldn't be verified, so it cannot be inlined";
+ LOG_FAIL(kNotInlinedNotVerified)
+ << "Method " << method->PrettyMethod()
+ << " couldn't be verified, so it cannot be inlined";
return false;
}
}
@@ -1001,9 +1331,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
// Case of a static method that cannot be inlined because it implicitly
// requires an initialization check of its declaring class.
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because it is static and requires a clinit"
- << " check that cannot be emitted due to Dex cache limitations";
+ LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod()
+ << " is not inlined because it is static and requires a clinit"
+ << " check that cannot be emitted due to Dex cache limitations";
return false;
}
@@ -1012,7 +1342,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
return false;
}
- VLOG(compiler) << "Successfully inlined " << method->PrettyMethod();
+ LOG_SUCCESS() << method->PrettyMethod();
MaybeRecordStat(kInlinedInvoke);
return true;
}
@@ -1064,9 +1394,8 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
// TODO: Needs null check.
return false;
}
- Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg);
- HInstanceFieldGet* iget = CreateInstanceFieldGet(dex_cache, data.field_idx, obj);
+ HInstanceFieldGet* iget = CreateInstanceFieldGet(data.field_idx, resolved_method, obj);
DCHECK_EQ(iget->GetFieldOffset().Uint32Value(), data.field_offset);
DCHECK_EQ(iget->IsVolatile() ? 1u : 0u, data.is_volatile);
invoke_instruction->GetBlock()->InsertInstructionBefore(iget, invoke_instruction);
@@ -1079,10 +1408,9 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
// TODO: Needs null check.
return false;
}
- Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg);
HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, data.src_arg);
- HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, data.field_idx, obj, value);
+ HInstanceFieldSet* iput = CreateInstanceFieldSet(data.field_idx, resolved_method, obj, value);
DCHECK_EQ(iput->GetFieldOffset().Uint32Value(), data.field_offset);
DCHECK_EQ(iput->IsVolatile() ? 1u : 0u, data.is_volatile);
invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
@@ -1116,24 +1444,19 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
[](uint16_t index) { return index != DexFile::kDexNoIndex16; }));
// Create HInstanceFieldSet for each IPUT that stores non-zero data.
- Handle<mirror::DexCache> dex_cache;
HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, /* this */ 0u);
bool needs_constructor_barrier = false;
for (size_t i = 0; i != number_of_iputs; ++i) {
HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]);
if (!value->IsConstant() || !value->AsConstant()->IsZeroBitPattern()) {
- if (dex_cache.GetReference() == nullptr) {
- dex_cache = handles_->NewHandle(resolved_method->GetDexCache());
- }
uint16_t field_index = iput_field_indexes[i];
- HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, field_index, obj, value);
+ bool is_final;
+ HInstanceFieldSet* iput =
+ CreateInstanceFieldSet(field_index, resolved_method, obj, value, &is_final);
invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
// Check whether the field is final. If it is, we need to add a barrier.
- PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
- ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
- DCHECK(resolved_field != nullptr);
- if (resolved_field->IsFinal()) {
+ if (is_final) {
needs_constructor_barrier = true;
}
}
@@ -1152,12 +1475,13 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
return true;
}
-HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache,
- uint32_t field_index,
+HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index,
+ ArtMethod* referrer,
HInstruction* obj)
REQUIRES_SHARED(Locks::mutator_lock_) {
- PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
- ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ ArtField* resolved_field =
+ class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false);
DCHECK(resolved_field != nullptr);
HInstanceFieldGet* iget = new (graph_->GetArena()) HInstanceFieldGet(
obj,
@@ -1167,12 +1491,13 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex
resolved_field->IsVolatile(),
field_index,
resolved_field->GetDeclaringClass()->GetDexClassDefIndex(),
- *dex_cache->GetDexFile(),
+ *referrer->GetDexFile(),
// Read barrier generates a runtime call in slow path and we need a valid
// dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
/* dex_pc */ 0);
if (iget->GetType() == Primitive::kPrimNot) {
// Use the same dex_cache that we used for field lookup as the hint_dex_cache.
+ Handle<mirror::DexCache> dex_cache = handles_->NewHandle(referrer->GetDexCache());
ReferenceTypePropagation rtp(graph_,
outer_compilation_unit_.GetClassLoader(),
dex_cache,
@@ -1183,14 +1508,21 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex
return iget;
}
-HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache,
- uint32_t field_index,
+HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index,
+ ArtMethod* referrer,
HInstruction* obj,
- HInstruction* value)
+ HInstruction* value,
+ bool* is_final)
REQUIRES_SHARED(Locks::mutator_lock_) {
- PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
- ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ ArtField* resolved_field =
+ class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false);
DCHECK(resolved_field != nullptr);
+ if (is_final != nullptr) {
+ // This information is needed only for constructors.
+ DCHECK(referrer->IsConstructor());
+ *is_final = resolved_field->IsFinal();
+ }
HInstanceFieldSet* iput = new (graph_->GetArena()) HInstanceFieldSet(
obj,
value,
@@ -1200,7 +1532,7 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex
resolved_field->IsVolatile(),
field_index,
resolved_field->GetDeclaringClass()->GetDexClassDefIndex(),
- *dex_cache->GetDexFile(),
+ *referrer->GetDexFile(),
// Read barrier generates a runtime call in slow path and we need a valid
// dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
/* dex_pc */ 0);
@@ -1298,15 +1630,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
handles_);
if (builder.BuildGraph() != kAnalysisSuccess) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be built, so cannot be inlined";
+ LOG_FAIL(kNotInlinedCannotBuild)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be built, so cannot be inlined";
return false;
}
if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
compiler_driver_->GetInstructionSet())) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " cannot be inlined because of the register allocator";
+ LOG_FAIL(kNotInlinedRegisterAllocator)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " cannot be inlined because of the register allocator";
return false;
}
@@ -1353,15 +1687,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
/* is_first_run */ false).Run();
}
- size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
- size_t number_of_inlined_instructions =
- RunOptimizations(callee_graph, code_item, dex_compilation_unit);
- number_of_instructions_budget += number_of_inlined_instructions;
+ RunOptimizations(callee_graph, code_item, dex_compilation_unit);
HBasicBlock* exit_block = callee_graph->GetExitBlock();
if (exit_block == nullptr) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it has an infinite loop";
+ LOG_FAIL(kNotInlinedInfiniteLoop)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it has an infinite loop";
return false;
}
@@ -1370,15 +1702,24 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
if (predecessor->GetLastInstruction()->IsThrow()) {
if (invoke_instruction->GetBlock()->IsTryBlock()) {
// TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because one branch always throws and"
- << " caller is in a try/catch block";
+ LOG_FAIL(kNotInlinedTryCatch)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because one branch always throws and"
+ << " caller is in a try/catch block";
return false;
} else if (graph_->GetExitBlock() == nullptr) {
// TODO(ngeoffray): Support adding HExit in the caller graph.
+ LOG_FAIL(kNotInlinedInfiniteLoop)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because one branch always throws and"
+ << " caller does not have an exit block";
+ return false;
+ } else if (graph_->HasIrreducibleLoops()) {
+ // TODO(ngeoffray): Support re-computing loop information to graphs with
+ // irreducible loops?
VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " could not be inlined because one branch always throws and"
- << " caller does not have an exit block";
+ << " caller has irreducible loops";
return false;
}
} else {
@@ -1387,32 +1728,31 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
}
if (!has_one_return) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it always throws";
+ LOG_FAIL(kNotInlinedAlwaysThrows)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it always throws";
return false;
}
size_t number_of_instructions = 0;
-
- bool can_inline_environment =
- total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters;
-
// Skip the entry block, it does not contain instructions that prevent inlining.
for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) {
if (block->IsLoopHeader()) {
if (block->GetLoopInformation()->IsIrreducible()) {
// Don't inline methods with irreducible loops, they could prevent some
// optimizations to run.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it contains an irreducible loop";
+ LOG_FAIL(kNotInlinedIrreducibleLoop)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it contains an irreducible loop";
return false;
}
if (!block->GetLoopInformation()->HasExitEdge()) {
// Don't inline methods with loops without exit, since they cause the
// loop information to be computed incorrectly when updating after
// inlining.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it contains a loop with no exit";
+ LOG_FAIL(kNotInlinedLoopWithoutExit)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it contains a loop with no exit";
return false;
}
}
@@ -1420,34 +1760,39 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
for (HInstructionIterator instr_it(block->GetInstructions());
!instr_it.Done();
instr_it.Advance()) {
- if (number_of_instructions++ == number_of_instructions_budget) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " is not inlined because its caller has reached"
- << " its instruction budget limit.";
+ if (++number_of_instructions >= inlining_budget_) {
+ LOG_FAIL(kNotInlinedInstructionBudget)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " is not inlined because the outer method has reached"
+ << " its instruction budget limit.";
return false;
}
HInstruction* current = instr_it.Current();
- if (!can_inline_environment && current->NeedsEnvironment()) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " is not inlined because its caller has reached"
- << " its environment budget limit.";
+ if (current->NeedsEnvironment() &&
+ (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) {
+ LOG_FAIL(kNotInlinedEnvironmentBudget)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " is not inlined because its caller has reached"
+ << " its environment budget limit.";
return false;
}
if (current->NeedsEnvironment() &&
!CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(),
resolved_method)) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because " << current->DebugName()
- << " needs an environment, is in a different dex file"
- << ", and cannot be encoded in the stack maps.";
+ LOG_FAIL(kNotInlinedStackMaps)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because " << current->DebugName()
+ << " needs an environment, is in a different dex file"
+ << ", and cannot be encoded in the stack maps.";
return false;
}
if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because " << current->DebugName()
- << " it is in a different dex file and requires access to the dex cache";
+ LOG_FAIL(kNotInlinedDexCache)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because " << current->DebugName()
+ << " it is in a different dex file and requires access to the dex cache";
return false;
}
@@ -1456,21 +1801,24 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
current->IsUnresolvedStaticFieldSet() ||
current->IsUnresolvedInstanceFieldSet()) {
// Entrypoint for unresolved fields does not handle inlined frames.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it is using an unresolved"
- << " entrypoint";
+ LOG_FAIL(kNotInlinedUnresolvedEntrypoint)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it is using an unresolved"
+ << " entrypoint";
return false;
}
}
}
- number_of_inlined_instructions_ += number_of_instructions;
-
DCHECK_EQ(caller_instruction_counter, graph_->GetCurrentInstructionId())
<< "No instructions can be added to the outer graph while inner graph is being built";
+ // Inline the callee graph inside the caller graph.
const int32_t callee_instruction_counter = callee_graph->GetCurrentInstructionId();
graph_->SetCurrentInstructionId(callee_instruction_counter);
*return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
+ // Update our budget for other inlining attempts in `caller_graph`.
+ total_number_of_instructions_ += number_of_instructions;
+ UpdateInliningBudget();
DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId())
<< "No instructions can be added to the inner graph during inlining into the outer graph";
@@ -1483,15 +1831,15 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
return true;
}
-size_t HInliner::RunOptimizations(HGraph* callee_graph,
- const DexFile::CodeItem* code_item,
- const DexCompilationUnit& dex_compilation_unit) {
+void HInliner::RunOptimizations(HGraph* callee_graph,
+ const DexFile::CodeItem* code_item,
+ const DexCompilationUnit& dex_compilation_unit) {
// Note: if the outermost_graph_ is being compiled OSR, we should not run any
// optimization that could lead to a HDeoptimize. The following optimizations do not.
HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
HConstantFolding fold(callee_graph, "constant_folding$inliner");
HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
- InstructionSimplifier simplify(callee_graph, inline_stats_);
+ InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_);
IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
HOptimization* optimizations[] = {
@@ -1507,23 +1855,37 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph,
optimization->Run();
}
- size_t number_of_inlined_instructions = 0u;
- if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
- HInliner inliner(callee_graph,
- outermost_graph_,
- codegen_,
- outer_compilation_unit_,
- dex_compilation_unit,
- compiler_driver_,
- handles_,
- inline_stats_,
- total_number_of_dex_registers_ + code_item->registers_size_,
- depth_ + 1);
- inliner.Run();
- number_of_inlined_instructions += inliner.number_of_inlined_instructions_;
+ // Bail early for pathological cases on the environment (for example recursive calls,
+ // or a too-large environment).
+ if (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters) {
+ LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod()
+ << " will not be inlined because the outer method has reached"
+ << " its environment budget limit.";
+ return;
+ }
+
+ // Bail early if we know we already are over the limit.
+ size_t number_of_instructions = CountNumberOfInstructions(callee_graph);
+ if (number_of_instructions > inlining_budget_) {
+ LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod()
+ << " will not be inlined because the outer method has reached"
+ << " its instruction budget limit. " << number_of_instructions;
+ return;
}
- return number_of_inlined_instructions;
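+ // Within budget: recursively run the inliner on the callee graph, threading through the
+ // accumulated dex register and instruction counts and recording `this` as the parent.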
+ HInliner inliner(callee_graph,
+ outermost_graph_,
+ codegen_,
+ outer_compilation_unit_,
+ dex_compilation_unit,
+ compiler_driver_,
+ handles_,
+ inline_stats_,
+ total_number_of_dex_registers_ + code_item->registers_size_,
+ total_number_of_instructions_ + number_of_instructions,
+ this,
+ depth_ + 1);
+ inliner.Run();
}
static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti,
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 75d025ae41..9e4685cbf4 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -20,6 +20,7 @@
#include "dex_file_types.h"
#include "invoke_type.h"
#include "optimization.h"
+#include "jit/profile_compilation_info.h"
namespace art {
@@ -41,7 +42,9 @@ class HInliner : public HOptimization {
VariableSizedHandleScope* handles,
OptimizingCompilerStats* stats,
size_t total_number_of_dex_registers,
- size_t depth)
+ size_t total_number_of_instructions,
+ HInliner* parent,
+ size_t depth = 0)
: HOptimization(outer_graph, kInlinerPassName, stats),
outermost_graph_(outermost_graph),
outer_compilation_unit_(outer_compilation_unit),
@@ -49,8 +52,10 @@ class HInliner : public HOptimization {
codegen_(codegen),
compiler_driver_(compiler_driver),
total_number_of_dex_registers_(total_number_of_dex_registers),
+ total_number_of_instructions_(total_number_of_instructions),
+ parent_(parent),
depth_(depth),
- number_of_inlined_instructions_(0),
+ inlining_budget_(0),
handles_(handles),
inline_stats_(nullptr) {}
@@ -59,6 +64,15 @@ class HInliner : public HOptimization {
static constexpr const char* kInlinerPassName = "inliner";
private:
+ enum InlineCacheType {
+ kInlineCacheNoData = 0,
+ kInlineCacheUninitialized = 1,
+ kInlineCacheMonomorphic = 2,
+ kInlineCachePolymorphic = 3,
+ kInlineCacheMegamorphic = 4,
+ kInlineCacheMissingTypes = 5
+ };
+
bool TryInline(HInvoke* invoke_instruction);
// Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
@@ -85,10 +99,10 @@ class HInliner : public HOptimization {
HInstruction** return_replacement);
// Run simple optimizations on `callee_graph`.
- // Returns the number of inlined instructions.
- size_t RunOptimizations(HGraph* callee_graph,
- const DexFile::CodeItem* code_item,
- const DexCompilationUnit& dex_compilation_unit);
+ void RunOptimizations(HGraph* callee_graph,
+ const DexFile::CodeItem* code_item,
+ const DexCompilationUnit& dex_compilation_unit)
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Try to recognize known simple patterns and replace invoke call with appropriate instructions.
bool TryPatternSubstitution(HInvoke* invoke_instruction,
@@ -97,14 +111,54 @@ class HInliner : public HOptimization {
REQUIRES_SHARED(Locks::mutator_lock_);
// Create a new HInstanceFieldGet.
- HInstanceFieldGet* CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache,
- uint32_t field_index,
+ HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index,
+ ArtMethod* referrer,
HInstruction* obj);
// Create a new HInstanceFieldSet.
- HInstanceFieldSet* CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache,
- uint32_t field_index,
+ HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index,
+ ArtMethod* referrer,
HInstruction* obj,
- HInstruction* value);
+ HInstruction* value,
+ bool* is_final = nullptr);
+
+ // Try inlining the invoke instruction using inline caches.
+ bool TryInlineFromInlineCache(
+ const DexFile& caller_dex_file,
+ HInvoke* invoke_instruction,
+ ArtMethod* resolved_method)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Try getting the inline cache from the JIT code cache.
+ // Returns the state of the inline cache, or kInlineCacheNoData if there is no
+ // profiling info or the cache could not be allocated.
+ InlineCacheType GetInlineCacheJIT(
+ HInvoke* invoke_instruction,
+ StackHandleScope<1>* hs,
+ /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Try getting the inline cache from the AOT offline profile.
+ // Returns the state of the inline cache, or kInlineCacheNoData if the profile
+ // has no information for this invoke or the cache could not be allocated.
+ InlineCacheType GetInlineCacheAOT(const DexFile& caller_dex_file,
+ HInvoke* invoke_instruction,
+ StackHandleScope<1>* hs,
+ /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Extract the mirror classes from the offline profile and add them to the `inline_cache`.
+ // Note that even if we have profile data for the invoke, the inline_cache might contain
+ // only null entries if the types cannot be resolved.
+ InlineCacheType ExtractClassesFromOfflineProfile(
+ const HInvoke* invoke_instruction,
+ const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
+ /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Compute the inline cache type.
+ InlineCacheType GetInlineCacheType(
+ const Handle<mirror::ObjectArray<mirror::Class>>& classes)
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Try to inline the target of a monomorphic call. If successful, the code
// in the graph will look like:
@@ -209,14 +263,30 @@ class HInliner : public HOptimization {
HInstruction* return_replacement,
HInstruction* invoke_instruction);
+ // Update the inlining budget based on `total_number_of_instructions_`.
+ void UpdateInliningBudget();
+
+ // Count the number of calls of `method` being inlined recursively.
+ size_t CountRecursiveCallsOf(ArtMethod* method) const;
+
+ // Pretty-printing helper producing the indentation used during logging.
+ std::string DepthString(int line) const;
+
HGraph* const outermost_graph_;
const DexCompilationUnit& outer_compilation_unit_;
const DexCompilationUnit& caller_compilation_unit_;
CodeGenerator* const codegen_;
CompilerDriver* const compiler_driver_;
const size_t total_number_of_dex_registers_;
+ size_t total_number_of_instructions_;
+
+ // The 'parent' inliner, i.e. the inlining optimization that requested
+ // `graph_` to be inlined.
+ const HInliner* const parent_;
const size_t depth_;
- size_t number_of_inlined_instructions_;
+
+ // The budget left for inlining, in number of instructions.
+ size_t inlining_budget_;
VariableSizedHandleScope* const handles_;
// Used to record stats about optimizations on the inlined graph.
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index c60f6e5393..88f67fae04 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -37,37 +37,45 @@ HBasicBlock* HInstructionBuilder::FindBlockStartingAt(uint32_t dex_pc) const {
return block_builder_->GetBlockAt(dex_pc);
}
-ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) {
+inline ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) {
ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
const size_t vregs = graph_->GetNumberOfVRegs();
- if (locals->size() != vregs) {
- locals->resize(vregs, nullptr);
-
- if (block->IsCatchBlock()) {
- // We record incoming inputs of catch phis at throwing instructions and
- // must therefore eagerly create the phis. Phis for undefined vregs will
- // be deleted when the first throwing instruction with the vreg undefined
- // is encountered. Unused phis will be removed by dead phi analysis.
- for (size_t i = 0; i < vregs; ++i) {
- // No point in creating the catch phi if it is already undefined at
- // the first throwing instruction.
- HInstruction* current_local_value = (*current_locals_)[i];
- if (current_local_value != nullptr) {
- HPhi* phi = new (arena_) HPhi(
- arena_,
- i,
- 0,
- current_local_value->GetType());
- block->AddPhi(phi);
- (*locals)[i] = phi;
- }
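+ // Fast path: the locals vector for this block has already been sized.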
+ if (locals->size() == vregs) {
+ return locals;
+ }
+ return GetLocalsForWithAllocation(block, locals, vregs);
+}
+
+ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsForWithAllocation(
+ HBasicBlock* block,
+ ArenaVector<HInstruction*>* locals,
+ const size_t vregs) {
+ DCHECK_NE(locals->size(), vregs);
+ locals->resize(vregs, nullptr);
+ if (block->IsCatchBlock()) {
+ // We record incoming inputs of catch phis at throwing instructions and
+ // must therefore eagerly create the phis. Phis for undefined vregs will
+ // be deleted when the first throwing instruction with the vreg undefined
+ // is encountered. Unused phis will be removed by dead phi analysis.
+ for (size_t i = 0; i < vregs; ++i) {
+ // No point in creating the catch phi if it is already undefined at
+ // the first throwing instruction.
+ HInstruction* current_local_value = (*current_locals_)[i];
+ if (current_local_value != nullptr) {
+ HPhi* phi = new (arena_) HPhi(
+ arena_,
+ i,
+ 0,
+ current_local_value->GetType());
+ block->AddPhi(phi);
+ (*locals)[i] = phi;
}
}
}
return locals;
}
-HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) {
+inline HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) {
ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
return (*locals)[local];
}
@@ -1676,10 +1684,10 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index,
dex_pc,
needs_access_check);
- HLoadClass::LoadKind load_kind = HSharpening::SharpenClass(load_class,
- code_generator_,
- compiler_driver_,
- *dex_compilation_unit_);
+ HLoadClass::LoadKind load_kind = HSharpening::ComputeLoadClassKind(load_class,
+ code_generator_,
+ compiler_driver_,
+ *dex_compilation_unit_);
if (load_kind == HLoadClass::LoadKind::kInvalid) {
// We actually cannot reference this class, we're forced to bail.
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index e735a0c46d..7fdc1883ca 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -93,6 +93,10 @@ class HInstructionBuilder : public ValueObject {
HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const;
ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
+ // Out-of-line slow path of GetLocalsFor(), whose fast path is kept small so
+ // that callers can inline it.
+ ArenaVector<HInstruction*>* GetLocalsForWithAllocation(
+ HBasicBlock* block, ArenaVector<HInstruction*>* locals, const size_t vregs);
HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local);
HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const;
HInstruction* LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc);
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 35f59cb4a4..60790e5b84 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -19,14 +19,18 @@
#include "escape.h"
#include "intrinsics.h"
#include "mirror/class-inl.h"
+#include "sharpening.h"
#include "scoped_thread_state_change-inl.h"
namespace art {
class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
public:
- InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+ InstructionSimplifierVisitor(HGraph* graph,
+ CodeGenerator* codegen,
+ OptimizingCompilerStats* stats)
: HGraphDelegateVisitor(graph),
+ codegen_(codegen),
stats_(stats) {}
void Run();
@@ -112,6 +116,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
void SimplifyAllocationIntrinsic(HInvoke* invoke);
void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
+ CodeGenerator* codegen_;
OptimizingCompilerStats* stats_;
bool simplification_occurred_ = false;
int simplifications_at_current_position_ = 0;
@@ -123,7 +128,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
};
void InstructionSimplifier::Run() {
- InstructionSimplifierVisitor visitor(graph_, stats_);
+ InstructionSimplifierVisitor visitor(graph_, codegen_, stats_);
visitor.Run();
}
@@ -1805,6 +1810,8 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
{
ScopedObjectAccess soa(Thread::Current());
+ Primitive::Type source_component_type = Primitive::kPrimVoid;
+ Primitive::Type destination_component_type = Primitive::kPrimVoid;
ReferenceTypeInfo destination_rti = destination->GetReferenceTypeInfo();
if (destination_rti.IsValid()) {
if (destination_rti.IsObjectArray()) {
@@ -1814,6 +1821,8 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
optimizations.SetDestinationIsTypedObjectArray();
}
if (destination_rti.IsPrimitiveArrayClass()) {
+ destination_component_type =
+ destination_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType();
optimizations.SetDestinationIsPrimitiveArray();
} else if (destination_rti.IsNonPrimitiveArrayClass()) {
optimizations.SetDestinationIsNonPrimitiveArray();
@@ -1826,10 +1835,55 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
}
if (source_rti.IsPrimitiveArrayClass()) {
optimizations.SetSourceIsPrimitiveArray();
+ source_component_type = source_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType();
} else if (source_rti.IsNonPrimitiveArrayClass()) {
optimizations.SetSourceIsNonPrimitiveArray();
}
}
+ // For primitive arrays, use their optimized ArtMethod implementations.
+ if ((source_component_type != Primitive::kPrimVoid) &&
+ (source_component_type == destination_component_type)) {
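+ // Both arrays are primitive arrays with the same component type: retarget the call to the
+ // corresponding typed System.arraycopy overload.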
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ PointerSize image_size = class_linker->GetImagePointerSize();
+ HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
+ mirror::Class* system = invoke->GetResolvedMethod()->GetDeclaringClass();
+ ArtMethod* method = nullptr;
+ switch (source_component_type) {
+ case Primitive::kPrimBoolean:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([ZI[ZII)V", image_size);
+ break;
+ case Primitive::kPrimByte:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([BI[BII)V", image_size);
+ break;
+ case Primitive::kPrimChar:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([CI[CII)V", image_size);
+ break;
+ case Primitive::kPrimShort:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([SI[SII)V", image_size);
+ break;
+ case Primitive::kPrimInt:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([II[III)V", image_size);
+ break;
+ case Primitive::kPrimFloat:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([FI[FII)V", image_size);
+ break;
+ case Primitive::kPrimLong:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([JI[JII)V", image_size);
+ break;
+ case Primitive::kPrimDouble:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([DI[DII)V", image_size);
+ break;
+ default:
+ LOG(FATAL) << "Unreachable";
+ }
+ DCHECK(method != nullptr);
+ invoke->SetResolvedMethod(method);
+ // Sharpen the new invoke. Note that we do not update the dex method index of
+ // the invoke, as we would need to look it up in the current dex file, and it
+ is unlikely that it exists. The most common situation for such typed
+ // arraycopy methods is a direct pointer to the boot image.
+ HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_);
+ }
}
}
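For reference, the switch above just re-resolves the call to the primitive-typed System.arraycopy overload whose dex descriptor matches the component type. A minimal standalone C++ sketch of that mapping (illustrative only; the enum and helper below are not part of ART):

// Illustrative mirror of the switch in SimplifySystemArrayCopy: each primitive
// component type selects the matching typed System.arraycopy overload by its
// dex method descriptor.
enum class ComponentType { kBoolean, kByte, kChar, kShort, kInt, kFloat, kLong, kDouble };

static const char* TypedArrayCopyDescriptor(ComponentType type) {
  switch (type) {
    case ComponentType::kBoolean: return "([ZI[ZII)V";
    case ComponentType::kByte:    return "([BI[BII)V";
    case ComponentType::kChar:    return "([CI[CII)V";
    case ComponentType::kShort:   return "([SI[SII)V";
    case ComponentType::kInt:     return "([II[III)V";
    case ComponentType::kFloat:   return "([FI[FII)V";
    case ComponentType::kLong:    return "([JI[JII)V";
    case ComponentType::kDouble:  return "([DI[DII)V";
  }
  return nullptr;  // Unreachable for a valid enum value.
}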
@@ -2078,6 +2132,9 @@ void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) {
if (cond->IsConstant()) {
if (cond->AsIntConstant()->IsFalse()) {
// Never deopt: instruction can be removed.
+ if (deoptimize->GuardsAnInput()) {
+ deoptimize->ReplaceWith(deoptimize->GuardedInput());
+ }
deoptimize->GetBlock()->RemoveInstruction(deoptimize);
} else {
// Always deopt.
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index 7fe1067aa9..f7329a4a1f 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -23,6 +23,8 @@
namespace art {
+class CodeGenerator;
+
/**
* Implements optimizations specific to each instruction.
*
@@ -36,15 +38,19 @@ namespace art {
class InstructionSimplifier : public HOptimization {
public:
explicit InstructionSimplifier(HGraph* graph,
+ CodeGenerator* codegen,
OptimizingCompilerStats* stats = nullptr,
const char* name = kInstructionSimplifierPassName)
- : HOptimization(graph, name, stats) {}
+ : HOptimization(graph, name, stats),
+ codegen_(codegen) {}
static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier";
void Run() OVERRIDE;
private:
+ CodeGenerator* codegen_;
+
DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier);
};
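Callers of the pass now have to supply the CodeGenerator. A minimal usage sketch, assuming `graph`, `codegen` and `stats` are already in scope at the call site (the caller-side change is not shown in this hunk):

// Hypothetical call site; only the extra `codegen` argument is new.
static void RunInstructionSimplifier(HGraph* graph,
                                     CodeGenerator* codegen,
                                     OptimizingCompilerStats* stats) {
  InstructionSimplifier simplifier(graph, codegen, stats);
  simplifier.Run();
}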
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 17d683f357..8df80adc9f 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -19,6 +19,7 @@
#include "art_method.h"
#include "class_linker.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "invoke_type.h"
#include "mirror/dex_cache-inl.h"
#include "nodes.h"
@@ -178,4 +179,112 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
return os;
}
+void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
+ CodeGenerator* codegen,
+ Location return_location,
+ Location first_argument_location) {
+ if (Runtime::Current()->IsAotCompiler()) {
+ if (codegen->GetCompilerOptions().IsBootImage() ||
+ codegen->GetCompilerOptions().GetCompilePic()) {
+ // TODO(ngeoffray): Support boot image compilation.
+ return;
+ }
+ }
+
+ IntegerValueOfInfo info = ComputeIntegerValueOfInfo();
+
+ // The most common case is that we have found all we need (the classes are initialized
+ // and in the boot image). Bail out if not.
+ if (info.integer_cache == nullptr ||
+ info.integer == nullptr ||
+ info.cache == nullptr ||
+ info.value_offset == 0 ||
+ // low and high cannot be 0, per the spec.
+ info.low == 0 ||
+ info.high == 0) {
+ LOG(INFO) << "Integer.valueOf will not be optimized";
+ return;
+ }
+
+ // The intrinsic will call the runtime if it needs to allocate a j.l.Integer.
+ LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetArena()) LocationSummary(
+ invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
+ if (!invoke->InputAt(0)->IsConstant()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+ locations->AddTemp(first_argument_location);
+ locations->SetOut(return_location);
+}
+
+IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo() {
+ // Note that we could cache all of the data looked up here, but there's no good
+ // location for it. We don't want to add it to WellKnownClasses, to avoid creating global
+ // JNI values. Adding it as state to the compiler singleton seems like the wrong
+ // separation of concerns.
+ // The need for this data should be pretty rare though.
+
+ // The most common case is that the classes are in the boot image and initialized,
+ // which is easy to generate code for. We bail if not.
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+ Runtime* runtime = Runtime::Current();
+ ClassLinker* class_linker = runtime->GetClassLinker();
+ gc::Heap* heap = runtime->GetHeap();
+ IntegerValueOfInfo info;
+ info.integer_cache = class_linker->FindSystemClass(self, "Ljava/lang/Integer$IntegerCache;");
+ if (info.integer_cache == nullptr) {
+ self->ClearException();
+ return info;
+ }
+ if (!heap->ObjectIsInBootImageSpace(info.integer_cache) || !info.integer_cache->IsInitialized()) {
+ // Optimization only works if the class is initialized and in the boot image.
+ return info;
+ }
+ info.integer = class_linker->FindSystemClass(self, "Ljava/lang/Integer;");
+ if (info.integer == nullptr) {
+ self->ClearException();
+ return info;
+ }
+ if (!heap->ObjectIsInBootImageSpace(info.integer) || !info.integer->IsInitialized()) {
+ // Optimization only works if the class is initialized and in the boot image.
+ return info;
+ }
+
+ ArtField* field = info.integer_cache->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;");
+ if (field == nullptr) {
+ return info;
+ }
+ info.cache = static_cast<mirror::ObjectArray<mirror::Object>*>(
+ field->GetObject(info.integer_cache).Ptr());
+ if (info.cache == nullptr) {
+ return info;
+ }
+
+ if (!heap->ObjectIsInBootImageSpace(info.cache)) {
+ // Optimization only works if the object is in the boot image.
+ return info;
+ }
+
+ field = info.integer->FindDeclaredInstanceField("value", "I");
+ if (field == nullptr) {
+ return info;
+ }
+ info.value_offset = field->GetOffset().Int32Value();
+
+ field = info.integer_cache->FindDeclaredStaticField("low", "I");
+ if (field == nullptr) {
+ return info;
+ }
+ info.low = field->GetInt(info.integer_cache);
+
+ field = info.integer_cache->FindDeclaredStaticField("high", "I");
+ if (field == nullptr) {
+ return info;
+ }
+ info.high = field->GetInt(info.integer_cache);
+
+ DCHECK_EQ(info.cache->GetLength(), info.high - info.low + 1);
+ return info;
+}
+
} // namespace art
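The backends below all emit the same shape of fast path from this info: a single unsigned comparison covers both cache bounds, the boxed object is loaded from the boot-image cache array, and only out-of-range values call the allocation entrypoint. A self-contained C++ sketch of that logic (illustrative only; allocation is stubbed out and the names are assumptions):

#include <cstdint>
#include <vector>

struct Boxed { int32_t value; };

// `low`, `high` and `cache` play the roles of IntegerValueOfInfo::low/high/cache.
Boxed* IntegerValueOf(int32_t v, int32_t low, int32_t high, const std::vector<Boxed*>& cache) {
  // Subtracting `low` and comparing unsigned against the cache length folds the
  // two range checks (v < low, v > high) into one, like the generated
  // Add/Cmp/branch-if-higher-or-same sequence.
  uint32_t index = static_cast<uint32_t>(v) - static_cast<uint32_t>(low);
  uint32_t length = static_cast<uint32_t>(high - low + 1);
  if (index < length) {
    return cache[index];  // Fast path: reuse the boot-image j.l.Integer.
  }
  return new Boxed{v};    // Slow path: allocate and initialize a new object.
}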
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 6425e1313f..9da5a7fa3b 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -113,6 +113,39 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
}
+ static void ComputeIntegerValueOfLocations(HInvoke* invoke,
+ CodeGenerator* codegen,
+ Location return_location,
+ Location first_argument_location);
+
+ // Temporary data structure holding the data needed by the Integer.valueOf intrinsic. We
+ // only use it if the mirror::Class* objects are in the boot image, so it is fine to keep
+ // raw mirror::Class pointers in this structure.
+ struct IntegerValueOfInfo {
+ IntegerValueOfInfo()
+ : integer_cache(nullptr),
+ integer(nullptr),
+ cache(nullptr),
+ low(0),
+ high(0),
+ value_offset(0) {}
+
+ // The java.lang.Integer$IntegerCache class.
+ mirror::Class* integer_cache;
+ // The java.lang.Integer class.
+ mirror::Class* integer;
+ // Value of java.lang.Integer$IntegerCache#cache.
+ mirror::ObjectArray<mirror::Object>* cache;
+ // Value of java.lang.Integer$IntegerCache#low.
+ int32_t low;
+ // Value of java.lang.Integer$IntegerCache#high.
+ int32_t high;
+ // The offset of java.lang.Integer.value.
+ int32_t value_offset;
+ };
+
+ static IntegerValueOfInfo ComputeIntegerValueOfInfo();
+
protected:
IntrinsicVisitor() {}
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 751623c177..1006a776f0 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -41,6 +41,54 @@ ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() {
using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;
+#define __ assembler->
+
+// Compute base address for the System.arraycopy intrinsic in `base`.
+static void GenSystemArrayCopyBaseAddress(ArmAssembler* assembler,
+ Primitive::Type type,
+ const Register& array,
+ const Location& pos,
+ const Register& base) {
+ // This routine is only used by the SystemArrayCopy intrinsic at the
+ // moment. We can allow Primitive::kPrimChar as `type` to implement
+ // the SystemArrayCopyChar intrinsic.
+ DCHECK_EQ(type, Primitive::kPrimNot);
+ const int32_t element_size = Primitive::ComponentSize(type);
+ const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
+ const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ if (pos.IsConstant()) {
+ int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(base, array, element_size * constant + data_offset);
+ } else {
+ __ add(base, array, ShifterOperand(pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(base, data_offset);
+ }
+}
+
+// Compute end address for the System.arraycopy intrinsic in `end`.
+static void GenSystemArrayCopyEndAddress(ArmAssembler* assembler,
+ Primitive::Type type,
+ const Location& copy_length,
+ const Register& base,
+ const Register& end) {
+ // This routine is only used by the SystemArrayCopy intrinsic at the
+ // moment. We can allow Primitive::kPrimChar as `type` to implement
+ // the SystemArrayCopyChar intrinsic.
+ DCHECK_EQ(type, Primitive::kPrimNot);
+ const int32_t element_size = Primitive::ComponentSize(type);
+ const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
+
+ if (copy_length.IsConstant()) {
+ int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(end, base, element_size * constant);
+ } else {
+ __ add(end, base, ShifterOperand(copy_length.AsRegister<Register>(), LSL, element_size_shift));
+ }
+}
+
+#undef __
+
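The two helpers above only perform simple address arithmetic; a plain C++ restatement of what they compute (illustrative only; the function and parameter names are assumptions, not ART code):

#include <cstdint>

// base = array + data_offset + pos * element_size, i.e. the address of array->data[pos].
uintptr_t ArrayCopyBaseAddress(uintptr_t array, int32_t pos,
                               uint32_t data_offset, uint32_t element_size_shift) {
  return array + data_offset + (static_cast<uint32_t>(pos) << element_size_shift);
}

// end = base + length * element_size, one past the last element to copy.
uintptr_t ArrayCopyEndAddress(uintptr_t base, int32_t length, uint32_t element_size_shift) {
  return base + (static_cast<uint32_t>(length) << element_size_shift);
}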
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
@@ -55,6 +103,7 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ ArmAssembler* assembler = arm_codegen->GetAssembler();
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
DCHECK(instruction_->IsInvokeStaticOrDirect())
@@ -63,9 +112,8 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
- int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
- uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+ Primitive::Type type = Primitive::kPrimNot;
+ const int32_t element_size = Primitive::ComponentSize(type);
Register dest = locations->InAt(2).AsRegister<Register>();
Location dest_pos = locations->InAt(3);
@@ -76,15 +124,7 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
__ Bind(GetEntryLabel());
// Compute the base destination address in `dst_curr_addr`.
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ AddConstant(dst_curr_addr, dest, element_size * constant + offset);
- } else {
- __ add(dst_curr_addr,
- dest,
- ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
- __ AddConstant(dst_curr_addr, offset);
- }
+ GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);
Label loop;
__ Bind(&loop);
@@ -108,6 +148,8 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
DCHECK_NE(src_stop_addr, IP);
DCHECK_NE(tmp, IP);
DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
+ // TODO: Load the entrypoint once before the loop, instead of
+ // loading it at every iteration.
int32_t entry_point_offset =
CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
// This runtime call does not require a stack map.
@@ -129,6 +171,7 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
IntrinsicLocationsBuilderARM::IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen)
: arena_(codegen->GetGraph()->GetArena()),
+ codegen_(codegen),
assembler_(codegen->GetAssembler()),
features_(codegen->GetInstructionSetFeatures()) {}
@@ -227,9 +270,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
-static void GenNumberOfLeadingZeros(LocationSummary* locations,
+static void GenNumberOfLeadingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmAssembler* assembler) {
+ CodeGeneratorARM* codegen) {
+ ArmAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
Location in = locations->InAt(0);
Register out = locations->Out().AsRegister<Register>();
@@ -239,11 +284,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations,
Register in_reg_lo = in.AsRegisterPairLow<Register>();
Register in_reg_hi = in.AsRegisterPairHigh<Register>();
Label end;
+ Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ clz(out, in_reg_hi);
- __ CompareAndBranchIfNonZero(in_reg_hi, &end);
+ __ CompareAndBranchIfNonZero(in_reg_hi, final_label);
__ clz(out, in_reg_lo);
__ AddConstant(out, 32);
- __ Bind(&end);
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
} else {
__ clz(out, in.AsRegister<Register>());
}
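The long (64-bit) path above computes the clz of the high word and branches straight to `final_label` when the high word is non-zero; otherwise it falls through to the low word and adds 32. A plain C++ restatement of the value it computes (illustrative only; __builtin_clz is the host compiler builtin, not ART code):

#include <cstdint>

// Leading zeros of a 64-bit value split into (hi, lo) 32-bit halves, mirroring
// the generated clz / CompareAndBranchIfNonZero / clz / add sequence. The ARM
// CLZ instruction returns 32 for a zero input, which the lo == 0 case handles.
int NumberOfLeadingZeros64(uint32_t hi, uint32_t lo) {
  if (hi != 0) {
    return __builtin_clz(hi);
  }
  return 32 + (lo != 0 ? __builtin_clz(lo) : 32);
}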
@@ -254,7 +302,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfLeadingZeros(HInvoke* inv
}
void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -266,27 +314,32 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke
}
void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
}
-static void GenNumberOfTrailingZeros(LocationSummary* locations,
+static void GenNumberOfTrailingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmAssembler* assembler) {
+ CodeGeneratorARM* codegen) {
DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+ ArmAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
Register out = locations->Out().AsRegister<Register>();
if (type == Primitive::kPrimLong) {
Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
Label end;
+ Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ rbit(out, in_reg_lo);
__ clz(out, out);
- __ CompareAndBranchIfNonZero(in_reg_lo, &end);
+ __ CompareAndBranchIfNonZero(in_reg_lo, final_label);
__ rbit(out, in_reg_hi);
__ clz(out, out);
__ AddConstant(out, 32);
- __ Bind(&end);
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
} else {
Register in = locations->InAt(0).AsRegister<Register>();
__ rbit(out, in);
@@ -303,7 +356,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfTrailingZeros(HInvoke* in
}
void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -315,7 +368,7 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invok
}
void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
}
static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
@@ -1312,6 +1365,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
Label end;
Label return_true;
Label return_false;
+ Label* final_label = codegen_->GetFinalLabel(invoke, &end);
// Get offsets of count, value, and class fields within a string object.
const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -1385,12 +1439,15 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
// If loop does not result in returning false, we return true.
__ Bind(&return_true);
__ LoadImmediate(out, 1);
- __ b(&end);
+ __ b(final_label);
// Return false and exit the function.
__ Bind(&return_false);
__ LoadImmediate(out, 0);
- __ Bind(&end);
+
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
}
static void GenerateVisitStringIndexOf(HInvoke* invoke,
@@ -1924,138 +1981,113 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
__ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
}
- int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
- uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
-
- // Compute the base source address in `temp1`.
- if (src_pos.IsConstant()) {
- int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
- __ AddConstant(temp1, src, element_size * constant + offset);
+ if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
+ // Constant zero length: no need to emit the loop code at all.
} else {
- __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift));
- __ AddConstant(temp1, offset);
- }
-
- // Compute the end source address in `temp3`.
- if (length.IsConstant()) {
- int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
- __ AddConstant(temp3, temp1, element_size * constant);
- } else {
- __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift));
- }
+ Label done;
+ const Primitive::Type type = Primitive::kPrimNot;
+ const int32_t element_size = Primitive::ComponentSize(type);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // TODO: Also convert this intrinsic to the IsGcMarking strategy?
-
- // The base destination address is computed later, as `temp2` is
- // used for intermediate computations.
-
- // SystemArrayCopy implementation for Baker read barriers (see
- // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
- //
- // if (src_ptr != end_ptr) {
- // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // // Slow-path copy.
- // do {
- // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
- // } while (src_ptr != end_ptr)
- // } else {
- // // Fast-path copy.
- // do {
- // *dest_ptr++ = *src_ptr++;
- // } while (src_ptr != end_ptr)
- // }
- // }
-
- Label loop, done;
-
- // Don't enter copy loop if `length == 0`.
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&done, EQ);
-
- // /* int32_t */ monitor = src->monitor_
- __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `src` is unchanged by this operation, but its value now depends
- // on `temp2`.
- __ add(src, src, ShifterOperand(temp2, LSR, 32));
-
- // Slow path used to copy array when `src` is gray.
- SlowPathCode* read_barrier_slow_path =
- new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
- codegen_->AddSlowPath(read_barrier_slow_path);
-
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
- // Carry flag is the last bit shifted out by LSRS.
- __ b(read_barrier_slow_path->GetEntryLabel(), CS);
-
- // Fast-path copy.
-
- // Compute the base destination address in `temp2`.
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ AddConstant(temp2, dest, element_size * constant + offset);
- } else {
- __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
- __ AddConstant(temp2, offset);
+ if (length.IsRegister()) {
+ // Don't enter the copy loop if the length is zero.
+ __ CompareAndBranchIfZero(length.AsRegister<Register>(), &done);
}
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- __ Bind(&loop);
- __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
- __ str(IP, Address(temp2, element_size, Address::PostIndex));
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&loop, NE);
-
- __ Bind(read_barrier_slow_path->GetExitLabel());
- __ Bind(&done);
- } else {
- // Non read barrier code.
-
- // Compute the base destination address in `temp2`.
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ AddConstant(temp2, dest, element_size * constant + offset);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+
+ // /* int32_t */ monitor = src->monitor_
+ __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `temp2`.
+ __ add(src, src, ShifterOperand(temp2, LSR, 32));
+
+ // Compute the base source address in `temp1`.
+ // Note that `temp1` (the base source address) is computed from
+ // `src` (and `src_pos`) here, and thus honors the artificial
+ // dependency of `src` on `temp2`.
+ GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+ // Compute the end source address in `temp3`.
+ GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+ // The base destination address is computed later, as `temp2` is
+ // used for intermediate computations.
+
+ // Slow path used to copy array when `src` is gray.
+ // Note that the base destination address is computed in `temp2`
+ // by the slow path code.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+ // Carry flag is the last bit shifted out by LSRS.
+ __ b(read_barrier_slow_path->GetEntryLabel(), CS);
+
+ // Fast-path copy.
+ // Compute the base destination address in `temp2`.
+ GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ Label loop;
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
} else {
- __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
- __ AddConstant(temp2, offset);
+ // Non read barrier code.
+ // Compute the base source address in `temp1`.
+ GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+ // Compute the base destination address in `temp2`.
+ GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+ // Compute the end source address in `temp3`.
+ GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ Label loop;
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
}
-
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- Label loop, done;
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&done, EQ);
- __ Bind(&loop);
- __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
- __ str(IP, Address(temp2, element_size, Address::PostIndex));
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&loop, NE);
__ Bind(&done);
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1,
- temp2,
- dest,
- Register(kNoRegister),
- /* value_can_be_null */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
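The restructured SystemArrayCopy code above (and its ARM64/VIXL counterparts below) now skips the whole copy sequence for a constant zero length, guards register lengths with a single compare-and-branch, and then runs the raw copy as a do/while loop. A C++ sketch of that control-flow shape (illustrative only; read-barrier handling omitted):

#include <cstdint>

void CopyReferencesRaw(uintptr_t* src_base, uintptr_t* dst_base, int32_t length) {
  if (length == 0) {
    // Constant-zero lengths emit no loop code at all; register lengths are
    // guarded with CompareAndBranchIfZero/Cbz instead.
    return;
  }
  uintptr_t* src_end = src_base + length;
  do {
    *dst_base++ = *src_base++;  // Raw word copy; no poisoning/unpoisoning needed.
  } while (src_base != src_end);
}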
@@ -2473,13 +2505,14 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
Label done, compressed_string_loop;
+ Label* final_label = codegen_->GetFinalLabel(invoke, &done);
// dst to be copied.
__ add(dst_ptr, dstObj, ShifterOperand(data_offset));
__ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
__ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
// Early out for valid zero-length retrievals.
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// src range to copy.
__ add(src_ptr, srcObj, ShifterOperand(value_offset));
@@ -2516,7 +2549,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ b(&loop, GE);
__ adds(num_chr, num_chr, ShifterOperand(4));
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// Main loop for < 4 character case and remainder handling. Loads and stores one
// 16-bit Java character at a time.
@@ -2527,7 +2560,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ b(&remainder, GT);
if (mirror::kUseStringCompression) {
- __ b(&done);
+ __ b(final_label);
const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
DCHECK_EQ(c_char_size, 1u);
@@ -2541,7 +2574,9 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ b(&compressed_string_loop, GT);
}
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
}
void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
@@ -2646,6 +2681,75 @@ void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConvention calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ Location::RegisterLocation(R0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ ArmAssembler* const assembler = GetAssembler();
+
+ Register out = locations->Out().AsRegister<Register>();
+ InvokeRuntimeCallingConvention calling_convention;
+ Register argument = calling_convention.GetRegisterAt(0);
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ uint32_t address =
+ dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ LoadImmediate(IP, value);
+ __ StoreToOffset(kStoreWord, IP, out, info.value_offset);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ }
+ } else {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ // Check bounds of our cache.
+ __ AddConstant(out, in, -info.low);
+ __ CmpConstant(out, info.high - info.low + 1);
+ Label allocate, done;
+ __ b(&allocate, HS);
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ __ LoadLiteral(IP, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+ codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), IP, out);
+ __ MaybeUnpoisonHeapReference(out);
+ __ b(&done);
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ StoreToOffset(kStoreWord, in, out, info.value_offset);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ __ Bind(&done);
+ }
+}
+
UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
index 7f20ea4b1f..2840863632 100644
--- a/compiler/optimizing/intrinsics_arm.h
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -51,6 +51,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGenerator* codegen_;
ArmAssembler* assembler_;
const ArmInstructionSetFeatures& features_;
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index f38642242d..423fd3c6ae 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -198,6 +198,8 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
DCHECK_NE(tmp_.reg(), IP0);
DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
+ // TODO: Load the entrypoint once before the loop, instead of
+ // loading it at every iteration.
int32_t entry_point_offset =
CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
// This runtime call does not require a stack map.
@@ -853,7 +855,6 @@ static void GenUnsafeGet(HInvoke* invoke,
DCHECK((type == Primitive::kPrimInt) ||
(type == Primitive::kPrimLong) ||
(type == Primitive::kPrimNot));
- MacroAssembler* masm = codegen->GetVIXLAssembler();
Location base_loc = locations->InAt(1);
Register base = WRegisterFrom(base_loc); // Object pointer.
Location offset_loc = locations->InAt(2);
@@ -863,8 +864,7 @@ static void GenUnsafeGet(HInvoke* invoke,
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
- UseScratchRegisterScope temps(masm);
- Register temp = temps.AcquireW();
+ Register temp = WRegisterFrom(locations->GetTemp(0));
codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
trg_loc,
base,
@@ -901,6 +901,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
kIntrinsified);
if (can_call && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -1559,7 +1562,10 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
// Load `count` field of the argument string and check if it matches the const string.
// Also compares the compression style, if differs return false.
__ Ldr(temp, MemOperand(arg.X(), count_offset));
+ // Temporarily release temp1 as we may not be able to embed the flagged count in the CMP immediate.
+ scratch_scope.Release(temp1);
__ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
+ temp1 = scratch_scope.AcquireW();
__ B(&return_false, ne);
} else {
// Load `count` fields of this and argument strings.
@@ -2187,8 +2193,9 @@ static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
}
}
-// Compute base source address, base destination address, and end source address
-// for System.arraycopy* intrinsics.
+// Compute base source address, base destination address, and end
+// source address for System.arraycopy* intrinsics in `src_base`,
+// `dst_base` and `src_end` respectively.
static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
Primitive::Type type,
const Register& src,
@@ -2199,12 +2206,13 @@ static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
const Register& src_base,
const Register& dst_base,
const Register& src_end) {
+ // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar)
<< "Unexpected element type: " << type;
const int32_t element_size = Primitive::ComponentSize(type);
const int32_t element_size_shift = Primitive::ComponentSizeShift(type);
+ const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
- uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
if (src_pos.IsConstant()) {
int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
__ Add(src_base, src, element_size * constant + data_offset);
@@ -2381,9 +2389,14 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// Temporary register IP0, obtained from the VIXL scratch register
// pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
// (because that register is clobbered by ReadBarrierMarkRegX
- // entry points). Get an extra temporary register from the
- // register allocator.
+ // entry points). It cannot be used in calls to
+ // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+ // either. For these reasons, get a third extra temporary register
+ // from the register allocator.
locations->AddTemp(Location::RequiresRegister());
+ } else {
+ // Cases other than Baker read barriers: the third temporary will
+ // be acquired from the VIXL scratch register pool.
}
}
@@ -2494,11 +2507,12 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// We use a block to end the scratch scope before the write barrier, thus
// freeing the temporary registers so they can be used in `MarkGCCard`.
UseScratchRegisterScope temps(masm);
- // Note: Because it is acquired from VIXL's scratch register pool,
- // `temp3` might be IP0, and thus cannot be used as `ref` argument
- // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
- // calls below (see ReadBarrierMarkSlowPathARM64 for more details).
- Register temp3 = temps.AcquireW();
+ Register temp3;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ temp3 = WRegisterFrom(locations->GetTemp(2));
+ } else {
+ temp3 = temps.AcquireW();
+ }
if (!optimizations.GetDoesNotNeedTypeCheck()) {
// Check whether all elements of the source array are assignable to the component
@@ -2702,122 +2716,131 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
__ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
}
- Register src_curr_addr = temp1.X();
- Register dst_curr_addr = temp2.X();
- Register src_stop_addr;
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // Temporary register IP0, obtained from the VIXL scratch
- // register pool as `temp3`, cannot be used in
- // ReadBarrierSystemArrayCopySlowPathARM64 (because that
- // register is clobbered by ReadBarrierMarkRegX entry points).
- // So another temporary register allocated by the register
- // allocator instead.
- DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
- src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
+ // Constant zero length: no need to emit the loop code at all.
} else {
- src_stop_addr = temp3.X();
- }
-
- GenSystemArrayCopyAddresses(masm,
- Primitive::kPrimNot,
- src,
- src_pos,
- dest,
- dest_pos,
- length,
- src_curr_addr,
- dst_curr_addr,
- src_stop_addr);
-
- const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ Register src_curr_addr = temp1.X();
+ Register dst_curr_addr = temp2.X();
+ Register src_stop_addr = temp3.X();
+ vixl::aarch64::Label done;
+ const Primitive::Type type = Primitive::kPrimNot;
+ const int32_t element_size = Primitive::ComponentSize(type);
+
+ if (length.IsRegister()) {
+ // Don't enter the copy loop if the length is zero.
+ __ Cbz(WRegisterFrom(length), &done);
+ }
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // TODO: Also convert this intrinsic to the IsGcMarking strategy?
-
- // SystemArrayCopy implementation for Baker read barriers (see
- // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
- //
- // if (src_ptr != end_ptr) {
- // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // // Slow-path copy.
- // do {
- // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
- // } while (src_ptr != end_ptr)
- // } else {
- // // Fast-path copy.
- // do {
- // *dest_ptr++ = *src_ptr++;
- // } while (src_ptr != end_ptr)
- // }
- // }
-
- vixl::aarch64::Label loop, done;
-
- // Don't enter copy loop if `length == 0`.
- __ Cmp(src_curr_addr, src_stop_addr);
- __ B(&done, eq);
-
- Register tmp = temps.AcquireW();
- // Make sure `tmp` is not IP0, as it is clobbered by
- // ReadBarrierMarkRegX entry points in
- // ReadBarrierSystemArrayCopySlowPathARM64.
- DCHECK_NE(LocationFrom(tmp).reg(), IP0);
-
- // /* int32_t */ monitor = src->monitor_
- __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including rb_state,
- // to prevent load-load reordering, and without using
- // a memory barrier (which would be more expensive).
- // `src` is unchanged by this operation, but its value now depends
- // on `tmp`.
- __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
-
- // Slow path used to copy array when `src` is gray.
- SlowPathCodeARM64* read_barrier_slow_path =
- new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
- codegen_->AddSlowPath(read_barrier_slow_path);
-
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
-
- // Fast-path copy.
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- __ Bind(&loop);
- __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
- __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
- __ Cmp(src_curr_addr, src_stop_addr);
- __ B(&loop, ne);
-
- __ Bind(read_barrier_slow_path->GetExitLabel());
- __ Bind(&done);
- } else {
- // Non read barrier code.
-
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- vixl::aarch64::Label loop, done;
- __ Bind(&loop);
- __ Cmp(src_curr_addr, src_stop_addr);
- __ B(&done, eq);
- {
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+
+ // Make sure `tmp` is not IP0, as it is clobbered by
+ // ReadBarrierMarkRegX entry points in
+ // ReadBarrierSystemArrayCopySlowPathARM64.
+ temps.Exclude(ip0);
Register tmp = temps.AcquireW();
+ DCHECK_NE(LocationFrom(tmp).reg(), IP0);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `tmp`.
+ __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
+
+ // Compute base source address, base destination address, and end
+ // source address for System.arraycopy* intrinsics in `src_base`,
+ // `dst_base` and `src_end` respectively.
+ // Note that `src_curr_addr` is computed from `src` (and
+ // `src_pos`) here, and thus honors the artificial dependency
+ // of `src` on `tmp`.
+ GenSystemArrayCopyAddresses(masm,
+ type,
+ src,
+ src_pos,
+ dest,
+ dest_pos,
+ length,
+ src_curr_addr,
+ dst_curr_addr,
+ src_stop_addr);
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCodeARM64* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ vixl::aarch64::Label loop;
+ __ Bind(&loop);
__ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
__ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&loop, ne);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ } else {
+ // Non read barrier code.
+ // Compute base source address, base destination address, and end
+ // source address for System.arraycopy* intrinsics in `src_base`,
+ // `dst_base` and `src_end` respectively.
+ GenSystemArrayCopyAddresses(masm,
+ type,
+ src,
+ src_pos,
+ dest,
+ dest_pos,
+ length,
+ src_curr_addr,
+ dst_curr_addr,
+ src_stop_addr);
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ vixl::aarch64::Label loop;
+ __ Bind(&loop);
+ {
+ Register tmp = temps.AcquireW();
+ __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+ __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ }
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&loop, ne);
}
- __ B(&loop);
__ Bind(&done);
}
}
+
// We only need one card marking on the destination array.
codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
@@ -2926,6 +2949,79 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConvention calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ calling_convention.GetReturnLocation(Primitive::kPrimNot),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ MacroAssembler* masm = GetVIXLAssembler();
+
+ Register out = RegisterFrom(locations->Out(), Primitive::kPrimNot);
+ UseScratchRegisterScope temps(masm);
+ Register temp = temps.AcquireW();
+ InvokeRuntimeCallingConvention calling_convention;
+ Register argument = calling_convention.GetRegisterAt(0);
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ uint32_t address =
+ dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ Mov(temp.W(), value);
+ __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ }
+ } else {
+ Register in = RegisterFrom(locations->InAt(0), Primitive::kPrimInt);
+ // Check bounds of our cache.
+ __ Add(out.W(), in.W(), -info.low);
+ __ Cmp(out.W(), info.high - info.low + 1);
+ vixl::aarch64::Label allocate, done;
+ __ B(&allocate, hs);
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+ MemOperand source = HeapOperand(
+ temp, out.X(), LSL, Primitive::ComponentSizeShift(Primitive::kPrimNot));
+ codegen_->Load(Primitive::kPrimNot, out, source);
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
+ __ B(&done);
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ __ Bind(&done);
+ }
+}
+
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 28e41cb086..3c53517b28 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -38,7 +38,8 @@ class CodeGeneratorARM64;
class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor {
public:
- explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena) : arena_(arena) {}
+ explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena, CodeGeneratorARM64* codegen)
+ : arena_(arena), codegen_(codegen) {}
// Define visitor methods.
@@ -56,6 +57,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGeneratorARM64* codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM64);
};
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index cc4889b26a..0d933eaf82 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -39,6 +39,7 @@ using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
+using helpers::HighSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputSRegister;
using helpers::OutputRegister;
@@ -117,6 +118,50 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};
+// Compute base address for the System.arraycopy intrinsic in `base`.
+static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
+ Primitive::Type type,
+ const vixl32::Register& array,
+ const Location& pos,
+ const vixl32::Register& base) {
+ // This routine is only used by the SystemArrayCopy intrinsic at the
+ // moment. We can allow Primitive::kPrimChar as `type` to implement
+ // the SystemArrayCopyChar intrinsic.
+ DCHECK_EQ(type, Primitive::kPrimNot);
+ const int32_t element_size = Primitive::ComponentSize(type);
+ const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
+ const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ if (pos.IsConstant()) {
+ int32_t constant = Int32ConstantFrom(pos);
+ __ Add(base, array, element_size * constant + data_offset);
+ } else {
+ __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
+ __ Add(base, base, data_offset);
+ }
+}
+
+// Compute end address for the System.arraycopy intrinsic in `end`.
+static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
+ Primitive::Type type,
+ const Location& copy_length,
+ const vixl32::Register& base,
+ const vixl32::Register& end) {
+ // This routine is only used by the SystemArrayCopy intrinsic at the
+ // moment. We can allow Primitive::kPrimChar as `type` to implement
+ // the SystemArrayCopyChar intrinsic.
+ DCHECK_EQ(type, Primitive::kPrimNot);
+ const int32_t element_size = Primitive::ComponentSize(type);
+ const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
+
+ if (copy_length.IsConstant()) {
+ int32_t constant = Int32ConstantFrom(copy_length);
+ __ Add(end, base, element_size * constant);
+ } else {
+ __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
+ }
+}
+
// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
@@ -137,9 +182,8 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
- int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
- uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+ Primitive::Type type = Primitive::kPrimNot;
+ const int32_t element_size = Primitive::ComponentSize(type);
vixl32::Register dest = InputRegisterAt(instruction_, 2);
Location dest_pos = locations->InAt(3);
@@ -150,15 +194,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
__ Bind(GetEntryLabel());
// Compute the base destination address in `dst_curr_addr`.
- if (dest_pos.IsConstant()) {
- int32_t constant = Int32ConstantFrom(dest_pos);
- __ Add(dst_curr_addr, dest, element_size * constant + offset);
- } else {
- __ Add(dst_curr_addr,
- dest,
- Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
- __ Add(dst_curr_addr, dst_curr_addr, offset);
- }
+ GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);
vixl32::Label loop;
__ Bind(&loop);
@@ -182,6 +218,8 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
DCHECK(!src_stop_addr.Is(ip));
DCHECK(!tmp.Is(ip));
DCHECK(tmp.IsRegister()) << tmp;
+ // TODO: Load the entrypoint once before the loop, instead of
+ // loading it at every iteration.
int32_t entry_point_offset =
CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
// This runtime call does not require a stack map.
@@ -203,6 +241,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
: arena_(codegen->GetGraph()->GetArena()),
+ codegen_(codegen),
assembler_(codegen->GetAssembler()),
features_(codegen->GetInstructionSetFeatures()) {}
@@ -295,9 +334,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
-static void GenNumberOfLeadingZeros(LocationSummary* locations,
+static void GenNumberOfLeadingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmVIXLAssembler* assembler) {
+ CodeGeneratorARMVIXL* codegen) {
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
Location in = locations->InAt(0);
vixl32::Register out = RegisterFrom(locations->Out());
@@ -307,11 +348,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations,
vixl32::Register in_reg_lo = LowRegisterFrom(in);
vixl32::Register in_reg_hi = HighRegisterFrom(in);
vixl32::Label end;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ Clz(out, in_reg_hi);
- __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false);
+ __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false);
__ Clz(out, in_reg_lo);
__ Add(out, out, 32);
- __ Bind(&end);
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
} else {
__ Clz(out, RegisterFrom(in));
}
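// Rough, self-contained sketch of the final-label pattern introduced here and
// reused in the intrinsics below: early exits branch to a label supplied by
// the code generator (which may already be the block's exit), and the local
// `end` label is bound only if some branch actually referenced it. `Label`,
// `Emitter` and `GetFinalLabel` below are stand-ins, not the ART/VIXL types.
struct Label {
  bool referenced = false;
  bool bound = false;
};

struct Emitter {
  Label block_exit;       // code that immediately follows the intrinsic
  bool can_fall_through;  // true when the intrinsic ends its basic block

  Label* GetFinalLabel(Label* local_end) {
    // Reuse the block-exit label when possible, saving a bind and a branch.
    return can_fall_through ? &block_exit : local_end;
  }
  void Branch(Label* target) { target->referenced = true; }
  void Bind(Label* target) { target->bound = true; }
};

void EmitIntrinsicTail(Emitter& emit) {
  Label end;
  Label* final_label = emit.GetFinalLabel(&end);
  emit.Branch(final_label);  // early exit from the fast path
  // ... rest of the intrinsic ...
  if (end.referenced) {      // mirrors `if (end.IsReferenced()) __ Bind(&end);`
    emit.Bind(&end);
  }
}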
@@ -322,7 +366,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke*
}
void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -334,27 +378,32 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* in
}
void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
}
-static void GenNumberOfTrailingZeros(LocationSummary* locations,
+static void GenNumberOfTrailingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmVIXLAssembler* assembler) {
+ CodeGeneratorARMVIXL* codegen) {
DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
vixl32::Register out = RegisterFrom(locations->Out());
if (type == Primitive::kPrimLong) {
vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
vixl32::Label end;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ Rbit(out, in_reg_lo);
__ Clz(out, out);
- __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false);
+ __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false);
__ Rbit(out, in_reg_hi);
__ Clz(out, out);
__ Add(out, out, 32);
- __ Bind(&end);
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
} else {
vixl32::Register in = RegisterFrom(locations->InAt(0));
__ Rbit(out, in);
@@ -371,7 +420,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke
}
void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -383,7 +432,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* i
}
void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
}
static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
@@ -464,7 +513,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
-static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
Location op1_loc = invoke->GetLocations()->InAt(0);
Location op2_loc = invoke->GetLocations()->InAt(1);
Location out_loc = invoke->GetLocations()->Out();
@@ -482,6 +532,7 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem
const vixl32::Register temp1 = temps.Acquire();
vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
vixl32::Label nan, done;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
DCHECK(op1.Is(out));
@@ -498,7 +549,8 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem
__ it(cond);
__ vmov(cond, F32, out, op2);
}
- __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation.
+  // For <> (not equal), we've done the min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
// handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
__ Vmov(temp1, op1);
@@ -509,14 +561,16 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem
__ And(temp1, temp1, temp2);
}
__ Vmov(out, temp1);
- __ B(&done);
+ __ B(final_label);
// handle NaN input.
__ Bind(&nan);
__ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
__ Vmov(out, temp1);
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -534,7 +588,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ true, GetAssembler());
+ GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -543,10 +597,11 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ false, GetAssembler());
+ GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
}
-static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
Location op1_loc = invoke->GetLocations()->InAt(0);
Location op2_loc = invoke->GetLocations()->InAt(1);
Location out_loc = invoke->GetLocations()->Out();
@@ -561,6 +616,7 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse
vixl32::DRegister op2 = DRegisterFrom(op2_loc);
vixl32::DRegister out = OutputDRegister(invoke);
vixl32::Label handle_nan_eq, done;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
DCHECK(op1.Is(out));
@@ -577,19 +633,22 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse
__ it(cond);
__ vmov(cond, F64, out, op2);
}
- __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation.
+  // For <> (not equal), we've done the min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
// handle op1 == op2, max(+0.0,-0.0).
if (!is_min) {
__ Vand(F64, out, op1, op2);
- __ B(&done);
+ __ B(final_label);
}
// handle op1 == op2, min(+0.0,-0.0), NaN input.
__ Bind(&handle_nan_eq);
__ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
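// Illustrative sketch, not from the patch, of why the equal-operands path can
// use bitwise AND/OR on the raw double bits: +0.0 and -0.0 differ only in the
// sign bit (AND yields +0.0 for max, OR yields -0.0 for min), and OR-ing with
// a NaN pattern keeps the exponent all-ones, so a NaN operand stays a NaN.
#include <cstdint>
#include <cstring>

static uint64_t Bits(double d) { uint64_t b; std::memcpy(&b, &d, sizeof(b)); return b; }
static double FromBits(uint64_t b) { double d; std::memcpy(&d, &b, sizeof(d)); return d; }

// Only meaningful on the path where the operands already compared equal
// (or one of them is a NaN), matching the Vand/Vorr uses above.
double MaxOfEqualOperands(double a, double b) { return FromBits(Bits(a) & Bits(b)); }
double MinOrNanOfEqualOperands(double a, double b) { return FromBits(Bits(a) | Bits(b)); }
// MaxOfEqualOperands(+0.0, -0.0) is +0.0; MinOrNanOfEqualOperands(+0.0, -0.0) is -0.0.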
void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
@@ -597,7 +656,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ true , GetAssembler());
+ GenMinMaxDouble(invoke, /* is_min */ true , codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -605,7 +664,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ false, GetAssembler());
+ GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
}
static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
@@ -736,6 +795,58 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
__ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}
+void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
+ if (features_.HasARMv8AInstructions()) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
+
+ ArmVIXLAssembler* assembler = GetAssembler();
+ vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
+ vixl32::Register out_reg = OutputRegister(invoke);
+ vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
+ vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
+ vixl32::Label done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
+
+ // Round to nearest integer, ties away from zero.
+ __ Vcvta(S32, F32, temp1, in_reg);
+ __ Vmov(out_reg, temp1);
+
+ // For positive, zero or NaN inputs, rounding is done.
+ __ Cmp(out_reg, 0);
+ __ B(ge, final_label, /* far_target */ false);
+
+ // Handle input < 0 cases.
+  // If the input is negative but not a tie, the previous result (round to nearest) is valid.
+  // If the input is a negative tie, switch the rounding direction to positive infinity: out_reg += 1.
+ __ Vrinta(F32, F32, temp1, in_reg);
+ __ Vmov(temp2, 0.5);
+ __ Vsub(F32, temp1, in_reg, temp1);
+ __ Vcmp(F32, temp1, temp2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ {
+    // Use ExactAssemblyScope here because we are using IT.
+ ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(eq);
+ __ add(eq, out_reg, out_reg, 1);
+ }
+
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
+
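// Host-side sketch, not from the patch, of the rounding fix-up above: VCVTA
// rounds ties away from zero, while Math.round() must round half up, so a
// negative tie needs a +1 correction. std::roundf models the ties-away
// behaviour here; NaN and overflow handling are omitted for brevity.
#include <cmath>
#include <cstdint>

int32_t RoundFloatLikeJava(float in) {
  float nearest = std::roundf(in);              // ties away from zero, like VRINTA
  int32_t out = static_cast<int32_t>(nearest);  // like VCVTA + VMOV
  if (out < 0 && (in - nearest) == 0.5f) {      // negative tie: away-from-zero went one too low
    ++out;                                      // matches the IT/ADD pair above
  }
  return out;
}
// RoundFloatLikeJava(2.5f) == 3, RoundFloatLikeJava(-2.5f) == -2.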
void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
CreateIntToIntLocations(arena_, invoke);
}
@@ -1632,6 +1743,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
vixl32::Label end;
vixl32::Label return_true;
vixl32::Label return_false;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
// Get offsets of count, value, and class fields within a string object.
const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -1708,12 +1820,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
// If loop does not result in returning false, we return true.
__ Bind(&return_true);
__ Mov(out, 1);
- __ B(&end);
+ __ B(final_label);
// Return false and exit the function.
__ Bind(&return_false);
__ Mov(out, 0);
- __ Bind(&end);
+
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
}
static void GenerateVisitStringIndexOf(HInvoke* invoke,
@@ -2242,143 +2357,116 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
__ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
}
- int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
- uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
-
- // Compute the base source address in `temp1`.
- if (src_pos.IsConstant()) {
- int32_t constant = Int32ConstantFrom(src_pos);
- __ Add(temp1, src, element_size * constant + offset);
- } else {
- __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift));
- __ Add(temp1, temp1, offset);
- }
-
- // Compute the end source address in `temp3`.
- if (length.IsConstant()) {
- int32_t constant = Int32ConstantFrom(length);
- __ Add(temp3, temp1, element_size * constant);
+ if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
+    // Constant zero length: no need to emit the loop code at all.
} else {
- __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift));
- }
-
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // TODO: Also convert this intrinsic to the IsGcMarking strategy?
-
- // The base destination address is computed later, as `temp2` is
- // used for intermediate computations.
-
- // SystemArrayCopy implementation for Baker read barriers (see
- // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
- //
- // if (src_ptr != end_ptr) {
- // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // // Slow-path copy.
- // do {
- // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
- // } while (src_ptr != end_ptr)
- // } else {
- // // Fast-path copy.
- // do {
- // *dest_ptr++ = *src_ptr++;
- // } while (src_ptr != end_ptr)
- // }
- // }
-
- vixl32::Label loop, done;
-
- // Don't enter copy loop if `length == 0`.
- __ Cmp(temp1, temp3);
- __ B(eq, &done, /* far_target */ false);
-
- // /* int32_t */ monitor = src->monitor_
- __ Ldr(temp2, MemOperand(src, monitor_offset));
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `src` is unchanged by this operation, but its value now depends
- // on `temp2`.
- __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
-
- // Slow path used to copy array when `src` is gray.
- SlowPathCodeARMVIXL* read_barrier_slow_path =
- new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
- codegen_->AddSlowPath(read_barrier_slow_path);
-
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
- // Carry flag is the last bit shifted out by LSRS.
- __ B(cs, read_barrier_slow_path->GetEntryLabel());
-
- // Fast-path copy.
-
- // Compute the base destination address in `temp2`.
- if (dest_pos.IsConstant()) {
- int32_t constant = Int32ConstantFrom(dest_pos);
- __ Add(temp2, dest, element_size * constant + offset);
- } else {
- __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
- __ Add(temp2, temp2, offset);
- }
-
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- __ Bind(&loop);
-
- {
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- const vixl32::Register temp_reg = temps.Acquire();
+ vixl32::Label done;
+ const Primitive::Type type = Primitive::kPrimNot;
+ const int32_t element_size = Primitive::ComponentSize(type);
- __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
- __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+ if (length.IsRegister()) {
+      // Don't enter the copy loop if the length is zero.
+ __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false);
}
- __ Cmp(temp1, temp3);
- __ B(ne, &loop, /* far_target */ false);
-
- __ Bind(read_barrier_slow_path->GetExitLabel());
- __ Bind(&done);
- } else {
- // Non read barrier code.
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+
+ // /* int32_t */ monitor = src->monitor_
+ __ Ldr(temp2, MemOperand(src, monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `temp2`.
+ __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
+
+ // Compute the base source address in `temp1`.
+ // Note that `temp1` (the base source address) is computed from
+ // `src` (and `src_pos`) here, and thus honors the artificial
+ // dependency of `src` on `temp2`.
+ GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+ // Compute the end source address in `temp3`.
+ GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+ // The base destination address is computed later, as `temp2` is
+ // used for intermediate computations.
+
+ // Slow path used to copy array when `src` is gray.
+ // Note that the base destination address is computed in `temp2`
+ // by the slow path code.
+ SlowPathCodeARMVIXL* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+ // Carry flag is the last bit shifted out by LSRS.
+ __ B(cs, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+ // Compute the base destination address in `temp2`.
+ GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ vixl32::Label loop;
+ __ Bind(&loop);
+ {
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ const vixl32::Register temp_reg = temps.Acquire();
+ __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+ __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+ }
+ __ Cmp(temp1, temp3);
+ __ B(ne, &loop, /* far_target */ false);
- // Compute the base destination address in `temp2`.
- if (dest_pos.IsConstant()) {
- int32_t constant = Int32ConstantFrom(dest_pos);
- __ Add(temp2, dest, element_size * constant + offset);
+ __ Bind(read_barrier_slow_path->GetExitLabel());
} else {
- __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
- __ Add(temp2, temp2, offset);
- }
-
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- vixl32::Label loop, done;
- __ Cmp(temp1, temp3);
- __ B(eq, &done, /* far_target */ false);
- __ Bind(&loop);
-
- {
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- const vixl32::Register temp_reg = temps.Acquire();
-
- __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
- __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+ // Non read barrier code.
+ // Compute the base source address in `temp1`.
+ GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+ // Compute the base destination address in `temp2`.
+ GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+ // Compute the end source address in `temp3`.
+ GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ vixl32::Label loop;
+ __ Bind(&loop);
+ {
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ const vixl32::Register temp_reg = temps.Acquire();
+ __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+ __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+ }
+ __ Cmp(temp1, temp3);
+ __ B(ne, &loop, /* far_target */ false);
}
-
- __ Cmp(temp1, temp3);
- __ B(ne, &loop, /* far_target */ false);
__ Bind(&done);
}
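// Compact sketch, not from the patch, of the gray-object test used above: the
// read-barrier state is a single bit in the lock word, and LSRS by
// (kReadBarrierStateShift + 1) drops exactly that bit into the carry flag.
// The shift value below is a placeholder, not the real LockWord layout.
#include <cstdint>

constexpr uint32_t kReadBarrierStateShiftSketch = 28;  // placeholder bit position

bool IsGraySketch(uint32_t lock_word) {
  // Equivalent outcome to: Lsrs(tmp, lock_word, kReadBarrierStateShift + 1); B(cs, slow_path);
  return ((lock_word >> kReadBarrierStateShiftSketch) & 1u) == 1u;  // GrayState() == 1
}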
@@ -2778,13 +2866,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
vixl32::Label done, compressed_string_loop;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
// dst to be copied.
__ Add(dst_ptr, dstObj, data_offset);
__ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
__ Subs(num_chr, srcEnd, srcBegin);
// Early out for valid zero-length retrievals.
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// src range to copy.
__ Add(src_ptr, srcObj, value_offset);
@@ -2828,7 +2917,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ B(ge, &loop, /* far_target */ false);
__ Adds(num_chr, num_chr, 4);
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// Main loop for < 4 character case and remainder handling. Loads and stores one
// 16-bit Java character at a time.
@@ -2841,7 +2930,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ B(gt, &remainder, /* far_target */ false);
if (mirror::kUseStringCompression) {
- __ B(&done);
+ __ B(final_label);
const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
DCHECK_EQ(c_char_size, 1u);
@@ -2857,7 +2946,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ B(gt, &compressed_string_loop, /* far_target */ false);
}
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
@@ -2990,8 +3081,78 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
__ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ LocationFrom(r0),
+ LocationFrom(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ ArmVIXLAssembler* const assembler = GetAssembler();
+
+ vixl32::Register out = RegisterFrom(locations->Out());
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ vixl32::Register argument = calling_convention.GetRegisterAt(0);
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ uint32_t address =
+ dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ Mov(temp, value);
+ assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ }
+ } else {
+ vixl32::Register in = RegisterFrom(locations->InAt(0));
+ // Check bounds of our cache.
+ __ Add(out, in, -info.low);
+ __ Cmp(out, info.high - info.low + 1);
+ vixl32::Label allocate, done;
+ __ B(hs, &allocate);
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+ codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), temp, out);
+ assembler->MaybeUnpoisonHeapReference(out);
+ __ B(&done);
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ __ Bind(&done);
+ }
+}
+
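// Self-contained sketch, not from the patch, of the Integer.valueOf() fast
// path generated above: values inside the boot-image cache range resolve to a
// pre-boxed object, everything else allocates a fresh box. `BoxedInteger` and
// the fields below stand in for IntegerValueOfInfo; they are not ART APIs.
#include <cstdint>
#include <vector>

struct BoxedInteger { int32_t value; };

struct ValueOfInfoSketch {
  int32_t low = -128;
  int32_t high = 127;
  std::vector<BoxedInteger*> cache;  // boot-image objects for [low, high]
};

BoxedInteger* IntegerValueOfSketch(const ValueOfInfoSketch& info, int32_t in) {
  uint32_t index = static_cast<uint32_t>(in - info.low);              // Add(out, in, -info.low)
  uint32_t length = static_cast<uint32_t>(info.high - info.low + 1);
  if (index < length) {                                               // unsigned compare, B(hs, &allocate)
    return info.cache[index];                                         // load from the cache array
  }
  return new BoxedInteger{in};  // kQuickAllocObjectInitialized + store of the value field
}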
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
index 6e79cb76a1..023cba1349 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.h
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -47,6 +47,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGenerator* codegen_;
ArmVIXLAssembler* assembler_;
const ArmInstructionSetFeatures& features_;
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 64a68403e9..b67793c4ed 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1514,21 +1514,31 @@ void IntrinsicCodeGeneratorMIPS::VisitThreadCurrentThread(HInvoke* invoke) {
Thread::PeerOffset<kMipsPointerSize>().Int32Value());
}
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
- bool can_call =
- invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
- invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile;
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ Primitive::Type type) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+ invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- can_call ?
- LocationSummary::kCallOnSlowPath :
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ locations->SetOut(Location::RequiresRegister(),
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in InstructionCodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
static void GenUnsafeGet(HInvoke* invoke,
Primitive::Type type,
bool is_volatile,
@@ -1539,45 +1549,109 @@ static void GenUnsafeGet(HInvoke* invoke,
(type == Primitive::kPrimLong) ||
(type == Primitive::kPrimNot)) << type;
MipsAssembler* assembler = codegen->GetAssembler();
+ // Target register.
+ Location trg_loc = locations->Out();
// Object pointer.
- Register base = locations->InAt(1).AsRegister<Register>();
+ Location base_loc = locations->InAt(1);
+ Register base = base_loc.AsRegister<Register>();
// The "offset" argument is passed as a "long". Since this code is for
// a 32-bit processor, we can only use 32-bit addresses, so we only
// need the low 32-bits of offset.
- Register offset_lo = invoke->GetLocations()->InAt(2).AsRegisterPairLow<Register>();
+ Location offset_loc = locations->InAt(2);
+ Register offset_lo = offset_loc.AsRegisterPairLow<Register>();
- __ Addu(TMP, base, offset_lo);
- if (is_volatile) {
- __ Sync(0);
+ if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) {
+ __ Addu(TMP, base, offset_lo);
}
- if (type == Primitive::kPrimLong) {
- Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
- Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
- if (is_R6) {
- __ Lw(trg_lo, TMP, 0);
- __ Lw(trg_hi, TMP, 4);
- } else {
- __ Lwr(trg_lo, TMP, 0);
- __ Lwl(trg_lo, TMP, 3);
- __ Lwr(trg_hi, TMP, 4);
- __ Lwl(trg_hi, TMP, 7);
+ switch (type) {
+ case Primitive::kPrimLong: {
+ Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
+ Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
+ CHECK(!is_volatile); // TODO: support atomic 8-byte volatile loads.
+ if (is_R6) {
+ __ Lw(trg_lo, TMP, 0);
+ __ Lw(trg_hi, TMP, 4);
+ } else {
+ __ Lwr(trg_lo, TMP, 0);
+ __ Lwl(trg_lo, TMP, 3);
+ __ Lwr(trg_hi, TMP, 4);
+ __ Lwl(trg_hi, TMP, 7);
+ }
+ break;
}
- } else {
- Register trg = locations->Out().AsRegister<Register>();
- if (is_R6) {
- __ Lw(trg, TMP, 0);
- } else {
- __ Lwr(trg, TMP, 0);
- __ Lwl(trg, TMP, 3);
+ case Primitive::kPrimInt: {
+ Register trg = trg_loc.AsRegister<Register>();
+ if (is_R6) {
+ __ Lw(trg, TMP, 0);
+ } else {
+ __ Lwr(trg, TMP, 0);
+ __ Lwl(trg, TMP, 3);
+ }
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ break;
}
+
+ case Primitive::kPrimNot: {
+ Register trg = trg_loc.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+ trg_loc,
+ base,
+ /* offset */ 0U,
+ /* index */ offset_loc,
+ TIMES_1,
+ temp,
+ /* needs_null_check */ false);
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ } else {
+ if (is_R6) {
+ __ Lw(trg, TMP, 0);
+ } else {
+ __ Lwr(trg, TMP, 0);
+ __ Lwl(trg, TMP, 3);
+ }
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ codegen->GenerateReadBarrierSlow(invoke,
+ trg_loc,
+ trg_loc,
+ base_loc,
+ /* offset */ 0U,
+ /* index */ offset_loc);
+ }
+ } else {
+ if (is_R6) {
+ __ Lw(trg, TMP, 0);
+ } else {
+ __ Lwr(trg, TMP, 0);
+ __ Lwl(trg, TMP, 3);
+ }
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ __ MaybeUnpoisonHeapReference(trg);
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected type " << type;
+ UNREACHABLE();
}
}
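// Sketch, not from the patch, of the memory-ordering shape of the volatile
// int path above: the value is loaded first and the full barrier (`sync 0`)
// is issued after the load, giving acquire-style semantics. The seq_cst fence
// below is only an approximation of `sync 0` for illustration.
#include <atomic>
#include <cstdint>

int32_t UnsafeGetIntVolatileShape(const std::atomic<int32_t>* addr) {
  int32_t value = addr->load(std::memory_order_relaxed);  // Lw (or Lwr/Lwl pair on pre-R6)
  std::atomic_thread_fence(std::memory_order_seq_cst);    // __ Sync(0) after the load
  return value;
}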
// int sun.misc.Unsafe.getInt(Object o, long offset)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) {
@@ -1586,7 +1660,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) {
// int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
@@ -1595,25 +1669,16 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
// long sun.misc.Unsafe.getLong(Object o, long offset)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, IsR6(), codegen_);
}
-// long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
-void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, IsR6(), codegen_);
-}
-
// Object sun.misc.Unsafe.getObject(Object o, long offset)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
@@ -1622,7 +1687,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
// Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
@@ -1639,6 +1704,8 @@ static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* in
locations->SetInAt(3, Location::RequiresRegister());
}
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
static void GenUnsafePut(LocationSummary* locations,
Primitive::Type type,
bool is_volatile,
@@ -1663,6 +1730,11 @@ static void GenUnsafePut(LocationSummary* locations,
if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
Register value = locations->InAt(3).AsRegister<Register>();
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ PoisonHeapReference(AT, value);
+ value = AT;
+ }
+
if (is_R6) {
__ Sw(value, TMP, 0);
} else {
@@ -1672,7 +1744,7 @@ static void GenUnsafePut(LocationSummary* locations,
} else {
Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>();
Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>();
-
+ CHECK(!is_volatile); // TODO: support atomic 8-byte volatile stores.
if (is_R6) {
__ Sw(value_lo, TMP, 0);
__ Sw(value_hi, TMP, 4);
@@ -1806,50 +1878,83 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) {
codegen_);
}
-// void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x)
-void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
- CreateIntIntIntIntToVoidLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(),
- Primitive::kPrimLong,
- /* is_volatile */ true,
- /* is_ordered */ false,
- IsR6(),
- codegen_);
-}
-
-static void CreateIntIntIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
-
locations->SetOut(Location::RequiresRegister());
+
+ // Temporary register used in CAS by (Baker) read barrier.
+ if (can_call) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS* codegen) {
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* codegen) {
MipsAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
bool isR6 = codegen->GetInstructionSetFeatures().IsR6();
Register base = locations->InAt(1).AsRegister<Register>();
- Register offset_lo = locations->InAt(2).AsRegisterPairLow<Register>();
+ Location offset_loc = locations->InAt(2);
+ Register offset_lo = offset_loc.AsRegisterPairLow<Register>();
Register expected = locations->InAt(3).AsRegister<Register>();
Register value = locations->InAt(4).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
DCHECK_NE(base, out);
DCHECK_NE(offset_lo, out);
DCHECK_NE(expected, out);
if (type == Primitive::kPrimNot) {
- // Mark card for object assuming new value is stored.
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+ // object and scan the receiver at the next GC for nothing.
bool value_can_be_null = true; // TODO: Worth finding out this information?
codegen->MarkGCCard(base, value, value_can_be_null);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Need to make sure the reference stored in the field is a to-space
+ // one before attempting the CAS or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ out_loc, // Unused, used only as a "temporary" within the read barrier.
+ base,
+ /* offset */ 0u,
+ /* index */ offset_loc,
+ ScaleFactor::TIMES_1,
+ temp,
+ /* needs_null_check */ false,
+ /* always_update_field */ true);
+ }
+ }
+
+ MipsLabel loop_head, exit_loop;
+ __ Addu(TMP, base, offset_lo);
+
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ PoisonHeapReference(expected);
+ // Do not poison `value`, if it is the same register as
+ // `expected`, which has just been poisoned.
+ if (value != expected) {
+ __ PoisonHeapReference(value);
+ }
}
// do {
@@ -1857,8 +1962,6 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
// result = tmp_value != 0;
- MipsLabel loop_head, exit_loop;
- __ Addu(TMP, base, offset_lo);
__ Sync(0);
__ Bind(&loop_head);
if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
@@ -1868,8 +1971,8 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ LlR2(out, TMP);
}
} else {
- LOG(FATAL) << "Unsupported op size " << type;
- UNREACHABLE();
+ LOG(FATAL) << "Unsupported op size " << type;
+ UNREACHABLE();
}
__ Subu(out, out, expected); // If we didn't get the 'expected'
__ Sltiu(out, out, 1); // value, set 'out' to false, and
@@ -1894,24 +1997,43 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// cycle atomically then retry.
__ Bind(&exit_loop);
__ Sync(0);
+
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ UnpoisonHeapReference(expected);
+ // Do not unpoison `value`, if it is the same register as
+ // `expected`, which has just been unpoisoned.
+ if (value != expected) {
+ __ UnpoisonHeapReference(value);
+ }
+ }
}
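// Minimal sketch, not from the patch, of the poison/unpoison symmetry wrapped
// around the LL/SC loop above; the loop itself is modeled with std::atomic.
// PoisonRef is a stand-in for the heap-reference poisoning transform, and the
// generated code additionally restores (unpoisons) the input registers after
// the loop, which value semantics make unnecessary here.
#include <atomic>
#include <cstdint>

constexpr bool kPoisonHeapReferencesSketch = true;

uint32_t PoisonRef(uint32_t ref) { return 0u - ref; }  // illustrative encoding only

bool CasObjectFieldSketch(std::atomic<uint32_t>* field, uint32_t expected, uint32_t value) {
  if (kPoisonHeapReferencesSketch) {
    // In the generated code `expected` and `value` are registers, so the
    // second poison is skipped when they alias; with values we poison both.
    expected = PoisonRef(expected);
    value = PoisonRef(value);
  }
  // The Ll/Sc (or LlR2/ScR2) retry loop, modeled as a strong compare-exchange.
  return field->compare_exchange_strong(expected, value, std::memory_order_seq_cst);
}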
// boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
- CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+ GenCas(invoke, Primitive::kPrimInt, codegen_);
}
// boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
- CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ return;
+ }
+
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ GenCas(invoke, Primitive::kPrimNot, codegen_);
}
// int java.lang.String.compareTo(String anotherString)
@@ -1989,20 +2111,24 @@ void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) {
__ LoadConst32(out, 1);
return;
}
-
- // Check if input is null, return false if it is.
- __ Beqz(arg, &return_false);
+ StringEqualsOptimizations optimizations(invoke);
+ if (!optimizations.GetArgumentNotNull()) {
+ // Check if input is null, return false if it is.
+ __ Beqz(arg, &return_false);
+ }
// Reference equality check, return true if same reference.
__ Beq(str, arg, &return_true);
- // Instanceof check for the argument by comparing class fields.
- // All string objects must have the same type since String cannot be subclassed.
- // Receiver must be a string object, so its class field is equal to all strings' class fields.
- // If the argument is a string object, its class field must be equal to receiver's class field.
- __ Lw(temp1, str, class_offset);
- __ Lw(temp2, arg, class_offset);
- __ Bne(temp1, temp2, &return_false);
+ if (!optimizations.GetArgumentIsString()) {
+ // Instanceof check for the argument by comparing class fields.
+ // All string objects must have the same type since String cannot be subclassed.
+ // Receiver must be a string object, so its class field is equal to all strings' class fields.
+ // If the argument is a string object, its class field must be equal to receiver's class field.
+ __ Lw(temp1, str, class_offset);
+ __ Lw(temp2, arg, class_offset);
+ __ Bne(temp1, temp2, &return_false);
+ }
// Load `count` fields of this and argument strings.
__ Lw(temp1, str, count_offset);
@@ -2527,7 +2653,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
@@ -2535,17 +2661,9 @@ void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke)
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
- // We will call memcpy() to do the actual work. Allocate the temporary
- // registers to use the correct input registers, and output register.
- // memcpy() uses the normal MIPS calling convention.
- InvokeRuntimeCallingConvention calling_convention;
-
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
- Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
- locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
}
void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -2564,16 +2682,11 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register dstBegin = locations->InAt(4).AsRegister<Register>();
Register dstPtr = locations->GetTemp(0).AsRegister<Register>();
- DCHECK_EQ(dstPtr, A0);
Register srcPtr = locations->GetTemp(1).AsRegister<Register>();
- DCHECK_EQ(srcPtr, A1);
Register numChrs = locations->GetTemp(2).AsRegister<Register>();
- DCHECK_EQ(numChrs, A2);
-
- Register dstReturn = locations->GetTemp(3).AsRegister<Register>();
- DCHECK_EQ(dstReturn, V0);
MipsLabel done;
+ MipsLabel loop;
// Location of data in char array buffer.
const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -2602,7 +2715,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
__ Sll(TMP, TMP, 31);
- // If string is uncompressed, use memcpy() path.
+ // If string is uncompressed, use uncompressed path.
__ Bnez(TMP, &uncompressed_copy);
// Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -2628,10 +2741,13 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Addu(srcPtr, srcPtr, AT);
}
- // Calculate number of bytes to copy from number of characters.
- __ Sll(numChrs, numChrs, char_shift);
-
- codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+ __ Bind(&loop);
+ __ Lh(AT, srcPtr, 0);
+ __ Addiu(numChrs, numChrs, -1);
+ __ Addiu(srcPtr, srcPtr, char_size);
+ __ Sh(AT, dstPtr, 0);
+ __ Addiu(dstPtr, dstPtr, char_size);
+ __ Bnez(numChrs, &loop);
__ Bind(&done);
}
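// Plain C++ sketch, not from the patch, of the copy loop that now replaces
// the kQuickMemcpy runtime call: one 16-bit Java char is loaded and stored
// per iteration while the pointers and the remaining count advance together.
#include <cstdint>

void CopyCharsSketch(const uint16_t* src_ptr, uint16_t* dst_ptr, int32_t num_chrs) {
  // The generated code only reaches the loop with num_chrs > 0.
  do {
    *dst_ptr++ = *src_ptr++;  // Lh / Sh pair
    --num_chrs;               // Addiu(numChrs, numChrs, -1)
  } while (num_chrs != 0);    // Bnez(numChrs, &loop)
}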
@@ -2642,6 +2758,8 @@ UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor)
UNIMPLEMENTED_INTRINSIC(MIPS, MathRint)
UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetLongVolatile)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafePutLongVolatile)
UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
@@ -2682,6 +2800,8 @@ UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
+UNIMPLEMENTED_INTRINSIC(MIPS, IntegerValueOf)
+
UNREACHABLE_INTRINSICS(MIPS)
#undef __
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 3888828722..c2518a7861 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -834,15 +834,15 @@ static void GenRoundingMode(LocationSummary* locations,
__ Bnezc(AT, &done);
// Long outLong = floor/ceil(in);
- // if outLong == Long.MAX_VALUE {
+  //     if ((outLong == Long.MAX_VALUE) || (outLong == Long.MIN_VALUE)) {
// // floor()/ceil() has almost certainly returned a value
// // which can't be successfully represented as a signed
// // 64-bit number. Java expects that the input value will
// // be returned in these cases.
// // There is also a small probability that floor(in)/ceil(in)
// // correctly truncates/rounds up the input value to
- // // Long.MAX_VALUE. In that case, this exception handling
- // // code still does the correct thing.
+ // // Long.MAX_VALUE or Long.MIN_VALUE. In these cases, this
+ // // exception handling code still does the correct thing.
// return in;
// }
if (mode == kFloor) {
@@ -852,8 +852,14 @@ static void GenRoundingMode(LocationSummary* locations,
}
__ Dmfc1(AT, out);
__ MovD(out, in);
- __ LoadConst64(TMP, kPrimLongMax);
- __ Beqc(AT, TMP, &done);
+ __ Daddiu(TMP, AT, 1);
+ __ Dati(TMP, 0x8000); // TMP = AT + 0x8000 0000 0000 0001
+ // or AT - 0x7FFF FFFF FFFF FFFF.
+ // IOW, TMP = 1 if AT = Long.MIN_VALUE
+ // or TMP = 0 if AT = Long.MAX_VALUE.
+ __ Dsrl(TMP, TMP, 1); // TMP = 0 if AT = Long.MIN_VALUE
+ // or AT = Long.MAX_VALUE.
+ __ Beqzc(TMP, &done);
// double out = outLong;
// return out;
@@ -1151,16 +1157,31 @@ void IntrinsicCodeGeneratorMIPS64::VisitThreadCurrentThread(HInvoke* invoke) {
Thread::PeerOffset<kMips64PointerSize>().Int32Value());
}
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ Primitive::Type type) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+ invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ locations->SetOut(Location::RequiresRegister(),
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in InstructionCodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
static void GenUnsafeGet(HInvoke* invoke,
Primitive::Type type,
bool is_volatile,
@@ -1168,29 +1189,71 @@ static void GenUnsafeGet(HInvoke* invoke,
LocationSummary* locations = invoke->GetLocations();
DCHECK((type == Primitive::kPrimInt) ||
(type == Primitive::kPrimLong) ||
- (type == Primitive::kPrimNot));
+ (type == Primitive::kPrimNot)) << type;
Mips64Assembler* assembler = codegen->GetAssembler();
+ // Target register.
+ Location trg_loc = locations->Out();
+ GpuRegister trg = trg_loc.AsRegister<GpuRegister>();
// Object pointer.
- GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
+ Location base_loc = locations->InAt(1);
+ GpuRegister base = base_loc.AsRegister<GpuRegister>();
// Long offset.
- GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
- GpuRegister trg = locations->Out().AsRegister<GpuRegister>();
+ Location offset_loc = locations->InAt(2);
+ GpuRegister offset = offset_loc.AsRegister<GpuRegister>();
- __ Daddu(TMP, base, offset);
- if (is_volatile) {
- __ Sync(0);
+ if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) {
+ __ Daddu(TMP, base, offset);
}
+
switch (type) {
+ case Primitive::kPrimLong:
+ __ Ld(trg, TMP, 0);
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ break;
+
case Primitive::kPrimInt:
__ Lw(trg, TMP, 0);
+ if (is_volatile) {
+ __ Sync(0);
+ }
break;
case Primitive::kPrimNot:
- __ Lwu(trg, TMP, 0);
- break;
-
- case Primitive::kPrimLong:
- __ Ld(trg, TMP, 0);
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+ trg_loc,
+ base,
+ /* offset */ 0U,
+ /* index */ offset_loc,
+ TIMES_1,
+ temp,
+ /* needs_null_check */ false);
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ } else {
+ __ Lwu(trg, TMP, 0);
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ codegen->GenerateReadBarrierSlow(invoke,
+ trg_loc,
+ trg_loc,
+ base_loc,
+ /* offset */ 0U,
+ /* index */ offset_loc);
+ }
+ } else {
+ __ Lwu(trg, TMP, 0);
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ __ MaybeUnpoisonHeapReference(trg);
+ }
break;
default:
@@ -1201,7 +1264,7 @@ static void GenUnsafeGet(HInvoke* invoke,
// int sun.misc.Unsafe.getInt(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
@@ -1210,7 +1273,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
// int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
@@ -1219,7 +1282,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
// long sun.misc.Unsafe.getLong(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
@@ -1228,7 +1291,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
// long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
@@ -1237,7 +1300,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
// Object sun.misc.Unsafe.getObject(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
@@ -1246,7 +1309,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
// Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
@@ -1263,6 +1326,8 @@ static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetInAt(3, Location::RequiresRegister());
}
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
static void GenUnsafePut(LocationSummary* locations,
Primitive::Type type,
bool is_volatile,
@@ -1285,7 +1350,12 @@ static void GenUnsafePut(LocationSummary* locations,
switch (type) {
case Primitive::kPrimInt:
case Primitive::kPrimNot:
- __ Sw(value, TMP, 0);
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ PoisonHeapReference(AT, value);
+ __ Sw(AT, TMP, 0);
+ } else {
+ __ Sw(value, TMP, 0);
+ }
break;
case Primitive::kPrimLong:
@@ -1423,35 +1493,82 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
codegen_);
}
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
-
locations->SetOut(Location::RequiresRegister());
+
+ // Temporary register used in CAS by (Baker) read barrier.
+ if (can_call) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
Mips64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
- GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
+ Location offset_loc = locations->InAt(2);
+ GpuRegister offset = offset_loc.AsRegister<GpuRegister>();
GpuRegister expected = locations->InAt(3).AsRegister<GpuRegister>();
GpuRegister value = locations->InAt(4).AsRegister<GpuRegister>();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ Location out_loc = locations->Out();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
DCHECK_NE(base, out);
DCHECK_NE(offset, out);
DCHECK_NE(expected, out);
if (type == Primitive::kPrimNot) {
- // Mark card for object assuming new value is stored.
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+ // object and scan the receiver at the next GC for nothing.
bool value_can_be_null = true; // TODO: Worth finding out this information?
codegen->MarkGCCard(base, value, value_can_be_null);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Need to make sure the reference stored in the field is a to-space
+ // one before attempting the CAS or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ out_loc, // Unused, used only as a "temporary" within the read barrier.
+ base,
+ /* offset */ 0u,
+ /* index */ offset_loc,
+ ScaleFactor::TIMES_1,
+ temp,
+ /* needs_null_check */ false,
+ /* always_update_field */ true);
+ }
+ }
+
+ Mips64Label loop_head, exit_loop;
+ __ Daddu(TMP, base, offset);
+
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ PoisonHeapReference(expected);
+ // Do not poison `value`, if it is the same register as
+ // `expected`, which has just been poisoned.
+ if (value != expected) {
+ __ PoisonHeapReference(value);
+ }
}
// do {
@@ -1459,8 +1576,6 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
// result = tmp_value != 0;
- Mips64Label loop_head, exit_loop;
- __ Daddu(TMP, base, offset);
__ Sync(0);
__ Bind(&loop_head);
if (type == Primitive::kPrimLong) {
@@ -1469,6 +1584,11 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// Note: We will need a read barrier here, when read barrier
// support is added to the MIPS64 back end.
__ Ll(out, TMP);
+ if (type == Primitive::kPrimNot) {
+ // The LL instruction sign-extends the 32-bit value, but
+ // 32-bit references must be zero-extended. Zero-extend `out`.
+ __ Dext(out, out, 0, 32);
+ }
}
__ Dsubu(out, out, expected); // If we didn't get the 'expected'
__ Sltiu(out, out, 1); // value, set 'out' to false, and
@@ -1487,33 +1607,52 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// cycle atomically then retry.
__ Bind(&exit_loop);
__ Sync(0);
+
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ UnpoisonHeapReference(expected);
+ // Do not unpoison `value`, if it is the same register as
+ // `expected`, which has just been unpoisoned.
+ if (value != expected) {
+ __ UnpoisonHeapReference(value);
+ }
+ }
}
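
In C++ terms, the LL/SC retry loop above implements a strong compare-and-swap that returns whether the swap succeeded. A minimal sketch of the same semantics using std::atomic (illustration only, not ART code; the generated code additionally handles heap-reference poisoning and the Baker read barrier shown above):

    #include <atomic>
    #include <cstdint>

    // Same contract as the emitted LL/SC loop: compare *field with `expected`,
    // store `value` only on a match, and report success. Spurious
    // store-conditional failures are retried, which compare_exchange_strong
    // already hides from the caller.
    bool UnsafeCasInt(std::atomic<int32_t>* field, int32_t expected, int32_t value) {
      return field->compare_exchange_strong(expected, value, std::memory_order_seq_cst);
    }
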
// boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
- CreateIntIntIntIntIntToInt(arena_, invoke);
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+ GenCas(invoke, Primitive::kPrimInt, codegen_);
}
// boolean sun.misc.Unsafe.compareAndSwapLong(Object o, long offset, long expected, long x)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
- CreateIntIntIntIntIntToInt(arena_, invoke);
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
+ GenCas(invoke, Primitive::kPrimLong, codegen_);
}
// boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
- CreateIntIntIntIntIntToInt(arena_, invoke);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ return;
+ }
+
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ GenCas(invoke, Primitive::kPrimNot, codegen_);
}
// int java.lang.String.compareTo(String anotherString)
@@ -1593,19 +1732,24 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) {
return;
}
- // Check if input is null, return false if it is.
- __ Beqzc(arg, &return_false);
+ StringEqualsOptimizations optimizations(invoke);
+ if (!optimizations.GetArgumentNotNull()) {
+ // Check if input is null, return false if it is.
+ __ Beqzc(arg, &return_false);
+ }
// Reference equality check, return true if same reference.
__ Beqc(str, arg, &return_true);
- // Instanceof check for the argument by comparing class fields.
- // All string objects must have the same type since String cannot be subclassed.
- // Receiver must be a string object, so its class field is equal to all strings' class fields.
- // If the argument is a string object, its class field must be equal to receiver's class field.
- __ Lw(temp1, str, class_offset);
- __ Lw(temp2, arg, class_offset);
- __ Bnec(temp1, temp2, &return_false);
+ if (!optimizations.GetArgumentIsString()) {
+ // Instanceof check for the argument by comparing class fields.
+ // All string objects must have the same type since String cannot be subclassed.
+ // Receiver must be a string object, so its class field is equal to all strings' class fields.
+ // If the argument is a string object, its class field must be equal to receiver's class field.
+ __ Lw(temp1, str, class_offset);
+ __ Lw(temp2, arg, class_offset);
+ __ Bnec(temp1, temp2, &return_false);
+ }
// Load `count` fields of this and argument strings.
__ Lw(temp1, str, count_offset);
@@ -1860,7 +2004,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) {
// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
@@ -1868,17 +2012,9 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
- // We will call memcpy() to do the actual work. Allocate the temporary
- // registers to use the correct input registers, and output register.
- // memcpy() uses the normal MIPS calling conventions.
- InvokeRuntimeCallingConvention calling_convention;
-
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
- Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimLong);
- locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
}
void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1897,16 +2033,11 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
GpuRegister dstBegin = locations->InAt(4).AsRegister<GpuRegister>();
GpuRegister dstPtr = locations->GetTemp(0).AsRegister<GpuRegister>();
- DCHECK_EQ(dstPtr, A0);
GpuRegister srcPtr = locations->GetTemp(1).AsRegister<GpuRegister>();
- DCHECK_EQ(srcPtr, A1);
GpuRegister numChrs = locations->GetTemp(2).AsRegister<GpuRegister>();
- DCHECK_EQ(numChrs, A2);
-
- GpuRegister dstReturn = locations->GetTemp(3).AsRegister<GpuRegister>();
- DCHECK_EQ(dstReturn, V0);
Mips64Label done;
+ Mips64Label loop;
// Location of data in char array buffer.
const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -1930,7 +2061,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
__ Dext(TMP, TMP, 0, 1);
- // If string is uncompressed, use memcpy() path.
+ // If string is uncompressed, use uncompressed path.
__ Bnezc(TMP, &uncompressed_copy);
// Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -1951,10 +2082,13 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Daddiu(srcPtr, srcObj, value_offset);
__ Dlsa(srcPtr, srcBegin, srcPtr, char_shift);
- // Calculate number of bytes to copy from number of characters.
- __ Dsll(numChrs, numChrs, char_shift);
-
- codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+ __ Bind(&loop);
+ __ Lh(AT, srcPtr, 0);
+ __ Daddiu(numChrs, numChrs, -1);
+ __ Daddiu(srcPtr, srcPtr, char_size);
+ __ Sh(AT, dstPtr, 0);
+ __ Daddiu(dstPtr, dstPtr, char_size);
+ __ Bnezc(numChrs, &loop);
__ Bind(&done);
}
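
The Lh/Sh loop above replaces the former kQuickMemcpy runtime call with an inline copy of one 16-bit code unit per iteration. A sketch of the equivalent scalar loop in C++ (illustrative; the real inputs are registers already adjusted by srcBegin/dstBegin and the data offsets):

    #include <cstddef>
    #include <cstdint>

    // Mirrors the emitted loop: load a halfword, advance both pointers by the
    // character size, store it, and loop while characters remain.
    void CopyChars(const uint16_t* src, uint16_t* dst, size_t num_chars) {
      while (num_chars != 0) {
        *dst++ = *src++;
        --num_chars;
      }
    }
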
@@ -2075,6 +2209,8 @@ UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
+UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerValueOf)
+
UNREACHABLE_INTRINSICS(MIPS64)
#undef __
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index e1b7ea53b4..ecf919bceb 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2878,6 +2878,49 @@ static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1)
return instruction->InputAt(input0) == instruction->InputAt(input1);
}
+// Compute base address for the System.arraycopy intrinsic in `base`.
+static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
+ Primitive::Type type,
+ const Register& array,
+ const Location& pos,
+ const Register& base) {
+ // This routine is only used by the SystemArrayCopy intrinsic at the
+ // moment. We can allow other types, such as Primitive::kPrimChar, as
+ // `type` to implement the SystemArrayCopyChar intrinsic.
+ DCHECK_EQ(type, Primitive::kPrimNot);
+ const int32_t element_size = Primitive::ComponentSize(type);
+ const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
+ const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ if (pos.IsConstant()) {
+ int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(base, Address(array, element_size * constant + data_offset));
+ } else {
+ __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
+ }
+}
+
+// Compute end source address for the System.arraycopy intrinsic in `end`.
+static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
+ Primitive::Type type,
+ const Location& copy_length,
+ const Register& base,
+ const Register& end) {
+ // This routine is only used by the SystemArrayCopy intrinsic at the
+ // moment. We can allow other types, such as Primitive::kPrimChar, as
+ // `type` to implement the SystemArrayCopyChar intrinsic.
+ DCHECK_EQ(type, Primitive::kPrimNot);
+ const int32_t element_size = Primitive::ComponentSize(type);
+ const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
+
+ if (copy_length.IsConstant()) {
+ int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(end, Address(base, element_size * constant));
+ } else {
+ __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
+ }
+}
+
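
Both helpers fold the array data offset and the scaled position into a single leal; in plain C++ the addresses they compute are simply the following (sketch; `array` stands for the array object's address and `data_offset` for mirror::Array::DataOffset(element_size)):

    #include <cstdint>

    // Address produced by GenSystemArrayCopyBaseAddress.
    uintptr_t BaseAddress(uintptr_t array, uint32_t data_offset,
                          int32_t pos, int32_t element_size) {
      return array + data_offset + static_cast<uintptr_t>(pos) * element_size;
    }

    // Address produced by GenSystemArrayCopyEndAddress (one past the last element copied).
    uintptr_t EndAddress(uintptr_t base, int32_t length, int32_t element_size) {
      return base + static_cast<uintptr_t>(length) * element_size;
    }
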
void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
@@ -3182,16 +3225,11 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
+ const Primitive::Type type = Primitive::kPrimNot;
+ const int32_t element_size = Primitive::ComponentSize(type);
+
// Compute the base source address in `temp1`.
- int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- DCHECK_EQ(element_size, 4);
- uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
- if (src_pos.IsConstant()) {
- int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp1, Address(src, element_size * constant + offset));
- } else {
- __ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
- }
+ GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// If it is needed (in the case of the fast-path loop), the base
@@ -3199,20 +3237,15 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// intermediate computations.
// Compute the end source address in `temp3`.
- if (length.IsConstant()) {
- int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp3, Address(temp1, element_size * constant));
- } else {
- if (length.IsStackSlot()) {
- // Location `length` is again pointing at a stack slot, as
- // register `temp3` (which was containing the length parameter
- // earlier) has been overwritten; restore it now
- DCHECK(length.Equals(length_arg));
- __ movl(temp3, Address(ESP, length.GetStackIndex()));
- length = Location::RegisterLocation(temp3);
- }
- __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+ if (length.IsStackSlot()) {
+ // Location `length` is again pointing at a stack slot, as
+ // register `temp3` (which was containing the length parameter
+ // earlier) has been overwritten; restore it now
+ DCHECK(length.Equals(length_arg));
+ __ movl(temp3, Address(ESP, length.GetStackIndex()));
+ length = Location::RegisterLocation(temp3);
}
+ GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
// SystemArrayCopy implementation for Baker read barriers (see
// also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
@@ -3266,15 +3299,8 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
// Fast-path copy.
-
- // Set the base destination address in `temp2`.
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp2, Address(dest, element_size * constant + offset));
- } else {
- __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
- }
-
+ // Compute the base destination address in `temp2`.
+ GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
// Iterate over the arrays and do a raw copy of the objects. We don't need to
// poison/unpoison.
__ Bind(&loop);
@@ -3291,23 +3317,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ Bind(&done);
} else {
// Non read barrier code.
-
// Compute the base destination address in `temp2`.
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp2, Address(dest, element_size * constant + offset));
- } else {
- __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
- }
-
+ GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
// Compute the end source address in `temp3`.
- if (length.IsConstant()) {
- int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp3, Address(temp1, element_size * constant));
- } else {
- __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
- }
-
+ GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
// Iterate over the arrays and do a raw copy of the objects. We don't need to
// poison/unpoison.
NearLabel loop, done;
@@ -3326,15 +3339,70 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1,
- temp2,
- dest,
- Register(kNoRegister),
- /* value_can_be_null */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConvention calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ Location::RegisterLocation(EAX),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ X86Assembler* assembler = GetAssembler();
+
+ Register out = locations->Out().AsRegister<Register>();
+ InvokeRuntimeCallingConvention calling_convention;
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ movl(out, Immediate(address));
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ movl(Address(out, info.value_offset), Immediate(value));
+ }
+ } else {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ // Check bounds of our cache.
+ __ leal(out, Address(in, -info.low));
+ __ cmpl(out, Immediate(info.high - info.low + 1));
+ NearLabel allocate, done;
+ __ j(kAboveEqual, &allocate);
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ __ movl(out, Address(out, TIMES_4, data_offset + address));
+ __ MaybeUnpoisonHeapReference(out);
+ __ jmp(&done);
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ movl(Address(out, info.value_offset), in);
+ __ Bind(&done);
+ }
+}
+
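
For a non-constant input, the code above is the familiar Integer.valueOf() cache fast path: look the value up in the boot-image cache if it falls in [low, high], otherwise allocate and initialize a fresh boxed object. A hedged C++ sketch of that decision (BoxedInteger and AllocateBoxed are illustrative stand-ins, not runtime types; the real slow path calls the kQuickAllocObjectInitialized entrypoint):

    #include <cstdint>

    struct BoxedInteger { int32_t value; };  // stand-in for the j.l.Integer mirror object

    BoxedInteger* AllocateBoxed(int32_t v) { return new BoxedInteger{v}; }  // stand-in allocation

    BoxedInteger* IntegerValueOf(int32_t v, BoxedInteger* const* cache,
                                 int32_t low, int32_t high) {
      if (v >= low && v <= high) {
        return cache[v - low];   // boot-image object, no allocation
      }
      BoxedInteger* boxed = AllocateBoxed(v);
      boxed->value = v;          // matches the movl(Address(out, info.value_offset), in)
      return boxed;
    }
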
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 05d270a4e6..13956dfb8e 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -39,7 +39,6 @@ IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX8
: arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}
-
X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}
@@ -1119,6 +1118,47 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
}
+// Compute base source address, base destination address, and end
+// source address for the System.arraycopy intrinsic in `src_base`,
+// `dst_base` and `src_end` respectively.
+static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
+ Primitive::Type type,
+ const CpuRegister& src,
+ const Location& src_pos,
+ const CpuRegister& dst,
+ const Location& dst_pos,
+ const Location& copy_length,
+ const CpuRegister& src_base,
+ const CpuRegister& dst_base,
+ const CpuRegister& src_end) {
+ // This routine is only used by the SystemArrayCopy intrinsic.
+ DCHECK_EQ(type, Primitive::kPrimNot);
+ const int32_t element_size = Primitive::ComponentSize(type);
+ const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
+ const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ if (src_pos.IsConstant()) {
+ int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(src_base, Address(src, element_size * constant + data_offset));
+ } else {
+ __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
+ }
+
+ if (dst_pos.IsConstant()) {
+ int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(dst_base, Address(dst, element_size * constant + data_offset));
+ } else {
+ __ leal(dst_base, Address(dst, dst_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
+ }
+
+ if (copy_length.IsConstant()) {
+ int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(src_end, Address(src_base, element_size * constant));
+ } else {
+ __ leal(src_end, Address(src_base, copy_length.AsRegister<CpuRegister>(), scale_factor, 0));
+ }
+}
+
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
@@ -1367,30 +1407,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- // Compute base source address, base destination address, and end source address.
+ const Primitive::Type type = Primitive::kPrimNot;
+ const int32_t element_size = Primitive::ComponentSize(type);
- int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
- if (src_pos.IsConstant()) {
- int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp1, Address(src, element_size * constant + offset));
- } else {
- __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
- }
-
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp2, Address(dest, element_size * constant + offset));
- } else {
- __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
- }
-
- if (length.IsConstant()) {
- int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp3, Address(temp1, element_size * constant));
- } else {
- __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
- }
+ // Compute base source address, base destination address, and end
+ // source address in `temp1`, `temp2` and `temp3` respectively.
+ GenSystemArrayCopyAddresses(
+ GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// SystemArrayCopy implementation for Baker read barriers (see
@@ -1475,11 +1498,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1,
- temp2,
- dest,
- CpuRegister(kNoRegister),
- /* value_can_be_null */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
@@ -2995,6 +3014,73 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConvention calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ Location::RegisterLocation(RAX),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ X86_64Assembler* assembler = GetAssembler();
+
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ InvokeRuntimeCallingConvention calling_convention;
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ movl(out, Immediate(static_cast<int32_t>(address)));
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ movl(argument, Immediate(static_cast<int32_t>(address)));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ movl(Address(out, info.value_offset), Immediate(value));
+ }
+ } else {
+ CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
+ // Check bounds of our cache.
+ __ leal(out, Address(in, -info.low));
+ __ cmpl(out, Immediate(info.high - info.low + 1));
+ NearLabel allocate, done;
+ __ j(kAboveEqual, &allocate);
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ if (data_offset + address <= std::numeric_limits<int32_t>::max()) {
+ __ movl(out, Address(out, TIMES_4, data_offset + address));
+ } else {
+ CpuRegister temp = CpuRegister(calling_convention.GetRegisterAt(0));
+ __ movl(temp, Immediate(static_cast<int32_t>(data_offset + address)));
+ __ movl(out, Address(temp, out, TIMES_4, 0));
+ }
+ __ MaybeUnpoisonHeapReference(out);
+ __ jmp(&done);
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ movl(argument, Immediate(static_cast<int32_t>(address)));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ movl(Address(out, info.value_offset), in);
+ __ Bind(&done);
+ }
+}
+
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 5bcfa4c98b..8d15f78cce 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -28,7 +28,18 @@ namespace art {
*/
class LICMTest : public CommonCompilerTest {
public:
- LICMTest() : pool_(), allocator_(&pool_) {
+ LICMTest()
+ : pool_(),
+ allocator_(&pool_),
+ entry_(nullptr),
+ loop_preheader_(nullptr),
+ loop_header_(nullptr),
+ loop_body_(nullptr),
+ return_(nullptr),
+ exit_(nullptr),
+ parameter_(nullptr),
+ int_constant_(nullptr),
+ float_constant_(nullptr) {
graph_ = CreateGraph(&allocator_);
}
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 2d3c00fb97..48699b33ae 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -38,7 +38,8 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
position_(pos),
is_singleton_(true),
is_singleton_and_not_returned_(true),
- is_singleton_and_not_deopt_visible_(true) {
+ is_singleton_and_not_deopt_visible_(true),
+ has_index_aliasing_(false) {
CalculateEscape(reference_,
nullptr,
&is_singleton_,
@@ -68,13 +69,36 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
}
+ // Returns true if reference_ is a singleton and returned to the caller or
+ // used as an environment local of an HDeoptimize instruction.
+ bool IsSingletonAndNonRemovable() const {
+ return is_singleton_ &&
+ (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_);
+ }
+
+ bool HasIndexAliasing() {
+ return has_index_aliasing_;
+ }
+
+ void SetHasIndexAliasing(bool has_index_aliasing) {
+ // Only allow setting to true.
+ DCHECK(has_index_aliasing);
+ has_index_aliasing_ = has_index_aliasing;
+ }
+
private:
HInstruction* const reference_;
const size_t position_; // position in HeapLocationCollector's ref_info_array_.
- bool is_singleton_; // can only be referred to by a single name in the method,
- bool is_singleton_and_not_returned_; // and not returned to caller,
- bool is_singleton_and_not_deopt_visible_; // and not used as an environment local of HDeoptimize.
+ // Can only be referred to by a single name in the method.
+ bool is_singleton_;
+ // Is singleton and not returned to caller.
+ bool is_singleton_and_not_returned_;
+ // Is singleton and not used as an environment local of HDeoptimize.
+ bool is_singleton_and_not_deopt_visible_;
+ // Some heap locations with reference_ have array index aliasing,
+ // e.g. arr[i] and arr[j] may be the same location.
+ bool has_index_aliasing_;
DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
};
@@ -321,6 +345,8 @@ class HeapLocationCollector : public HGraphVisitor {
// Different constant indices do not alias.
return false;
}
+ ReferenceInfo* ref_info = loc1->GetReferenceInfo();
+ ref_info->SetHasIndexAliasing(true);
}
return true;
}
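
The SetHasIndexAliasing(true) call records that two accesses to the same array use indices that may be equal; that is exactly the situation that later blocks store elimination on array elements. A small illustration (plain C++, while the actual analysis runs on HIR heap locations):

    // If i and j may be equal, the store to a[i] is not dead even though a
    // store to a[j] follows: the final load may observe either value,
    // depending on whether the indices alias.
    int MayAlias(int* a, int i, int j) {
      a[i] = 1;
      a[j] = 2;      // possibly the same location as a[i]
      return a[i];   // 1 or 2, so neither store can be removed
    }
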
@@ -497,7 +523,8 @@ class LSEVisitor : public HGraphVisitor {
removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)),
- singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
+ singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+ singleton_new_arrays_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
}
void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -534,20 +561,24 @@ class LSEVisitor : public HGraphVisitor {
}
// At this point, stores in possibly_removed_stores_ can be safely removed.
- for (size_t i = 0, e = possibly_removed_stores_.size(); i < e; i++) {
- HInstruction* store = possibly_removed_stores_[i];
+ for (HInstruction* store : possibly_removed_stores_) {
DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
store->GetBlock()->RemoveInstruction(store);
}
// Eliminate allocations that are not used.
- for (size_t i = 0, e = singleton_new_instances_.size(); i < e; i++) {
- HInstruction* new_instance = singleton_new_instances_[i];
+ for (HInstruction* new_instance : singleton_new_instances_) {
if (!new_instance->HasNonEnvironmentUses()) {
new_instance->RemoveEnvironmentUsers();
new_instance->GetBlock()->RemoveInstruction(new_instance);
}
}
+ for (HInstruction* new_array : singleton_new_arrays_) {
+ if (!new_array->HasNonEnvironmentUses()) {
+ new_array->RemoveEnvironmentUsers();
+ new_array->GetBlock()->RemoveInstruction(new_array);
+ }
+ }
}
private:
@@ -558,7 +589,7 @@ class LSEVisitor : public HGraphVisitor {
void KeepIfIsStore(HInstruction* heap_value) {
if (heap_value == kDefaultHeapValue ||
heap_value == kUnknownHeapValue ||
- !heap_value->IsInstanceFieldSet()) {
+ !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) {
return;
}
auto idx = std::find(possibly_removed_stores_.begin(),
@@ -600,14 +631,17 @@ class LSEVisitor : public HGraphVisitor {
for (size_t i = 0; i < heap_values.size(); i++) {
HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
ReferenceInfo* ref_info = location->GetReferenceInfo();
- if (!ref_info->IsSingleton() || location->IsValueKilledByLoopSideEffects()) {
- // heap value is killed by loop side effects (stored into directly, or due to
- // aliasing).
+ if (ref_info->IsSingletonAndRemovable() &&
+ !location->IsValueKilledByLoopSideEffects()) {
+ // A removable singleton's field that's not stored into inside a loop is
+ // invariant throughout the loop. Nothing to do.
+ DCHECK(ref_info->IsSingletonAndRemovable());
+ } else {
+ // heap value is killed by loop side effects (stored into directly, or
+ // due to aliasing). Or the heap value may be needed after method return
+ // or deoptimization.
KeepIfIsStore(pre_header_heap_values[i]);
heap_values[i] = kUnknownHeapValue;
- } else {
- // A singleton's field that's not stored into inside a loop is invariant throughout
- // the loop.
}
}
}
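
The branch kept above is the profitable case: a removable singleton whose heap location is never written inside the loop keeps its pre-header value, so loads inside the loop need not re-read memory. A sketch of the shape of code this helps (illustrative C++; in practice the object is a non-escaping instance that LSE has classified as a singleton):

    struct Point { int x; };

    int SumX(int n) {
      Point* p = new Point{5};   // singleton: never escapes, never returned
      int sum = 0;
      for (int i = 0; i < n; ++i) {
        sum += p->x;             // p->x is not stored to in the loop, so the
                                 // load can reuse the value known before it
      }
      delete p;
      return sum;
    }
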
@@ -626,7 +660,7 @@ class LSEVisitor : public HGraphVisitor {
bool from_all_predecessors = true;
ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
HInstruction* singleton_ref = nullptr;
- if (ref_info->IsSingletonAndRemovable()) {
+ if (ref_info->IsSingleton()) {
// We do more analysis of liveness when merging heap values for such
// cases since stores into such references may potentially be eliminated.
singleton_ref = ref_info->GetReference();
@@ -652,8 +686,9 @@ class LSEVisitor : public HGraphVisitor {
}
}
- if (merged_value == kUnknownHeapValue) {
- // There are conflicting heap values from different predecessors.
+ if (merged_value == kUnknownHeapValue || ref_info->IsSingletonAndNonRemovable()) {
+ // There are conflicting heap values from different predecessors,
+ // or the heap value may be needed after method return or deoptimization.
// Keep the last store in each predecessor since future loads cannot be eliminated.
for (HBasicBlock* predecessor : predecessors) {
ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()];
@@ -734,13 +769,16 @@ class LSEVisitor : public HGraphVisitor {
heap_values[idx] = constant;
return;
}
- if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) {
- HInstruction* store = heap_value;
- // This load must be from a singleton since it's from the same field
- // that a "removed" store puts the value. That store must be to a singleton's field.
- DCHECK(ref_info->IsSingleton());
- // Get the real heap value of the store.
- heap_value = store->InputAt(1);
+ if (heap_value != kUnknownHeapValue) {
+ if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) {
+ HInstruction* store = heap_value;
+ // This load must be from a singleton since it's from the same
+ // field/element into which a "removed" store put the value. That store
+ // must be to a singleton's field/element.
+ DCHECK(ref_info->IsSingleton());
+ // Get the real heap value of the store.
+ heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2);
+ }
}
if (heap_value == kUnknownHeapValue) {
// Load isn't eliminated. Put the load as the value into the HeapLocation.
@@ -796,19 +834,19 @@ class LSEVisitor : public HGraphVisitor {
if (Equal(heap_value, value)) {
// Store into the heap location with the same value.
same_value = true;
- } else if (index != nullptr) {
- // For array element, don't eliminate stores since it can be easily aliased
- // with non-constant index.
- } else if (ref_info->IsSingletonAndRemovable()) {
- // Store into a field of a singleton that's not returned. The value cannot be
- // killed due to aliasing/invocation. It can be redundant since future loads can
+ } else if (index != nullptr && ref_info->HasIndexAliasing()) {
+ // For array element, don't eliminate stores if the index can be aliased.
+ } else if (ref_info->IsSingleton()) {
+ // Store into a field of a singleton. The value cannot be killed due to
+ // aliasing/invocation. It can be redundant since future loads can
// directly get the value set by this instruction. The value can still be killed due to
// merging or loop side effects. Stores whose values are killed due to merging/loop side
// effects later will be removed from possibly_removed_stores_ when that is detected.
+ // Stores whose values may be needed after method return or deoptimization
+ // are also removed from possibly_removed_stores_ when that is detected.
possibly_redundant = true;
HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance();
- DCHECK(new_instance != nullptr);
- if (new_instance->IsFinalizable()) {
+ if (new_instance != nullptr && new_instance->IsFinalizable()) {
// Finalizable objects escape globally. Need to keep the store.
possibly_redundant = false;
} else {
@@ -834,7 +872,7 @@ class LSEVisitor : public HGraphVisitor {
if (!same_value) {
if (possibly_redundant) {
- DCHECK(instruction->IsInstanceFieldSet());
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet());
// Put the store as the heap value. If the value is loaded from heap
// by a load later, this store isn't really redundant.
heap_values[idx] = instruction;
@@ -914,6 +952,33 @@ class LSEVisitor : public HGraphVisitor {
value);
}
+ void VisitDeoptimize(HDeoptimize* instruction) {
+ const ArenaVector<HInstruction*>& heap_values =
+ heap_values_for_[instruction->GetBlock()->GetBlockId()];
+ for (HInstruction* heap_value : heap_values) {
+ // Filter out fake instructions before checking instruction kind below.
+ if (heap_value == kUnknownHeapValue || heap_value == kDefaultHeapValue) {
+ continue;
+ }
+ // A store is kept as the heap value for possibly removed stores.
+ if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) {
+ // Check whether the reference for a store is used by an environment local of
+ // HDeoptimize.
+ HInstruction* reference = heap_value->InputAt(0);
+ DCHECK(heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton());
+ for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) {
+ HEnvironment* user = use.GetUser();
+ if (user->GetHolder() == instruction) {
+ // The singleton for the store is visible at this deoptimization
+ // point. Need to keep the store so that the heap value is
+ // seen by the interpreter.
+ KeepIfIsStore(heap_value);
+ }
+ }
+ }
+ }
+ }
+
void HandleInvoke(HInstruction* invoke) {
ArenaVector<HInstruction*>& heap_values =
heap_values_for_[invoke->GetBlock()->GetBlockId()];
@@ -995,6 +1060,27 @@ class LSEVisitor : public HGraphVisitor {
}
}
+ void VisitNewArray(HNewArray* new_array) OVERRIDE {
+ ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
+ if (ref_info == nullptr) {
+ // new_array isn't used for array accesses. No need to process it.
+ return;
+ }
+ if (ref_info->IsSingletonAndRemovable()) {
+ singleton_new_arrays_.push_back(new_array);
+ }
+ ArenaVector<HInstruction*>& heap_values =
+ heap_values_for_[new_array->GetBlock()->GetBlockId()];
+ for (size_t i = 0; i < heap_values.size(); i++) {
+ HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
+ HInstruction* ref = location->GetReferenceInfo()->GetReference();
+ if (ref == new_array && location->GetIndex() != nullptr) {
+ // Array elements are set to default heap values.
+ heap_values[i] = kDefaultHeapValue;
+ }
+ }
+ }
+
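
This mirrors what is already done for HNewInstance: right after allocation every element of the new array holds its default value, so a load with no intervening store to that location can be folded away. A sketch of the pattern (illustrative C++, assuming n > 0):

    int FirstElement(int n) {
      int* a = new int[n]();   // value-initialized: every element starts at 0
      int v = a[0];            // equivalent effect: the load folds to the default value 0
      delete[] a;
      return v;
    }
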
// Find an instruction's substitute if it should be removed.
// Return the same instruction if it should not be removed.
HInstruction* FindSubstitute(HInstruction* instruction) {
@@ -1023,6 +1109,7 @@ class LSEVisitor : public HGraphVisitor {
ArenaVector<HInstruction*> possibly_removed_stores_;
ArenaVector<HInstruction*> singleton_new_instances_;
+ ArenaVector<HInstruction*> singleton_new_arrays_;
DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
};
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 091b58a63d..6f0dbce2df 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -69,11 +69,13 @@ class Location : public ValueObject {
// We do not use the value 9 because it conflicts with kLocationConstantMask.
kDoNotUse9 = 9,
+ kSIMDStackSlot = 10, // 128bit stack slot. TODO: generalize with encoded #bytes?
+
// Unallocated location represents a location that is not fixed and can be
// allocated by a register allocator. Each unallocated location has
// a policy that specifies what kind of location is suitable. Payload
// contains register allocation policy.
- kUnallocated = 10,
+ kUnallocated = 11,
};
Location() : ValueObject(), value_(kInvalid) {
@@ -82,6 +84,7 @@ class Location : public ValueObject {
static_assert((kUnallocated & kLocationConstantMask) != kConstant, "TagError");
static_assert((kStackSlot & kLocationConstantMask) != kConstant, "TagError");
static_assert((kDoubleStackSlot & kLocationConstantMask) != kConstant, "TagError");
+ static_assert((kSIMDStackSlot & kLocationConstantMask) != kConstant, "TagError");
static_assert((kRegister & kLocationConstantMask) != kConstant, "TagError");
static_assert((kFpuRegister & kLocationConstantMask) != kConstant, "TagError");
static_assert((kRegisterPair & kLocationConstantMask) != kConstant, "TagError");
@@ -266,8 +269,20 @@ class Location : public ValueObject {
return GetKind() == kDoubleStackSlot;
}
+ static Location SIMDStackSlot(intptr_t stack_index) {
+ uintptr_t payload = EncodeStackIndex(stack_index);
+ Location loc(kSIMDStackSlot, payload);
+ // Ensure that sign is preserved.
+ DCHECK_EQ(loc.GetStackIndex(), stack_index);
+ return loc;
+ }
+
+ bool IsSIMDStackSlot() const {
+ return GetKind() == kSIMDStackSlot;
+ }
+
intptr_t GetStackIndex() const {
- DCHECK(IsStackSlot() || IsDoubleStackSlot());
+ DCHECK(IsStackSlot() || IsDoubleStackSlot() || IsSIMDStackSlot());
// Decode stack index manually to preserve sign.
return GetPayload() - kStackIndexBias;
}
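
The new kind is used exactly like the existing stack-slot kinds; a minimal usage sketch against the API added here (assumes locations.h and the DCHECK macros are available, as they are throughout this code):

    Location loc = Location::SIMDStackSlot(16);  // 128-bit spill slot at stack index 16
    DCHECK(loc.IsSIMDStackSlot());
    DCHECK(!loc.IsDoubleStackSlot());
    DCHECK_EQ(loc.GetStackIndex(), 16);          // sign-preserving, as asserted above
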
@@ -315,6 +330,7 @@ class Location : public ValueObject {
case kRegister: return "R";
case kStackSlot: return "S";
case kDoubleStackSlot: return "DS";
+ case kSIMDStackSlot: return "SIMD";
case kUnallocated: return "U";
case kConstant: return "C";
case kFpuRegister: return "F";
@@ -417,6 +433,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs);
class RegisterSet : public ValueObject {
public:
static RegisterSet Empty() { return RegisterSet(); }
+ static RegisterSet AllFpu() { return RegisterSet(0, -1); }
void Add(Location loc) {
if (loc.IsRegister()) {
@@ -462,6 +479,7 @@ class RegisterSet : public ValueObject {
private:
RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+ RegisterSet(uint32_t core, uint32_t fp) : core_registers_(core), floating_point_registers_(fp) {}
uint32_t core_registers_;
uint32_t floating_point_registers_;
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 26c9ab83c2..1a79601a93 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -16,10 +16,21 @@
#include "loop_optimization.h"
+#include "arch/instruction_set.h"
+#include "arch/arm/instruction_set_features_arm.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/mips/instruction_set_features_mips.h"
+#include "arch/mips64/instruction_set_features_mips64.h"
+#include "arch/x86/instruction_set_features_x86.h"
+#include "arch/x86_64/instruction_set_features_x86_64.h"
+#include "driver/compiler_driver.h"
#include "linear_order.h"
namespace art {
+// Enables vectorization (SIMDization) in the loop optimizer.
+static constexpr bool kEnableVectorization = true;
+
// Remove the instruction from the graph. A bit more elaborate than the usual
// instruction removal, since there may be a cycle in the use structure.
static void RemoveFromCycle(HInstruction* instruction) {
@@ -52,24 +63,43 @@ static bool IsEarlyExit(HLoopInformation* loop_info) {
return false;
}
+// Test vector restrictions.
+static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
+ return (restrictions & tested) != 0;
+}
+
+// Inserts an instruction.
+static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
+ DCHECK(block != nullptr);
+ DCHECK(instruction != nullptr);
+ block->InsertInstructionBefore(instruction, block->GetLastInstruction());
+ return instruction;
+}
+
//
// Class methods.
//
HLoopOptimization::HLoopOptimization(HGraph* graph,
+ CompilerDriver* compiler_driver,
HInductionVarAnalysis* induction_analysis)
: HOptimization(graph, kLoopOptimizationPassName),
+ compiler_driver_(compiler_driver),
induction_range_(induction_analysis),
loop_allocator_(nullptr),
+ global_allocator_(graph_->GetArena()),
top_loop_(nullptr),
last_loop_(nullptr),
iset_(nullptr),
induction_simplication_count_(0),
- simplified_(false) {
+ simplified_(false),
+ vector_length_(0),
+ vector_refs_(nullptr),
+ vector_map_(nullptr) {
}
void HLoopOptimization::Run() {
- // Well-behaved loops only.
+ // Skip if there is no loop or the graph has try-catch/irreducible loops.
// TODO: make this less of a sledgehammer.
if (!graph_->HasLoops() || graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) {
return;
@@ -78,14 +108,13 @@ void HLoopOptimization::Run() {
// Phase-local allocator that draws from the global pool. Since the allocator
// itself resides on the stack, it is destructed on exiting Run(), which
// implies its underlying memory is released immediately.
- ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());
+ ArenaAllocator allocator(global_allocator_->GetArenaPool());
loop_allocator_ = &allocator;
// Perform loop optimizations.
LocalRun();
-
if (top_loop_ == nullptr) {
- graph_->SetHasLoops(false);
+ graph_->SetHasLoops(false); // no more loops
}
// Detach.
@@ -107,18 +136,29 @@ void HLoopOptimization::LocalRun() {
}
// Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use
- // a temporary set that stores instructions using the phase-local allocator.
+ // temporary data structures using the phase-local allocator. All new HIR
+ // should use the global allocator.
if (top_loop_ != nullptr) {
ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ArenaSafeMap<HInstruction*, HInstruction*> map(
+ std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ // Attach.
iset_ = &iset;
+ vector_refs_ = &refs;
+ vector_map_ = &map;
+ // Traverse.
TraverseLoopsInnerToOuter(top_loop_);
- iset_ = nullptr; // detach
+ // Detach.
+ iset_ = nullptr;
+ vector_refs_ = nullptr;
+ vector_map_ = nullptr;
}
}
void HLoopOptimization::AddLoop(HLoopInformation* loop_info) {
DCHECK(loop_info != nullptr);
- LoopNode* node = new (loop_allocator_) LoopNode(loop_info); // phase-local allocator
+ LoopNode* node = new (loop_allocator_) LoopNode(loop_info);
if (last_loop_ == nullptr) {
// First loop.
DCHECK(top_loop_ == nullptr);
@@ -166,7 +206,7 @@ void HLoopOptimization::RemoveLoop(LoopNode* node) {
void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
for ( ; node != nullptr; node = node->next) {
// Visit inner loops first.
- int current_induction_simplification_count = induction_simplication_count_;
+ uint32_t current_induction_simplification_count = induction_simplication_count_;
if (node->inner != nullptr) {
TraverseLoopsInnerToOuter(node->inner);
}
@@ -175,7 +215,7 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
if (current_induction_simplification_count != induction_simplication_count_) {
induction_range_.ReVisit(node->loop_info);
}
- // Repeat simplifications in the body of this loop until no more changes occur.
+ // Repeat simplifications in the loop-body until no more changes occur.
// Note that since each simplification consists of eliminating code (without
// introducing new code), this process is always finite.
do {
@@ -183,13 +223,17 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
SimplifyInduction(node);
SimplifyBlocks(node);
} while (simplified_);
- // Simplify inner loop.
+ // Optimize inner loop.
if (node->inner == nullptr) {
- SimplifyInnerLoop(node);
+ OptimizeInnerLoop(node);
}
}
}
+//
+// Optimization.
+//
+
void HLoopOptimization::SimplifyInduction(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -200,13 +244,9 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) {
// for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
- iset_->clear();
- int32_t use_count = 0;
- if (IsPhiInduction(phi) &&
- IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ false, &use_count) &&
- // No uses, or no early-exit with proper replacement.
- (use_count == 0 ||
- (!IsEarlyExit(node->loop_info) && TryReplaceWithLastValue(phi, preheader)))) {
+ iset_->clear(); // prepare phi induction
+ if (TrySetPhiInduction(phi, /*restrict_uses*/ true) &&
+ TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) {
for (HInstruction* i : *iset_) {
RemoveFromCycle(i);
}
@@ -252,49 +292,47 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
}
}
-bool HLoopOptimization::SimplifyInnerLoop(LoopNode* node) {
+void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Ensure loop header logic is finite.
- int64_t tc = 0;
- if (!induction_range_.IsFinite(node->loop_info, &tc)) {
- return false;
+ int64_t trip_count = 0;
+ if (!induction_range_.IsFinite(node->loop_info, &trip_count)) {
+ return;
}
+
// Ensure there is only a single loop-body (besides the header).
HBasicBlock* body = nullptr;
for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
if (it.Current() != header) {
if (body != nullptr) {
- return false;
+ return;
}
body = it.Current();
}
}
// Ensure there is only a single exit point.
if (header->GetSuccessors().size() != 2) {
- return false;
+ return;
}
HBasicBlock* exit = (header->GetSuccessors()[0] == body)
? header->GetSuccessors()[1]
: header->GetSuccessors()[0];
// Ensure exit can only be reached by exiting loop.
if (exit->GetPredecessors().size() != 1) {
- return false;
+ return;
}
// Detect either an empty loop (no side effects other than plain iteration) or
// a trivial loop (just iterating once). Replace subsequent index uses, if any,
// with the last value and remove the loop, possibly after unrolling its body.
HInstruction* phi = header->GetFirstPhi();
- iset_->clear();
- int32_t use_count = 0;
- if (IsEmptyHeader(header)) {
+ iset_->clear(); // prepare phi induction
+ if (TrySetSimpleLoopHeader(header)) {
bool is_empty = IsEmptyBody(body);
- if ((is_empty || tc == 1) &&
- IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ true, &use_count) &&
- // No uses, or proper replacement.
- (use_count == 0 || TryReplaceWithLastValue(phi, preheader))) {
+ if ((is_empty || trip_count == 1) &&
+ TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
if (!is_empty) {
- // Unroll the loop body, which sees initial value of the index.
+ // Unroll the loop-body, which sees initial value of the index.
phi->ReplaceWith(phi->InputAt(0));
preheader->MergeInstructionsWith(body);
}
@@ -304,28 +342,705 @@ bool HLoopOptimization::SimplifyInnerLoop(LoopNode* node) {
header->RemoveDominatedBlock(exit);
header->DisconnectAndDelete();
preheader->AddSuccessor(exit);
- preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator
+ preheader->AddInstruction(new (global_allocator_) HGoto());
preheader->AddDominatedBlock(exit);
exit->SetDominator(preheader);
RemoveLoop(node); // update hierarchy
+ return;
+ }
+ }
+
+ // Vectorize loop, if possible and valid.
+ if (kEnableVectorization) {
+ iset_->clear(); // prepare phi induction
+ if (TrySetSimpleLoopHeader(header) &&
+ CanVectorize(node, body, trip_count) &&
+ TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
+ Vectorize(node, body, exit, trip_count);
+ graph_->SetHasSIMD(true); // flag SIMD usage
+ return;
+ }
+ }
+}
+
+//
+// Loop vectorization. The implementation is based on the book by Aart J.C. Bik:
+// "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance."
+// Intel Press, June, 2004 (http://www.aartbik.com/).
+//
+
+bool HLoopOptimization::CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) {
+ // Reset vector bookkeeping.
+ vector_length_ = 0;
+ vector_refs_->clear();
+ vector_runtime_test_a_ =
+ vector_runtime_test_b_= nullptr;
+
+ // Phis in the loop-body prevent vectorization.
+ if (!block->GetPhis().IsEmpty()) {
+ return false;
+ }
+
+ // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side
+ // occurrence, which allows passing attributes down the use tree.
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) {
+ return false; // failure to vectorize a left-hand-side
+ }
+ }
+
+ // Heuristics. Does vectorization seem profitable?
+ // TODO: refine
+ if (vector_length_ == 0) {
+ return false; // nothing found
+ } else if (0 < trip_count && trip_count < vector_length_) {
+ return false; // insufficient iterations
+ }
+
+ // Data dependence analysis. Find each pair of references with same type, where
+ // at least one is a write. Each such pair denotes a possible data dependence.
+ // This analysis exploits the property that differently typed arrays cannot be
+ // aliased, as well as the property that references either point to the same
+ // array or to two completely disjoint arrays, i.e., no partial aliasing.
+ // Other than a few simple heuristics, no detailed subscript analysis is done.
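+ // For example, a[i] = a[i] + 1 is accepted (loop-independent dependence only),
+ // a[i] = a[i + 1] is rejected (conservatively assumed loop-carried), and
+ // a[i] = b[i + 1] with possibly aliasing bases is accepted only under the
+ // generated a != b runtime test (illustration; the comparison is on HIR offsets).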
+ for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) {
+ for (auto j = i; ++j != vector_refs_->end(); ) {
+ if (i->type == j->type && (i->lhs || j->lhs)) {
+ // Found same-typed a[i+x] vs. b[i+y], where at least one is a write.
+ HInstruction* a = i->base;
+ HInstruction* b = j->base;
+ HInstruction* x = i->offset;
+ HInstruction* y = j->offset;
+ if (a == b) {
+ // Found a[i+x] vs. a[i+y]. Accept if x == y (loop-independent data dependence).
+ // Conservatively assume a loop-carried data dependence otherwise, and reject.
+ if (x != y) {
+ return false;
+ }
+ } else {
+ // Found a[i+x] vs. b[i+y]. Accept if x == y (at worst loop-independent data dependence).
+ // Conservatively assume a potential loop-carried data dependence otherwise, avoided by
+ // generating an explicit a != b disambiguation runtime test on the two references.
+ if (x != y) {
+ // For now, we reject after one test to avoid excessive overhead.
+ if (vector_runtime_test_a_ != nullptr) {
+ return false;
+ }
+ vector_runtime_test_a_ = a;
+ vector_runtime_test_b_ = b;
+ }
+ }
+ }
+ }
+ }
+
+ // Success!
+ return true;
+}
+
+void HLoopOptimization::Vectorize(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* exit,
+ int64_t trip_count) {
+ Primitive::Type induc_type = Primitive::kPrimInt;
+ HBasicBlock* header = node->loop_info->GetHeader();
+ HBasicBlock* preheader = node->loop_info->GetPreHeader();
+
+ // A cleanup is needed for any unknown trip count or for a known trip count
+ // with remainder iterations after vectorization.
+ bool needs_cleanup = trip_count == 0 || (trip_count % vector_length_) != 0;
+
+ // Adjust vector bookkeeping.
+ iset_->clear(); // prepare phi induction
+ bool is_simple_loop_header = TrySetSimpleLoopHeader(header); // fills iset_
+ DCHECK(is_simple_loop_header);
+
+ // Generate preheader:
+ // stc = <trip-count>;
+ // vtc = stc - stc % VL;
+ HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader);
+ HInstruction* vtc = stc;
+ if (needs_cleanup) {
+ DCHECK(IsPowerOfTwo(vector_length_));
+ HInstruction* rem = Insert(
+ preheader, new (global_allocator_) HAnd(induc_type,
+ stc,
+ graph_->GetIntConstant(vector_length_ - 1)));
+ vtc = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, rem));
+ }
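+ // For instance, with stc = 10 and VL = 4 this yields rem = 2 and vtc = 8, so the
+ // vector loop covers 8 iterations and the cleanup loop the remaining 2.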
+
+ // Generate runtime disambiguation test:
+ // vtc = a != b ? vtc : 0;
+ if (vector_runtime_test_a_ != nullptr) {
+ HInstruction* rt = Insert(
+ preheader,
+ new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
+ vtc = Insert(preheader,
+ new (global_allocator_) HSelect(rt, vtc, graph_->GetIntConstant(0), kNoDexPc));
+ needs_cleanup = true;
+ }
+
+ // Generate vector loop:
+ // for (i = 0; i < vtc; i += VL)
+ // <vectorized-loop-body>
+ vector_mode_ = kVector;
+ GenerateNewLoop(node,
+ block,
+ graph_->TransformLoopForVectorization(header, block, exit),
+ graph_->GetIntConstant(0),
+ vtc,
+ graph_->GetIntConstant(vector_length_));
+ HLoopInformation* vloop = vector_header_->GetLoopInformation();
+
+ // Generate cleanup loop, if needed:
+ // for ( ; i < stc; i += 1)
+ // <loop-body>
+ if (needs_cleanup) {
+ vector_mode_ = kSequential;
+ GenerateNewLoop(node,
+ block,
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
+ vector_phi_,
+ stc,
+ graph_->GetIntConstant(1));
+ }
+
+ // Remove the original loop by disconnecting the body block
+ // and removing all instructions from the header.
+ block->DisconnectAndDelete();
+ while (!header->GetFirstInstruction()->IsGoto()) {
+ header->RemoveInstruction(header->GetFirstInstruction());
+ }
+ // Update loop hierarchy: the old header now resides in the
+ // same outer loop as the old preheader.
+ header->SetLoopInformation(preheader->GetLoopInformation()); // outward
+ node->loop_info = vloop;
+}
+
+void HLoopOptimization::GenerateNewLoop(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step) {
+ Primitive::Type induc_type = Primitive::kPrimInt;
+ // Prepare new loop.
+ vector_map_->clear();
+ vector_preheader_ = new_preheader;
+ vector_header_ = vector_preheader_->GetSingleSuccessor();
+ vector_body_ = vector_header_->GetSuccessors()[1];
+ vector_phi_ = new (global_allocator_) HPhi(global_allocator_,
+ kNoRegNumber,
+ 0,
+ HPhi::ToPhiType(induc_type));
+ // Generate header and prepare body.
+ // for (i = lo; i < hi; i += step)
+ // <loop-body>
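+ // Note that the exit test below is i >= hi (HAboveOrEqual, an unsigned comparison),
+ // which is safe here since lo, hi, and the induction values are non-negative.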
+ HInstruction* cond = new (global_allocator_) HAboveOrEqual(vector_phi_, hi);
+ vector_header_->AddPhi(vector_phi_);
+ vector_header_->AddInstruction(cond);
+ vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
+ DCHECK(vectorized_def);
+ }
+ // Generate body from the instruction map, but in original program order.
+ HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment();
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ auto i = vector_map_->find(it.Current());
+ if (i != vector_map_->end() && !i->second->IsInBlock()) {
+ Insert(vector_body_, i->second);
+ // Deal with instructions that need an environment, such as the scalar intrinsics.
+ if (i->second->NeedsEnvironment()) {
+ i->second->CopyEnvironmentFromWithLoopPhiAdjustment(env, vector_header_);
+ }
+ }
+ }
+ // Finalize increment and phi.
+ HInstruction* inc = new (global_allocator_) HAdd(induc_type, vector_phi_, step);
+ vector_phi_->AddInput(lo);
+ vector_phi_->AddInput(Insert(vector_body_, inc));
+}
+
+// TODO: accept reductions at left-hand-side, mixed-type store idioms, etc.
+bool HLoopOptimization::VectorizeDef(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code) {
+ // Accept a left-hand-side array base[index] for
+ // (1) supported vector type,
+ // (2) loop-invariant base,
+ // (3) unit stride index,
+ // (4) vectorizable right-hand-side value.
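+ // For example, a[i] = b[i] + 1 over a supported type qualifies, whereas
+ // a[2 * i] = b[i] (non-unit stride) does not (illustration only).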
+ uint64_t restrictions = kNone;
+ if (instruction->IsArraySet()) {
+ Primitive::Type type = instruction->AsArraySet()->GetComponentType();
+ HInstruction* base = instruction->InputAt(0);
+ HInstruction* index = instruction->InputAt(1);
+ HInstruction* value = instruction->InputAt(2);
+ HInstruction* offset = nullptr;
+ if (TrySetVectorType(type, &restrictions) &&
+ node->loop_info->IsDefinedOutOfTheLoop(base) &&
+ induction_range_.IsUnitStride(index, &offset) &&
+ VectorizeUse(node, value, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecSub(index, offset);
+ GenerateVecMem(instruction, vector_map_->Get(index), vector_map_->Get(value), type);
+ } else {
+ vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ true));
+ }
+ return true;
+ }
+ return false;
+ }
+ // Branch back okay.
+ if (instruction->IsGoto()) {
+ return true;
+ }
+ // Otherwise accept only expressions with no effects outside the immediate loop-body.
+ // Note that actual uses are inspected during right-hand-side tree traversal.
+ return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
+}
+
+// TODO: more operations and intrinsics, detect saturation arithmetic, etc.
+bool HLoopOptimization::VectorizeUse(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions) {
+ // Accept anything for which code has already been generated.
+ if (generate_code) {
+ if (vector_map_->find(instruction) != vector_map_->end()) {
return true;
}
}
+ // Continue the right-hand-side tree traversal, passing in proper
+ // types and vector restrictions along the way. During code generation,
+ // all new nodes are drawn from the global allocator.
+ if (node->loop_info->IsDefinedOutOfTheLoop(instruction)) {
+ // Accept invariant use, using scalar expansion.
+ if (generate_code) {
+ GenerateVecInv(instruction, type);
+ }
+ return true;
+ } else if (instruction->IsArrayGet()) {
+ // Accept a right-hand-side array base[index] for
+ // (1) exact matching vector type,
+ // (2) loop-invariant base,
+ // (3) unit stride index.
+ HInstruction* base = instruction->InputAt(0);
+ HInstruction* index = instruction->InputAt(1);
+ HInstruction* offset = nullptr;
+ if (type == instruction->GetType() &&
+ node->loop_info->IsDefinedOutOfTheLoop(base) &&
+ induction_range_.IsUnitStride(index, &offset)) {
+ if (generate_code) {
+ GenerateVecSub(index, offset);
+ GenerateVecMem(instruction, vector_map_->Get(index), nullptr, type);
+ } else {
+ vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ false));
+ }
+ return true;
+ }
+ } else if (instruction->IsTypeConversion()) {
+ // Accept particular type conversions.
+ HTypeConversion* conversion = instruction->AsTypeConversion();
+ HInstruction* opa = conversion->InputAt(0);
+ Primitive::Type from = conversion->GetInputType();
+ Primitive::Type to = conversion->GetResultType();
+ if ((to == Primitive::kPrimByte ||
+ to == Primitive::kPrimChar ||
+ to == Primitive::kPrimShort) && from == Primitive::kPrimInt) {
+ // Accept a "narrowing" type conversion from a "wider" computation for
+ // (1) conversion into final required type,
+ // (2) vectorizable operand,
+ // (3) "wider" operations cannot bring in higher order bits.
+ if (to == type && VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) {
+ if (generate_code) {
+ if (vector_mode_ == kVector) {
+ vector_map_->Put(instruction, vector_map_->Get(opa)); // operand pass-through
+ } else {
+ GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+ }
+ }
+ return true;
+ }
+ } else if (to == Primitive::kPrimFloat && from == Primitive::kPrimInt) {
+ DCHECK_EQ(to, type);
+ // Accept int to float conversion for
+ // (1) supported int,
+ // (2) vectorizable operand.
+ if (TrySetVectorType(from, &restrictions) &&
+ VectorizeUse(node, opa, generate_code, from, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+ }
+ return true;
+ }
+ }
+ return false;
+ } else if (instruction->IsNeg() || instruction->IsNot() || instruction->IsBooleanNot()) {
+ // Accept unary operator for vectorizable operand.
+ HInstruction* opa = instruction->InputAt(0);
+ if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+ }
+ return true;
+ }
+ } else if (instruction->IsAdd() || instruction->IsSub() ||
+ instruction->IsMul() || instruction->IsDiv() ||
+ instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) {
+ // Deal with vector restrictions.
+ if ((instruction->IsMul() && HasVectorRestrictions(restrictions, kNoMul)) ||
+ (instruction->IsDiv() && HasVectorRestrictions(restrictions, kNoDiv))) {
+ return false;
+ }
+ // Accept binary operator for vectorizable operands.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+ VectorizeUse(node, opb, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
+ }
+ return true;
+ }
+ } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
+ // Deal with vector restrictions.
+ if ((HasVectorRestrictions(restrictions, kNoShift)) ||
+ (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
+ return false; // unsupported instruction
+ } else if ((instruction->IsShr() || instruction->IsUShr()) &&
+ HasVectorRestrictions(restrictions, kNoHiBits)) {
+ return false; // hibits may impact lobits; TODO: we can do better!
+ }
+ // Accept shift operator for vectorizable/invariant operands.
+ // TODO: accept symbolic, albeit loop invariant shift factors.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ if (VectorizeUse(node, opa, generate_code, type, restrictions) && opb->IsIntConstant()) {
+ if (generate_code) {
+ // Make sure shift factor only looks at lower bits, as defined for sequential shifts.
+ // Note that even the narrower SIMD shifts do the right thing after that.
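+ // For example, an int shift by 33 behaves as a shift by 1 (33 & 31 = 1).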
+ int32_t mask = (instruction->GetType() == Primitive::kPrimLong)
+ ? kMaxLongShiftDistance
+ : kMaxIntShiftDistance;
+ HInstruction* s = graph_->GetIntConstant(opb->AsIntConstant()->GetValue() & mask);
+ GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+ }
+ return true;
+ }
+ } else if (instruction->IsInvokeStaticOrDirect()) {
+ // Accept particular intrinsics.
+ HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kMathAbsInt:
+ case Intrinsics::kMathAbsLong:
+ case Intrinsics::kMathAbsFloat:
+ case Intrinsics::kMathAbsDouble: {
+ // Deal with vector restrictions.
+ if (HasVectorRestrictions(restrictions, kNoAbs) ||
+ HasVectorRestrictions(restrictions, kNoHiBits)) {
+ // TODO: we can do better for some hibits cases.
+ return false;
+ }
+ // Accept ABS(x) for vectorizable operand.
+ HInstruction* opa = instruction->InputAt(0);
+ if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+ }
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ } // switch
+ }
return false;
}
-bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
+bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restrictions) {
+ const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
+ switch (compiler_driver_->GetInstructionSet()) {
+ case kArm:
+ case kThumb2:
+ return false;
+ case kArm64:
+ // Allow vectorization for all ARM64 devices, because Android assumes that
+ // ARMv8 AArch64 always supports Advanced SIMD. For now, only D registers
+ // (64-bit vectors) are used, not Q registers (128-bit vectors).
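+ // A 64-bit D register packs 8 byte lanes, 4 char/short lanes, or 2 int/float
+ // lanes, which is what the vector lengths selected below reflect.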
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ *restrictions |= kNoDiv | kNoAbs;
+ return TrySetVectorLength(8);
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ *restrictions |= kNoDiv | kNoAbs;
+ return TrySetVectorLength(4);
+ case Primitive::kPrimInt:
+ *restrictions |= kNoDiv;
+ return TrySetVectorLength(2);
+ case Primitive::kPrimFloat:
+ return TrySetVectorLength(2);
+ default:
+ return false;
+ }
+ case kX86:
+ case kX86_64:
+ // Allow vectorization for SSE4-enabled X86 devices only (128-bit vectors).
+ if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) {
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs;
+ return TrySetVectorLength(16);
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ *restrictions |= kNoDiv | kNoAbs;
+ return TrySetVectorLength(8);
+ case Primitive::kPrimInt:
+ *restrictions |= kNoDiv;
+ return TrySetVectorLength(4);
+ case Primitive::kPrimLong:
+ *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs;
+ return TrySetVectorLength(2);
+ case Primitive::kPrimFloat:
+ return TrySetVectorLength(4);
+ case Primitive::kPrimDouble:
+ return TrySetVectorLength(2);
+ default:
+ break;
+ } // switch type
+ }
+ return false;
+ case kMips:
+ case kMips64:
+ // TODO: implement MIPS SIMD.
+ return false;
+ default:
+ return false;
+ } // switch instruction set
+}
+
+bool HLoopOptimization::TrySetVectorLength(uint32_t length) {
+ DCHECK(IsPowerOfTwo(length) && length >= 2u);
+ // First time set?
+ if (vector_length_ == 0) {
+ vector_length_ = length;
+ }
+ // Different types are acceptable within a loop-body, as long as all the corresponding vector
+ // lengths match exactly to obtain a uniform traversal through the vector iteration space
+ // (idiomatic exceptions to this rule can be handled by further unrolling sub-expressions).
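+ // For example, on ARM64 a loop-body mixing int operations (2 lanes) with byte
+ // operations (8 lanes) is rejected, since the lengths differ.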
+ return vector_length_ == length;
+}
+
+void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) {
+ if (vector_map_->find(org) == vector_map_->end()) {
+ // In scalar code, just use a self pass-through for scalar invariants
+ // (viz. expression remains itself).
+ if (vector_mode_ == kSequential) {
+ vector_map_->Put(org, org);
+ return;
+ }
+ // In vector code, explicit scalar expansion is needed.
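+ // For example, with four lanes an invariant value x is materialized once in the
+ // preheader of the new loop as the vector [x, x, x, x].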
+ HInstruction* vector = new (global_allocator_) HVecReplicateScalar(
+ global_allocator_, org, type, vector_length_);
+ vector_map_->Put(org, Insert(vector_preheader_, vector));
+ }
+}
+
+void HLoopOptimization::GenerateVecSub(HInstruction* org, HInstruction* offset) {
+ if (vector_map_->find(org) == vector_map_->end()) {
+ HInstruction* subscript = vector_phi_;
+ if (offset != nullptr) {
+ subscript = new (global_allocator_) HAdd(Primitive::kPrimInt, subscript, offset);
+ if (org->IsPhi()) {
+ Insert(vector_body_, subscript); // lacks layout placeholder
+ }
+ }
+ vector_map_->Put(org, subscript);
+ }
+}
+
+void HLoopOptimization::GenerateVecMem(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ Primitive::Type type) {
+ HInstruction* vector = nullptr;
+ if (vector_mode_ == kVector) {
+ // Vector store or load.
+ if (opb != nullptr) {
+ vector = new (global_allocator_) HVecStore(
+ global_allocator_, org->InputAt(0), opa, opb, type, vector_length_);
+ } else {
+ vector = new (global_allocator_) HVecLoad(
+ global_allocator_, org->InputAt(0), opa, type, vector_length_);
+ }
+ } else {
+ // Scalar store or load.
+ DCHECK(vector_mode_ == kSequential);
+ if (opb != nullptr) {
+ vector = new (global_allocator_) HArraySet(org->InputAt(0), opa, opb, type, kNoDexPc);
+ } else {
+ vector = new (global_allocator_) HArrayGet(org->InputAt(0), opa, type, kNoDexPc);
+ }
+ }
+ vector_map_->Put(org, vector);
+}
+
+#define GENERATE_VEC(x, y) \
+ if (vector_mode_ == kVector) { \
+ vector = (x); \
+ } else { \
+ DCHECK(vector_mode_ == kSequential); \
+ vector = (y); \
+ } \
+ break;
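+// The macro above selects the SIMD node (x) in vector mode and its scalar
+// counterpart (y) in sequential mode, and then terminates the enclosing switch case.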
+
+void HLoopOptimization::GenerateVecOp(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ Primitive::Type type) {
+ if (vector_mode_ == kSequential) {
+ // Scalar code follows implicit integral promotion.
+ if (type == Primitive::kPrimBoolean ||
+ type == Primitive::kPrimByte ||
+ type == Primitive::kPrimChar ||
+ type == Primitive::kPrimShort) {
+ type = Primitive::kPrimInt;
+ }
+ }
+ HInstruction* vector = nullptr;
+ switch (org->GetKind()) {
+ case HInstruction::kNeg:
+ DCHECK(opb == nullptr);
+ GENERATE_VEC(
+ new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_),
+ new (global_allocator_) HNeg(type, opa));
+ case HInstruction::kNot:
+ DCHECK(opb == nullptr);
+ GENERATE_VEC(
+ new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_),
+ new (global_allocator_) HNot(type, opa));
+ case HInstruction::kBooleanNot:
+ DCHECK(opb == nullptr);
+ GENERATE_VEC(
+ new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_),
+ new (global_allocator_) HBooleanNot(opa));
+ case HInstruction::kTypeConversion:
+ DCHECK(opb == nullptr);
+ GENERATE_VEC(
+ new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_),
+ new (global_allocator_) HTypeConversion(type, opa, kNoDexPc));
+ case HInstruction::kAdd:
+ GENERATE_VEC(
+ new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HAdd(type, opa, opb));
+ case HInstruction::kSub:
+ GENERATE_VEC(
+ new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HSub(type, opa, opb));
+ case HInstruction::kMul:
+ GENERATE_VEC(
+ new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HMul(type, opa, opb));
+ case HInstruction::kDiv:
+ GENERATE_VEC(
+ new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HDiv(type, opa, opb, kNoDexPc));
+ case HInstruction::kAnd:
+ GENERATE_VEC(
+ new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HAnd(type, opa, opb));
+ case HInstruction::kOr:
+ GENERATE_VEC(
+ new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HOr(type, opa, opb));
+ case HInstruction::kXor:
+ GENERATE_VEC(
+ new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HXor(type, opa, opb));
+ case HInstruction::kShl:
+ GENERATE_VEC(
+ new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HShl(type, opa, opb));
+ case HInstruction::kShr:
+ GENERATE_VEC(
+ new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HShr(type, opa, opb));
+ case HInstruction::kUShr:
+ GENERATE_VEC(
+ new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HUShr(type, opa, opb));
+ case HInstruction::kInvokeStaticOrDirect: {
+ HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect();
+ if (vector_mode_ == kVector) {
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kMathAbsInt:
+ case Intrinsics::kMathAbsLong:
+ case Intrinsics::kMathAbsFloat:
+ case Intrinsics::kMathAbsDouble:
+ DCHECK(opb == nullptr);
+ vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD intrinsic";
+ UNREACHABLE();
+ } // switch invoke
+ } else {
+ // In scalar code, simply clone the method invoke, and replace its operands with the
+ // corresponding new scalar instructions in the loop. The instruction will get an
+ // environment while being inserted from the instruction map in original program order.
+ DCHECK(vector_mode_ == kSequential);
+ HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
+ global_allocator_,
+ invoke->GetNumberOfArguments(),
+ invoke->GetType(),
+ invoke->GetDexPc(),
+ invoke->GetDexMethodIndex(),
+ invoke->GetResolvedMethod(),
+ invoke->GetDispatchInfo(),
+ invoke->GetInvokeType(),
+ invoke->GetTargetMethod(),
+ invoke->GetClinitCheckRequirement());
+ HInputsRef inputs = invoke->GetInputs();
+ for (size_t index = 0; index < inputs.size(); ++index) {
+ new_invoke->SetArgumentAt(index, vector_map_->Get(inputs[index]));
+ }
+ vector = new_invoke;
+ }
+ break;
+ }
+ default:
+ break;
+ } // switch
+ CHECK(vector != nullptr) << "Unsupported SIMD operator";
+ vector_map_->Put(org, vector);
+}
+
+#undef GENERATE_VEC
+
+//
+// Helpers.
+//
+
+bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) {
+ DCHECK(iset_->empty());
ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
if (set != nullptr) {
- DCHECK(iset_->empty());
for (HInstruction* i : *set) {
// Check that, other than instructions that are no longer in the graph (removed earlier)
- // each instruction is removable and, other than the phi, uses are contained in the cycle.
+ // each instruction is removable and, when restricted uses are requested, all uses
+ // other than the phi's are contained within the cycle.
if (!i->IsInBlock()) {
continue;
} else if (!i->IsRemovable()) {
return false;
- } else if (i != phi) {
+ } else if (i != phi && restrict_uses) {
for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
if (set->find(use.GetUser()) == set->end()) {
return false;
@@ -344,10 +1059,12 @@ bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
// c: Condition(phi, bound)
// i: If(c)
// TODO: Find a less pattern matching approach?
-bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) {
+bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) {
DCHECK(iset_->empty());
HInstruction* phi = block->GetFirstPhi();
- if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi())) {
+ if (phi != nullptr &&
+ phi->GetNext() == nullptr &&
+ TrySetPhiInduction(phi->AsPhi(), /*restrict_uses*/ false)) {
HInstruction* s = block->GetFirstInstruction();
if (s != nullptr && s->IsSuspendCheck()) {
HInstruction* c = s->GetNext();
@@ -365,14 +1082,24 @@ bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) {
}
bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) {
- if (block->GetFirstPhi() == nullptr) {
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* instruction = it.Current();
- if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
- return false;
- }
+ if (!block->GetPhis().IsEmpty()) {
+ return false;
+ }
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info,
+ HInstruction* instruction) {
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) {
+ return true;
}
- return true;
}
return false;
}
@@ -434,6 +1161,19 @@ bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasi
return false;
}
+bool HLoopOptimization::TryAssignLastValue(HLoopInformation* loop_info,
+ HInstruction* instruction,
+ HBasicBlock* block,
+ bool collect_loop_uses) {
+ // Assigning the last value is always successful if there are no uses.
+ // Otherwise, it succeeds in a loop without early exits by generating the
+ // proper last-value assignment.
+ int32_t use_count = 0;
+ return IsOnlyUsedAfterLoop(loop_info, instruction, collect_loop_uses, &use_count) &&
+ (use_count == 0 ||
+ (!IsEarlyExit(loop_info) && TryReplaceWithLastValue(instruction, block)));
+}
+
void HLoopOptimization::RemoveDeadInstructions(const HInstructionList& list) {
for (HBackwardInstructionIterator i(list); !i.Done(); i.Advance()) {
HInstruction* instruction = i.Current();
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9ddab4150c..d8f50aab28 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -23,13 +23,18 @@
namespace art {
+class CompilerDriver;
+
/**
* Loop optimizations. Builds a loop hierarchy and applies optimizations to
- * the detected nested loops, such as removal of dead induction and empty loops.
+ * the detected nested loops, such as removal of dead induction and empty loops
+ * and inner loop vectorization.
*/
class HLoopOptimization : public HOptimization {
public:
- HLoopOptimization(HGraph* graph, HInductionVarAnalysis* induction_analysis);
+ HLoopOptimization(HGraph* graph,
+ CompilerDriver* compiler_driver,
+ HInductionVarAnalysis* induction_analysis);
void Run() OVERRIDE;
@@ -46,36 +51,111 @@ class HLoopOptimization : public HOptimization {
inner(nullptr),
previous(nullptr),
next(nullptr) {}
- HLoopInformation* const loop_info;
+ HLoopInformation* loop_info;
LoopNode* outer;
LoopNode* inner;
LoopNode* previous;
LoopNode* next;
};
- void LocalRun();
+ /*
+ * Vectorization restrictions (bit mask).
+ */
+ enum VectorRestrictions {
+ kNone = 0, // no restrictions
+ kNoMul = 1, // no multiplication
+ kNoDiv = 2, // no division
+ kNoShift = 4, // no shift
+ kNoShr = 8, // no arithmetic shift right
+ kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
+ kNoAbs = 32, // no absolute value
+ };
+
+ /*
+ * Vectorization mode during synthesis
+ * (sequential peeling/cleanup loop or vector loop).
+ */
+ enum VectorMode {
+ kSequential,
+ kVector
+ };
+
+ /*
+ * Representation of a unit-stride array reference.
+ */
+ struct ArrayReference {
+ ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l)
+ : base(b), offset(o), type(t), lhs(l) { }
+ bool operator<(const ArrayReference& other) const {
+ return
+ (base < other.base) ||
+ (base == other.base &&
+ (offset < other.offset || (offset == other.offset &&
+ (type < other.type ||
+ (type == other.type && lhs < other.lhs)))));
+ }
+ HInstruction* base; // base address
+ HInstruction* offset; // offset + i
+ Primitive::Type type; // component type
+ bool lhs; // def/use
+ };
+ // Loop setup and traversal.
+ void LocalRun();
void AddLoop(HLoopInformation* loop_info);
void RemoveLoop(LoopNode* node);
-
void TraverseLoopsInnerToOuter(LoopNode* node);
- // Simplification.
+ // Optimization.
void SimplifyInduction(LoopNode* node);
void SimplifyBlocks(LoopNode* node);
- bool SimplifyInnerLoop(LoopNode* node);
+ void OptimizeInnerLoop(LoopNode* node);
+
+ // Vectorization analysis and synthesis.
+ bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
+ void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
+ void GenerateNewLoop(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step);
+ bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
+ bool VectorizeUse(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions);
+ bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions);
+ bool TrySetVectorLength(uint32_t length);
+ void GenerateVecInv(HInstruction* org, Primitive::Type type);
+ void GenerateVecSub(HInstruction* org, HInstruction* off);
+ void GenerateVecMem(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ Primitive::Type type);
+ void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
// Helpers.
- bool IsPhiInduction(HPhi* phi);
- bool IsEmptyHeader(HBasicBlock* block);
+ bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
+ bool TrySetSimpleLoopHeader(HBasicBlock* block);
bool IsEmptyBody(HBasicBlock* block);
bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
bool collect_loop_uses,
/*out*/ int32_t* use_count);
+ bool IsUsedOutsideLoop(HLoopInformation* loop_info,
+ HInstruction* instruction);
bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block);
+ bool TryAssignLastValue(HLoopInformation* loop_info,
+ HInstruction* instruction,
+ HBasicBlock* block,
+ bool collect_loop_uses);
void RemoveDeadInstructions(const HInstructionList& list);
+ // Compiler driver (to query ISA features).
+ const CompilerDriver* compiler_driver_;
+
// Range information based on prior induction variable analysis.
InductionVarRange induction_range_;
@@ -83,6 +163,9 @@ class HLoopOptimization : public HOptimization {
// through this allocator is immediately released when the loop optimizer is done.
ArenaAllocator* loop_allocator_;
+ // Global heap memory allocator. Used to build HIR.
+ ArenaAllocator* global_allocator_;
+
// Entries into the loop hierarchy representation. The hierarchy resides
// in phase-local heap memory.
LoopNode* top_loop_;
@@ -95,11 +178,33 @@ class HLoopOptimization : public HOptimization {
// Counter that tracks how many induction cycles have been simplified. Useful
// to trigger incremental updates of induction variable analysis of outer loops
// when the induction of inner loops has changed.
- int32_t induction_simplication_count_;
+ uint32_t induction_simplication_count_;
// Flag that tracks if any simplifications have occurred.
bool simplified_;
+ // Number of "lanes" for selected packed type.
+ uint32_t vector_length_;
+
+ // Set of array references in the vector loop.
+ // Contents reside in phase-local heap memory.
+ ArenaSet<ArrayReference>* vector_refs_;
+
+ // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
+ // loop (vector_mode_ is kSequential) and the actual vector loop (vector_mode_ is
+ // kVector). The data structure maps original instructions into the new instructions.
+ // Contents reside in phase-local heap memory.
+ ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
+
+ // Temporary vectorization bookkeeping.
+ HBasicBlock* vector_preheader_; // preheader of the new loop
+ HBasicBlock* vector_header_; // header of the new loop
+ HBasicBlock* vector_body_; // body of the new loop
+ HInstruction* vector_runtime_test_a_;
+ HInstruction* vector_runtime_test_b_; // defines a != b runtime test
+ HPhi* vector_phi_; // the Phi representing the normalized loop index
+ VectorMode vector_mode_; // selects synthesis mode
+
friend class LoopOptimizationTest;
DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index 9a6b4935b2..5b9350689e 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -31,7 +31,7 @@ class LoopOptimizationTest : public CommonCompilerTest {
allocator_(&pool_),
graph_(CreateGraph(&allocator_)),
iva_(new (&allocator_) HInductionVarAnalysis(graph_)),
- loop_opt_(new (&allocator_) HLoopOptimization(graph_, iva_)) {
+ loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_)) {
BuildGraph();
}
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 62c89100eb..e71fea92a9 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1088,6 +1088,19 @@ void HInstruction::ReplaceWith(HInstruction* other) {
DCHECK(env_uses_.empty());
}
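+// Replaces every use whose user is strictly dominated by `dominator` with
+// `replacement`. For instance, a caller that inserts a guarding HDeoptimize can use
+// this to make dominated users consume the guard's result instead of the original
+// (guarded) value.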
+void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) {
+ const HUseList<HInstruction*>& uses = GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+ HInstruction* user = it->GetUser();
+ size_t index = it->GetIndex();
+ // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+ ++it;
+ if (dominator->StrictlyDominates(user)) {
+ user->ReplaceInput(replacement, index);
+ }
+ }
+}
+
void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
HUserRecord<HInstruction*> input_use = InputRecordAt(index);
if (input_use.GetInstruction() == replacement) {
@@ -1323,6 +1336,18 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) {
}
}
+std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) {
+ switch (rhs) {
+ case HDeoptimize::Kind::kBCE:
+ return os << "bce";
+ case HDeoptimize::Kind::kInline:
+ return os << "inline";
+ default:
+ LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs);
+ UNREACHABLE();
+ }
+}
+
bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
return this == instruction->GetPreviousDisregardingMoves();
}
@@ -2046,6 +2071,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
if (HasTryCatch()) {
outer_graph->SetHasTryCatch(true);
}
+ if (HasSIMD()) {
+ outer_graph->SetHasSIMD(true);
+ }
HInstruction* return_value = nullptr;
if (GetBlocks().size() == 3) {
@@ -2179,6 +2207,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
}
}
if (rerun_loop_analysis) {
+ DCHECK(!outer_graph->HasIrreducibleLoops())
+ << "Recomputing loop information in graphs with irreducible loops "
+ << "is unsupported, as it could lead to loop header changes";
outer_graph->ClearLoopInformation();
outer_graph->ClearDominanceInformation();
outer_graph->BuildDominatorTree();
@@ -2309,6 +2340,68 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) {
new_pre_header, old_pre_header, /* replace_if_back_edge */ false);
}
+HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header,
+ HBasicBlock* body,
+ HBasicBlock* exit) {
+ DCHECK(header->IsLoopHeader());
+ HLoopInformation* loop = header->GetLoopInformation();
+
+ // Add new loop blocks.
+ HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
+ HBasicBlock* new_header = new (arena_) HBasicBlock(this, header->GetDexPc());
+ HBasicBlock* new_body = new (arena_) HBasicBlock(this, header->GetDexPc());
+ AddBlock(new_pre_header);
+ AddBlock(new_header);
+ AddBlock(new_body);
+
+ // Set up control flow.
+ header->ReplaceSuccessor(exit, new_pre_header);
+ new_pre_header->AddSuccessor(new_header);
+ new_header->AddSuccessor(exit);
+ new_header->AddSuccessor(new_body);
+ new_body->AddSuccessor(new_header);
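+ // The resulting control flow is
+ //   header -> new_pre_header -> new_header -> { exit, new_body },
+ // with new_body branching back to new_header (the caller adds the loop condition).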
+
+ // Set up dominators.
+ header->ReplaceDominatedBlock(exit, new_pre_header);
+ new_pre_header->SetDominator(header);
+ new_pre_header->dominated_blocks_.push_back(new_header);
+ new_header->SetDominator(new_pre_header);
+ new_header->dominated_blocks_.push_back(new_body);
+ new_body->SetDominator(new_header);
+ new_header->dominated_blocks_.push_back(exit);
+ exit->SetDominator(new_header);
+
+ // Fix reverse post order.
+ size_t index_of_header = IndexOfElement(reverse_post_order_, header);
+ MakeRoomFor(&reverse_post_order_, 2, index_of_header);
+ reverse_post_order_[++index_of_header] = new_pre_header;
+ reverse_post_order_[++index_of_header] = new_header;
+ size_t index_of_body = IndexOfElement(reverse_post_order_, body);
+ MakeRoomFor(&reverse_post_order_, 1, index_of_body - 1);
+ reverse_post_order_[index_of_body] = new_body;
+
+ // Add gotos and suspend check (client must add conditional in header).
+ new_pre_header->AddInstruction(new (arena_) HGoto());
+ HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(header->GetDexPc());
+ new_header->AddInstruction(suspend_check);
+ new_body->AddInstruction(new (arena_) HGoto());
+ suspend_check->CopyEnvironmentFromWithLoopPhiAdjustment(
+ loop->GetSuspendCheck()->GetEnvironment(), header);
+
+ // Update loop information.
+ new_header->AddBackEdge(new_body);
+ new_header->GetLoopInformation()->SetSuspendCheck(suspend_check);
+ new_header->GetLoopInformation()->Populate();
+ new_pre_header->SetLoopInformation(loop->GetPreHeader()->GetLoopInformation()); // outward
+ HLoopInformationOutwardIterator it(*new_header);
+ for (it.Advance(); !it.Done(); it.Advance()) {
+ it.Current()->Add(new_pre_header);
+ it.Current()->Add(new_header);
+ it.Current()->Add(new_body);
+ }
+ return new_pre_header;
+}
+
static void CheckAgainstUpperBound(ReferenceTypeInfo rti, ReferenceTypeInfo upper_bound_rti)
REQUIRES_SHARED(Locks::mutator_lock_) {
if (rti.IsValid()) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8a9e61875a..671f950aa6 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
temporaries_vreg_slots_(0),
has_bounds_checks_(false),
has_try_catch_(false),
+ has_simd_(false),
has_loops_(false),
has_irreducible_loops_(false),
debuggable_(debuggable),
@@ -340,6 +341,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_current_method_(nullptr),
+ art_method_(nullptr),
inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
osr_(osr),
cha_single_implementation_list_(arena->Adapter(kArenaAllocCHA)) {
@@ -398,6 +400,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// put deoptimization instructions, etc.
void TransformLoopHeaderForBCE(HBasicBlock* header);
+ // Adds a new loop directly after the loop with the given header and exit.
+ // Returns the new preheader.
+ HBasicBlock* TransformLoopForVectorization(HBasicBlock* header,
+ HBasicBlock* body,
+ HBasicBlock* exit);
+
// Removes `block` from the graph. Assumes `block` has been disconnected from
// other blocks and has no instructions or phis.
void DeleteDeadEmptyBlock(HBasicBlock* block);
@@ -560,6 +568,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool HasTryCatch() const { return has_try_catch_; }
void SetHasTryCatch(bool value) { has_try_catch_ = value; }
+ bool HasSIMD() const { return has_simd_; }
+ void SetHasSIMD(bool value) { has_simd_ = value; }
+
bool HasLoops() const { return has_loops_; }
void SetHasLoops(bool value) { has_loops_ = value; }
@@ -652,6 +663,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// false positives.
bool has_try_catch_;
+ // Flag whether SIMD instructions appear in the graph. If true, the
+ // code generators may have to be more careful spilling the wider
+ // contents of SIMD registers.
+ bool has_simd_;
+
// Flag whether there are any loops in the graph. We can skip loop
// optimization if it's false. It's only best effort to keep it up
// to date in the presence of code elimination so there might be false
@@ -1353,6 +1369,26 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(TypeConversion, Instruction) \
M(UShr, BinaryOperation) \
M(Xor, BinaryOperation) \
+ M(VecReplicateScalar, VecUnaryOperation) \
+ M(VecSetScalars, VecUnaryOperation) \
+ M(VecSumReduce, VecUnaryOperation) \
+ M(VecCnv, VecUnaryOperation) \
+ M(VecNeg, VecUnaryOperation) \
+ M(VecAbs, VecUnaryOperation) \
+ M(VecNot, VecUnaryOperation) \
+ M(VecAdd, VecBinaryOperation) \
+ M(VecSub, VecBinaryOperation) \
+ M(VecMul, VecBinaryOperation) \
+ M(VecDiv, VecBinaryOperation) \
+ M(VecAnd, VecBinaryOperation) \
+ M(VecAndNot, VecBinaryOperation) \
+ M(VecOr, VecBinaryOperation) \
+ M(VecXor, VecBinaryOperation) \
+ M(VecShl, VecBinaryOperation) \
+ M(VecShr, VecBinaryOperation) \
+ M(VecUShr, VecBinaryOperation) \
+ M(VecLoad, VecMemoryOperation) \
+ M(VecStore, VecMemoryOperation) \
/*
* Instructions, shared across several (not all) architectures.
@@ -1414,7 +1450,11 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(Constant, Instruction) \
M(UnaryOperation, Instruction) \
M(BinaryOperation, Instruction) \
- M(Invoke, Instruction)
+ M(Invoke, Instruction) \
+ M(VecOperation, Instruction) \
+ M(VecUnaryOperation, VecOperation) \
+ M(VecBinaryOperation, VecOperation) \
+ M(VecMemoryOperation, VecOperation)
#define FOR_EACH_INSTRUCTION(M) \
FOR_EACH_CONCRETE_INSTRUCTION(M) \
@@ -1734,11 +1774,11 @@ class SideEffects : public ValueObject {
// A HEnvironment object contains the values of virtual registers at a given location.
class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
public:
- HEnvironment(ArenaAllocator* arena,
- size_t number_of_vregs,
- ArtMethod* method,
- uint32_t dex_pc,
- HInstruction* holder)
+ ALWAYS_INLINE HEnvironment(ArenaAllocator* arena,
+ size_t number_of_vregs,
+ ArtMethod* method,
+ uint32_t dex_pc,
+ HInstruction* holder)
: vregs_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentVRegs)),
locations_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentLocations)),
parent_(nullptr),
@@ -1747,7 +1787,7 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
holder_(holder) {
}
- HEnvironment(ArenaAllocator* arena, const HEnvironment& to_copy, HInstruction* holder)
+ ALWAYS_INLINE HEnvironment(ArenaAllocator* arena, const HEnvironment& to_copy, HInstruction* holder)
: HEnvironment(arena,
to_copy.Size(),
to_copy.GetMethod(),
@@ -1914,6 +1954,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
virtual bool IsControlFlow() const { return false; }
+ // Can the instruction throw?
+ // TODO: We should rename to CanVisiblyThrow, as some instructions (like HNewInstance)
+ // could throw OOME, but it is still OK to remove them if they are unused.
virtual bool CanThrow() const { return false; }
bool CanThrowIntoCatchBlock() const { return CanThrow() && block_->IsTryBlock(); }
@@ -2068,6 +2111,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
void SetLocations(LocationSummary* locations) { locations_ = locations; }
void ReplaceWith(HInstruction* instruction);
+ void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
void ReplaceInput(HInstruction* replacement, size_t index);
// This is almost the same as doing `ReplaceWith()`. But in this helper, the
@@ -2931,28 +2975,97 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> {
};
// Deoptimize to interpreter, upon checking a condition.
-class HDeoptimize FINAL : public HTemplateInstruction<1> {
+class HDeoptimize FINAL : public HVariableInputSizeInstruction {
public:
+ enum class Kind {
+ kBCE,
+ kInline,
+ kLast = kInline
+ };
+
+ // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move
+ // across.
+ HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc)
+ : HVariableInputSizeInstruction(
+ SideEffects::All(),
+ dex_pc,
+ arena,
+ /* number_of_inputs */ 1,
+ kArenaAllocMisc) {
+ SetPackedFlag<kFieldCanBeMoved>(false);
+ SetPackedField<DeoptimizeKindField>(kind);
+ SetRawInputAt(0, cond);
+ }
+
+ // Use this constructor when the `HDeoptimize` guards an instruction, and any user
+ // that relies on the deoptimization to pass should have its input be the `HDeoptimize`
+ // instead of `guard`.
// We set CanTriggerGC to prevent any intermediate address to be live
// at the point of the `HDeoptimize`.
- HDeoptimize(HInstruction* cond, uint32_t dex_pc)
- : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
+ HDeoptimize(ArenaAllocator* arena,
+ HInstruction* cond,
+ HInstruction* guard,
+ Kind kind,
+ uint32_t dex_pc)
+ : HVariableInputSizeInstruction(
+ SideEffects::CanTriggerGC(),
+ dex_pc,
+ arena,
+ /* number_of_inputs */ 2,
+ kArenaAllocMisc) {
+ SetPackedFlag<kFieldCanBeMoved>(true);
+ SetPackedField<DeoptimizeKindField>(kind);
SetRawInputAt(0, cond);
+ SetRawInputAt(1, guard);
}
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
+ bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); }
+
+ bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind());
}
+
bool NeedsEnvironment() const OVERRIDE { return true; }
+
bool CanThrow() const OVERRIDE { return true; }
+ Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); }
+
+ Primitive::Type GetType() const OVERRIDE {
+ return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid;
+ }
+
+ bool GuardsAnInput() const {
+ return InputCount() == 2;
+ }
+
+ HInstruction* GuardedInput() const {
+ DCHECK(GuardsAnInput());
+ return InputAt(1);
+ }
+
+ void RemoveGuard() {
+ RemoveInputAt(1);
+ }
+
DECLARE_INSTRUCTION(Deoptimize);
private:
+ static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits;
+ static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1;
+ static constexpr size_t kFieldDeoptimizeKindSize =
+ MinimumBitsToStore(static_cast<size_t>(Kind::kLast));
+ static constexpr size_t kNumberOfDeoptimizePackedBits =
+ kFieldDeoptimizeKind + kFieldDeoptimizeKindSize;
+ static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits,
+ "Too many packed fields.");
+ using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
+
DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
};
+std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs);
+
// Represents a should_deoptimize flag. Currently used for CHA-based devirtualization.
// The compiled code checks this flag value in a guard before devirtualized call and
// if it's true, starts to do deoptimization.
@@ -3912,6 +4025,7 @@ class HInvoke : public HVariableInputSizeInstruction {
bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
ArtMethod* GetResolvedMethod() const { return resolved_method_; }
+ void SetResolvedMethod(ArtMethod* method) { resolved_method_ = method; }
DECLARE_ABSTRACT_INSTRUCTION(Invoke);
@@ -3954,7 +4068,7 @@ class HInvoke : public HVariableInputSizeInstruction {
}
uint32_t number_of_arguments_;
- ArtMethod* const resolved_method_;
+ ArtMethod* resolved_method_;
const uint32_t dex_method_index_;
Intrinsics intrinsic_;
@@ -4111,6 +4225,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
dispatch_info_ = dispatch_info;
}
+ DispatchInfo GetDispatchInfo() const {
+ return dispatch_info_;
+ }
+
void AddSpecialInput(HInstruction* input) {
// We allow only one special input.
DCHECK(!IsStringInit() && !HasCurrentMethodInput());
@@ -5541,8 +5659,6 @@ class HLoadClass FINAL : public HInstruction {
// Use a known boot image Class* address, embedded in the code by the codegen.
// Used for boot image classes referenced by apps in AOT- and JIT-compiled code.
- // Note: codegen needs to emit a linker patch if indicated by compiler options'
- // GetIncludePatchInformation().
kBootImageAddress,
// Load from an entry in the .bss section using a PC-relative load.
@@ -5746,8 +5862,6 @@ class HLoadString FINAL : public HInstruction {
// Use a known boot image String* address, embedded in the code by the codegen.
// Used for boot image strings referenced by apps in AOT- and JIT-compiled code.
- // Note: codegen needs to emit a linker patch if indicated by compiler options'
- // GetIncludePatchInformation().
kBootImageAddress,
// Load from an entry in the .bss section using a PC-relative load.
@@ -6609,6 +6723,8 @@ class HParallelMove FINAL : public HTemplateInstruction<0> {
} // namespace art
+#include "nodes_vector.h"
+
#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
#include "nodes_shared.h"
#endif
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
new file mode 100644
index 0000000000..0cbbf2a215
--- /dev/null
+++ b/compiler/optimizing/nodes_vector.h
@@ -0,0 +1,604 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_
+#define ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_
+
+// This #include should never be used by compilation, because this header file (nodes_vector.h)
+// is included in the header file nodes.h itself. However, it gives editing tools better context.
+#include "nodes.h"
+
+namespace art {
+
+// Memory alignment, represented as an offset relative to a base, where 0 <= offset < base,
+// and base is a power of two. For example, the value Alignment(16, 0) means memory is
+// perfectly aligned at a 16-byte boundary, whereas the value Alignment(16, 4) means
+// memory is always exactly 4 bytes above such a boundary.
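+// For example, Alignment(16, 4) is aligned at a 4-byte boundary, but not at an
+// 8-byte or a 16-byte boundary.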
+class Alignment {
+ public:
+ Alignment(size_t base, size_t offset) : base_(base), offset_(offset) {
+ DCHECK_LT(offset, base);
+ DCHECK(IsPowerOfTwo(base));
+ }
+
+ // Returns true if memory is "at least" aligned at the given boundary.
+ // Assumes the requested base is a power of two.
+ bool IsAlignedAt(size_t base) const {
+ DCHECK_NE(0u, base);
+ DCHECK(IsPowerOfTwo(base));
+ return ((offset_ | base_) & (base - 1u)) == 0;
+ }
+
+ std::string ToString() const {
+ return "ALIGN(" + std::to_string(base_) + "," + std::to_string(offset_) + ")";
+ }
+
+ private:
+ size_t base_;
+ size_t offset_;
+};
+
+//
+// Definitions of abstract vector operations in HIR.
+//
+
+// Abstraction of a vector operation, i.e., an operation that performs
+// GetVectorLength() x GetPackedType() operations simultaneously.
+class HVecOperation : public HVariableInputSizeInstruction {
+ public:
+ HVecOperation(ArenaAllocator* arena,
+ Primitive::Type packed_type,
+ SideEffects side_effects,
+ size_t number_of_inputs,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVariableInputSizeInstruction(side_effects,
+ dex_pc,
+ arena,
+ number_of_inputs,
+ kArenaAllocVectorNode),
+ vector_length_(vector_length) {
+ SetPackedField<TypeField>(packed_type);
+ DCHECK_LT(1u, vector_length);
+ }
+
+ // Returns the number of elements packed in a vector.
+ size_t GetVectorLength() const {
+ return vector_length_;
+ }
+
+ // Returns the number of bytes in a full vector.
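+ // (For example, 4 packed int lanes occupy 4 * 4 = 16 bytes.)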
+ size_t GetVectorNumberOfBytes() const {
+ return vector_length_ * Primitive::ComponentSize(GetPackedType());
+ }
+
+ // Returns the type of the vector operation: a SIMD operation looks like an FPU location.
+ // TODO: we could introduce SIMD types in HIR.
+ Primitive::Type GetType() const OVERRIDE {
+ return Primitive::kPrimDouble;
+ }
+
+ // Returns the true component type packed in a vector.
+ Primitive::Type GetPackedType() const {
+ return GetPackedField<TypeField>();
+ }
+
+ DECLARE_ABSTRACT_INSTRUCTION(VecOperation);
+
+ private:
+ // Additional packed bits.
+ static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
+ static constexpr size_t kFieldTypeSize =
+ MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+ static constexpr size_t kNumberOfVectorOpPackedBits = kFieldType + kFieldTypeSize;
+ static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+ using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
+
+ const size_t vector_length_;
+
+ DISALLOW_COPY_AND_ASSIGN(HVecOperation);
+};
+
+// Abstraction of a unary vector operation.
+class HVecUnaryOperation : public HVecOperation {
+ public:
+ HVecUnaryOperation(ArenaAllocator* arena,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecOperation(arena,
+ packed_type,
+ SideEffects::None(),
+ /*number_of_inputs*/ 1,
+ vector_length,
+ dex_pc) { }
+ DECLARE_ABSTRACT_INSTRUCTION(VecUnaryOperation);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecUnaryOperation);
+};
+
+// Abstraction of a binary vector operation.
+class HVecBinaryOperation : public HVecOperation {
+ public:
+ HVecBinaryOperation(ArenaAllocator* arena,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecOperation(arena,
+ packed_type,
+ SideEffects::None(),
+ /*number_of_inputs*/ 2,
+ vector_length,
+ dex_pc) { }
+ DECLARE_ABSTRACT_INSTRUCTION(VecBinaryOperation);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecBinaryOperation);
+};
+
+// Abstraction of a vector operation that references memory, with an alignment.
+// The Android runtime guarantees at least "component size" alignment for array
+// elements and, thus, vectors.
+class HVecMemoryOperation : public HVecOperation {
+ public:
+ HVecMemoryOperation(ArenaAllocator* arena,
+ Primitive::Type packed_type,
+ SideEffects side_effects,
+ size_t number_of_inputs,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc),
+ alignment_(Primitive::ComponentSize(packed_type), 0) { }
+
+ void SetAlignment(Alignment alignment) { alignment_ = alignment; }
+
+ Alignment GetAlignment() const { return alignment_; }
+
+ DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation);
+
+ private:
+ Alignment alignment_;
+
+ DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation);
+};
+
+//
+// Definitions of concrete vector operations in HIR.
+//
+
+// Replicates the given scalar into a vector,
+// viz. replicate(x) = [ x, .. , x ].
+class HVecReplicateScalar FINAL : public HVecUnaryOperation {
+ public:
+ HVecReplicateScalar(ArenaAllocator* arena,
+ HInstruction* scalar,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ SetRawInputAt(0, scalar);
+ }
+ DECLARE_INSTRUCTION(VecReplicateScalar);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar);
+};
+
+// Assigns the given scalar elements to a vector,
+// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ].
+class HVecSetScalars FINAL : public HVecUnaryOperation {
+ public:
+ HVecSetScalars(ArenaAllocator* arena,
+ HInstruction** scalars, // array
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ for (size_t i = 0; i < vector_length; i++) {
+ SetRawInputAt(0, scalars[i]);
+ }
+ }
+ DECLARE_INSTRUCTION(VecSetScalars);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecSetScalars);
+};
+
+// Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1),
+// viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j.
+class HVecSumReduce FINAL : public HVecUnaryOperation {
+ public:
+ HVecSumReduce(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(input->IsVecOperation());
+ DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, input);
+ }
+
+ // TODO: probably integral promotion
+ Primitive::Type GetType() const OVERRIDE { return GetPackedType(); }
+
+ DECLARE_INSTRUCTION(VecSumReduce);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecSumReduce);
+};
+
+// Converts every component in the vector,
+// viz. cnv[ x1, .. , xn ] = [ cnv(x1), .. , cnv(xn) ].
+class HVecCnv FINAL : public HVecUnaryOperation {
+ public:
+ HVecCnv(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(input->IsVecOperation());
+ DCHECK_NE(input->AsVecOperation()->GetPackedType(), packed_type); // actual convert
+ SetRawInputAt(0, input);
+ }
+
+ Primitive::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); }
+ Primitive::Type GetResultType() const { return GetPackedType(); }
+
+ DECLARE_INSTRUCTION(VecCnv);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecCnv);
+};
+
+// Negates every component in the vector,
+// viz. neg[ x1, .. , xn ] = [ -x1, .. , -xn ].
+class HVecNeg FINAL : public HVecUnaryOperation {
+ public:
+ HVecNeg(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(input->IsVecOperation());
+ DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, input);
+ }
+ DECLARE_INSTRUCTION(VecNeg);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecNeg);
+};
+
+// Takes absolute value of every component in the vector,
+// viz. abs[ x1, .. , xn ] = [ |x1|, .. , |xn| ].
+class HVecAbs FINAL : public HVecUnaryOperation {
+ public:
+ HVecAbs(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(input->IsVecOperation());
+ DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, input);
+ }
+ DECLARE_INSTRUCTION(VecAbs);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecAbs);
+};
+
+// Bitwise- or boolean-nots every component in the vector,
+// viz. not[ x1, .. , xn ] = [ ~x1, .. , ~xn ], or
+// not[ x1, .. , xn ] = [ !x1, .. , !xn ] for boolean.
+class HVecNot FINAL : public HVecUnaryOperation {
+ public:
+ HVecNot(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(input->IsVecOperation());
+ SetRawInputAt(0, input);
+ }
+ DECLARE_INSTRUCTION(VecNot);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecNot);
+};
+
+// Adds every component in the two vectors,
+// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 + y1, .. , xn + yn ].
+class HVecAdd FINAL : public HVecBinaryOperation {
+ public:
+ HVecAdd(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecAdd);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecAdd);
+};
+
+// Subtracts every component in the two vectors,
+// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ].
+class HVecSub FINAL : public HVecBinaryOperation {
+ public:
+ HVecSub(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecSub);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecSub);
+};
+
+// Multiplies every component in the two vectors,
+// viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ].
+class HVecMul FINAL : public HVecBinaryOperation {
+ public:
+ HVecMul(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecMul);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecMul);
+};
+
+// Divides every component in the two vectors,
+// viz. [ x1, .. , xn ] / [ y1, .. , yn ] = [ x1 / y1, .. , xn / yn ].
+class HVecDiv FINAL : public HVecBinaryOperation {
+ public:
+ HVecDiv(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecDiv);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecDiv);
+};
+
+// Bitwise-ands every component in the two vectors,
+// viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ].
+class HVecAnd FINAL : public HVecBinaryOperation {
+ public:
+ HVecAnd(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecAnd);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecAnd);
+};
+
+// Bitwise-and-nots every component in the two vectors,
+// viz. [ x1, .. , xn ] and-not [ y1, .. , yn ] = [ ~x1 & y1, .. , ~xn & yn ].
+class HVecAndNot FINAL : public HVecBinaryOperation {
+ public:
+ HVecAndNot(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecAndNot);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecAndNot);
+};
+
+// Bitwise-ors every component in the two vectors,
+// viz. [ x1, .. , xn ] | [ y1, .. , yn ] = [ x1 | y1, .. , xn | yn ].
+class HVecOr FINAL : public HVecBinaryOperation {
+ public:
+ HVecOr(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecOr);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecOr);
+};
+
+// Bitwise-xors every component in the two vectors,
+// viz. [ x1, .. , xn ] ^ [ y1, .. , yn ] = [ x1 ^ y1, .. , xn ^ yn ].
+class HVecXor FINAL : public HVecBinaryOperation {
+ public:
+ HVecXor(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecXor);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecXor);
+};
+
+// Logically shifts every component in the vector left by the given distance,
+// viz. [ x1, .. , xn ] << d = [ x1 << d, .. , xn << d ].
+class HVecShl FINAL : public HVecBinaryOperation {
+ public:
+ HVecShl(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecShl);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecShl);
+};
+
+// Arithmetically shifts every component in the vector right by the given distance,
+// viz. [ x1, .. , xn ] >> d = [ x1 >> d, .. , xn >> d ].
+class HVecShr FINAL : public HVecBinaryOperation {
+ public:
+ HVecShr(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecShr);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecShr);
+};
+
+// Logically shifts every component in the vector right by the given distance,
+// viz. [ x1, .. , xn ] >>> d = [ x1 >>> d, .. , xn >>> d ].
+class HVecUShr FINAL : public HVecBinaryOperation {
+ public:
+ HVecUShr(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecUShr);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecUShr);
+};
+
+// Loads a vector from memory, viz. load(mem, 1)
+// yields the vector [ mem(1), .. , mem(n) ].
+class HVecLoad FINAL : public HVecMemoryOperation {
+ public:
+ HVecLoad(ArenaAllocator* arena,
+ HInstruction* base,
+ HInstruction* index,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecMemoryOperation(arena,
+ packed_type,
+ SideEffects::ArrayReadOfType(packed_type),
+ /*number_of_inputs*/ 2,
+ vector_length,
+ dex_pc) {
+ SetRawInputAt(0, base);
+ SetRawInputAt(1, index);
+ }
+ DECLARE_INSTRUCTION(VecLoad);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecLoad);
+};
+
+// Stores a vector to memory, viz. store(mem, 1, [ x1, .. , xn ] )
+// sets mem(1) = x1, .. , mem(n) = xn.
+class HVecStore FINAL : public HVecMemoryOperation {
+ public:
+ HVecStore(ArenaAllocator* arena,
+ HInstruction* base,
+ HInstruction* index,
+ HInstruction* value,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecMemoryOperation(arena,
+ packed_type,
+ SideEffects::ArrayWriteOfType(packed_type),
+ /*number_of_inputs*/ 3,
+ vector_length,
+ dex_pc) {
+ DCHECK(value->IsVecOperation());
+ DCHECK_EQ(value->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, base);
+ SetRawInputAt(1, index);
+ SetRawInputAt(2, value);
+ }
+ DECLARE_INSTRUCTION(VecStore);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecStore);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_
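
For orientation (not part of the patch): a minimal sketch of how a vectorizer pass might chain the constructors defined above to lower a loop body such as a[i] += b[i] into 4 x int lanes. Here `graph`, `a`, `b` and `i` stand for an existing HGraph* and the array/index HInstruction* values, and the Alignment(base, offset) form is inferred from HVecMemoryOperation's initializer.

// Illustrative sketch only, under the assumptions stated above.
ArenaAllocator* arena = graph->GetArena();
const size_t vl = 4;  // four kPrimInt lanes = 128 bits
HVecLoad* load_a = new (arena) HVecLoad(arena, a, i, Primitive::kPrimInt, vl);
HVecLoad* load_b = new (arena) HVecLoad(arena, b, i, Primitive::kPrimInt, vl);
HVecAdd* add = new (arena) HVecAdd(arena, load_a, load_b, Primitive::kPrimInt, vl);
HVecStore* store = new (arena) HVecStore(arena, a, i, add, Primitive::kPrimInt, vl);
// If the pass can prove stronger alignment than the default component size,
// it may record that for the code generators (Alignment signature assumed):
store->SetAlignment(Alignment(16, 0));
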
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index d84fe6ccff..60af2b4201 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -174,53 +174,45 @@ static constexpr uint8_t expected_cfi_kMips[] = {
// 0x00000034: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kMips64[] = {
- 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
- 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
- 0xE8, 0xFF, 0xBD, 0x67, 0x18, 0x00, 0xBD, 0x67,
- 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF,
- 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67,
- 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+ 0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF,
+ 0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7,
+ 0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF,
+ 0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67,
+ 0x00, 0x00, 0x1F, 0xD8,
};
-
static constexpr uint8_t expected_cfi_kMips64[] = {
- 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
- 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x0A, 0x44,
- 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
- 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+ 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+ 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44,
+ 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
};
-// 0x00000000: daddiu r29, r29, -40
-// 0x00000004: .cfi_def_cfa_offset: 40
-// 0x00000004: sd r31, +32(r29)
+// 0x00000000: daddiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sd r31, +56(r29)
// 0x00000008: .cfi_offset: r31 at cfa-8
-// 0x00000008: sd r17, +24(r29)
+// 0x00000008: sd r17, +48(r29)
// 0x0000000c: .cfi_offset: r17 at cfa-16
-// 0x0000000c: sd r16, +16(r29)
+// 0x0000000c: sd r16, +40(r29)
// 0x00000010: .cfi_offset: r16 at cfa-24
-// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000010: sdc1 f25, +32(r29)
// 0x00000014: .cfi_offset: r57 at cfa-32
-// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000014: sdc1 f24, +24(r29)
// 0x00000018: .cfi_offset: r56 at cfa-40
-// 0x00000018: daddiu r29, r29, -24
-// 0x0000001c: .cfi_def_cfa_offset: 64
-// 0x0000001c: .cfi_remember_state
-// 0x0000001c: daddiu r29, r29, 24
-// 0x00000020: .cfi_def_cfa_offset: 40
-// 0x00000020: ldc1 f24, +0(r29)
-// 0x00000024: .cfi_restore: r56
-// 0x00000024: ldc1 f25, +8(r29)
+// 0x00000018: .cfi_remember_state
+// 0x00000018: ld r31, +56(r29)
+// 0x0000001c: .cfi_restore: r31
+// 0x0000001c: ld r17, +48(r29)
+// 0x00000020: .cfi_restore: r17
+// 0x00000020: ld r16, +40(r29)
+// 0x00000024: .cfi_restore: r16
+// 0x00000024: ldc1 f25, +32(r29)
// 0x00000028: .cfi_restore: r57
-// 0x00000028: ld r16, +16(r29)
-// 0x0000002c: .cfi_restore: r16
-// 0x0000002c: ld r17, +24(r29)
-// 0x00000030: .cfi_restore: r17
-// 0x00000030: ld r31, +32(r29)
-// 0x00000034: .cfi_restore: r31
-// 0x00000034: daddiu r29, r29, 40
-// 0x00000038: .cfi_def_cfa_offset: 0
-// 0x00000038: jr r31
-// 0x0000003c: nop
-// 0x00000040: .cfi_restore_state
-// 0x00000040: .cfi_def_cfa_offset: 64
+// 0x00000028: ldc1 f24, +24(r29)
+// 0x0000002c: .cfi_restore: r56
+// 0x0000002c: daddiu r29, r29, 64
+// 0x00000030: .cfi_def_cfa_offset: 0
+// 0x00000030: jic r31, 0
+// 0x00000034: .cfi_restore_state
+// 0x00000034: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kThumb2_adjust[] = {
#ifdef ART_USE_OLD_ARM_BACKEND
@@ -403,58 +395,52 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = {
// 0x00020060: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
- 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
- 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
- 0xE8, 0xFF, 0xBD, 0x67, 0x02, 0x00, 0xA6, 0x60,
- 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
+ 0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF,
+ 0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7,
+ 0x02, 0x00, 0xA6, 0x60, 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
};
static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = {
- 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7,
- 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF,
- 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF,
+ 0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67,
+ 0x00, 0x00, 0x1F, 0xD8,
};
static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
- 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
- 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x10, 0x00,
- 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0,
- 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+ 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+ 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x04, 0x10, 0x00, 0x02, 0x00, 0x0A,
+ 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E,
+ 0x00, 0x44, 0x0B, 0x0E, 0x40,
};
-// 0x00000000: daddiu r29, r29, -40
-// 0x00000004: .cfi_def_cfa_offset: 40
-// 0x00000004: sd r31, +32(r29)
+// 0x00000000: daddiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sd r31, +56(r29)
// 0x00000008: .cfi_offset: r31 at cfa-8
-// 0x00000008: sd r17, +24(r29)
+// 0x00000008: sd r17, +48(r29)
// 0x0000000c: .cfi_offset: r17 at cfa-16
-// 0x0000000c: sd r16, +16(r29)
+// 0x0000000c: sd r16, +40(r29)
// 0x00000010: .cfi_offset: r16 at cfa-24
-// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000010: sdc1 f25, +32(r29)
// 0x00000014: .cfi_offset: r57 at cfa-32
-// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000014: sdc1 f24, +24(r29)
// 0x00000018: .cfi_offset: r56 at cfa-40
-// 0x00000018: daddiu r29, r29, -24
-// 0x0000001c: .cfi_def_cfa_offset: 64
-// 0x0000001c: bnec r5, r6, 0x0000002c ; +12
-// 0x00000020: auipc r1, 2
-// 0x00000024: jic r1, 12 ; b 0x00020030 ; +131080
-// 0x00000028: nop
+// 0x00000018: bnec r5, r6, 0x00000024 ; +12
+// 0x0000001c: auipc r1, 2
+// 0x00000020: jic r1, 12 ; bc 0x00020028 ; +131080
+// 0x00000024: nop
// ...
-// 0x00020028: nop
-// 0x0002002c: .cfi_remember_state
-// 0x0002002c: daddiu r29, r29, 24
-// 0x00020030: .cfi_def_cfa_offset: 40
-// 0x00020030: ldc1 f24, +0(r29)
-// 0x00020034: .cfi_restore: r56
-// 0x00020034: ldc1 f25, +8(r29)
+// 0x00020024: nop
+// 0x00020028: .cfi_remember_state
+// 0x00020028: ld r31, +56(r29)
+// 0x0002002c: .cfi_restore: r31
+// 0x0002002c: ld r17, +48(r29)
+// 0x00020030: .cfi_restore: r17
+// 0x00020030: ld r16, +40(r29)
+// 0x00020034: .cfi_restore: r16
+// 0x00020034: ldc1 f25, +32(r29)
// 0x00020038: .cfi_restore: r57
-// 0x00020038: ld r16, +16(r29)
-// 0x0002003c: .cfi_restore: r16
-// 0x0002003c: ld r17, +24(r29)
-// 0x00020040: .cfi_restore: r17
-// 0x00020040: ld r31, +32(r29)
-// 0x00020044: .cfi_restore: r31
-// 0x00020044: daddiu r29, r29, 40
-// 0x00020047: .cfi_def_cfa_offset: 0
-// 0x00020048: jr r31
-// 0x0002004c: nop
-// 0x00020050: .cfi_restore_state
-// 0x00020050: .cfi_def_cfa_offset: 64
+// 0x00020038: ldc1 f24, +24(r29)
+// 0x0002003c: .cfi_restore: r56
+// 0x0002003c: daddiu r29, r29, 64
+// 0x00020040: .cfi_def_cfa_offset: 0
+// 0x00020040: jic r31, 0
+// 0x00020044: .cfi_restore_state
+// 0x00020044: .cfi_def_cfa_offset: 64
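
Cross-check of the regenerated MIPS64 expectations above (illustrative only, not part of the test): reading the first four bytes of expected_asm_kMips64 as a little-endian word and splitting it along the standard MIPS I-type fields reproduces the single-step `daddiu r29, r29, -64` frame allocation documented in the comments.

#include <cstdint>

constexpr uint32_t kFirstWord = 0x67BDFFC0;  // bytes 0xC0 0xFF 0xBD 0x67, little-endian
static_assert((kFirstWord >> 26) == 0x19, "major opcode DADDIU");
static_assert(((kFirstWord >> 21) & 0x1F) == 29, "rs = r29 (sp)");
static_assert(((kFirstWord >> 16) & 0x1F) == 29, "rt = r29 (sp)");
static_assert((kFirstWord & 0xFFFF) == 0xFFC0, "imm16 = -64, i.e. a 64-byte frame");
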
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f72bd6a5a3..e542cbbe37 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -56,6 +56,7 @@
#include "builder.h"
#include "cha_guard_optimization.h"
#include "code_generator.h"
+#include "code_sinking.h"
#include "compiled_method.h"
#include "compiler.h"
#include "constant_folding.h"
@@ -448,15 +449,6 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) {
|| instruction_set == kX86_64;
}
-// Read barrier are supported on ARM, ARM64, x86 and x86-64 at the moment.
-// TODO: Add support for other architectures and remove this function
-static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
- return instruction_set == kArm64
- || instruction_set == kThumb2
- || instruction_set == kX86
- || instruction_set == kX86_64;
-}
-
// Strip pass name suffix to get optimization name.
static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) {
size_t pos = pass_name.find(kPassNameSeparator);
@@ -498,7 +490,8 @@ static HOptimization* BuildOptimization(
handles,
stats,
number_of_dex_registers,
- /* depth */ 0);
+ /* total_number_of_instructions */ 0,
+ /* parent */ nullptr);
} else if (opt_name == HSharpening::kSharpeningPassName) {
return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles);
} else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
@@ -506,7 +499,7 @@ static HOptimization* BuildOptimization(
} else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
return new (arena) HInductionVarAnalysis(graph);
} else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
- return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str());
+ return new (arena) InstructionSimplifier(graph, codegen, stats, pass_name.c_str());
} else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
return new (arena) IntrinsicsRecognizer(graph, stats);
} else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
@@ -518,9 +511,11 @@ static HOptimization* BuildOptimization(
} else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
return new (arena) SideEffectsAnalysis(graph);
} else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
- return new (arena) HLoopOptimization(graph, most_recent_induction);
+ return new (arena) HLoopOptimization(graph, driver, most_recent_induction);
} else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) {
return new (arena) CHAGuardOptimization(graph);
+ } else if (opt_name == CodeSinking::kCodeSinkingPassName) {
+ return new (arena) CodeSinking(graph, stats);
#ifdef ART_ENABLE_CODEGEN_arm
} else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
@@ -604,8 +599,7 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
VariableSizedHandleScope* handles) const {
OptimizingCompilerStats* stats = compilation_stats_.get();
const CompilerOptions& compiler_options = driver->GetCompilerOptions();
- bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
- && (compiler_options.GetInlineMaxCodeUnits() > 0);
+ bool should_inline = (compiler_options.GetInlineMaxCodeUnits() > 0);
if (!should_inline) {
return;
}
@@ -620,7 +614,8 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
handles,
stats,
number_of_dex_registers,
- /* depth */ 0);
+ /* total_number_of_instructions */ 0,
+ /* parent */ nullptr);
HOptimization* optimizations[] = { inliner };
RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
@@ -765,28 +760,32 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination(
graph, stats, "dead_code_elimination$final");
HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
- InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
+ InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, codegen, stats);
HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
HConstantFolding* fold2 = new (arena) HConstantFolding(
graph, "constant_folding$after_inlining");
HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
- SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
- GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
- LICM* licm = new (arena) LICM(graph, *side_effects, stats);
- LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
+ SideEffectsAnalysis* side_effects1 = new (arena) SideEffectsAnalysis(
+ graph, "side_effects$before_gvn");
+ SideEffectsAnalysis* side_effects2 = new (arena) SideEffectsAnalysis(
+ graph, "side_effects$before_lse");
+ GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects1);
+ LICM* licm = new (arena) LICM(graph, *side_effects1, stats);
HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
- BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
- HLoopOptimization* loop = new (arena) HLoopOptimization(graph, induction);
+ BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
+ HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
+ LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2);
HSharpening* sharpening = new (arena) HSharpening(
graph, codegen, dex_compilation_unit, driver, handles);
InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
- graph, stats, "instruction_simplifier$after_inlining");
+ graph, codegen, stats, "instruction_simplifier$after_inlining");
InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
- graph, stats, "instruction_simplifier$after_bce");
+ graph, codegen, stats, "instruction_simplifier$after_bce");
InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
- graph, stats, "instruction_simplifier$before_codegen");
+ graph, codegen, stats, "instruction_simplifier$before_codegen");
IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
+ CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
HOptimization* optimizations1[] = {
intrinsics,
@@ -806,7 +805,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
fold2, // TODO: if we don't inline we can also skip fold2.
simplify2,
dce2,
- side_effects,
+ side_effects1,
gvn,
licm,
induction,
@@ -814,9 +813,11 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
loop,
fold3, // evaluates code generated by dynamic bce
simplify3,
+ side_effects2,
lse,
cha_guard,
dce3,
+ code_sinking,
// The codegen has a few assumptions that only the instruction simplifier
// can satisfy. For example, the code generator does not expect to see a
// HTypeConversion from a type to the same type.
@@ -847,8 +848,15 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
const DexFile::CodeItem* code_item) const {
ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
- stack_map.resize(codegen->ComputeStackMapsSize());
- codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item);
+ ArenaVector<uint8_t> method_info(arena->Adapter(kArenaAllocStackMaps));
+ size_t stack_map_size = 0;
+ size_t method_info_size = 0;
+ codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size);
+ stack_map.resize(stack_map_size);
+ method_info.resize(method_info_size);
+ codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()),
+ MemoryRegion(method_info.data(), method_info.size()),
+ *code_item);
CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
compiler_driver,
@@ -860,7 +868,7 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
- ArrayRef<const SrcMapElem>(),
+ ArrayRef<const uint8_t>(method_info),
ArrayRef<const uint8_t>(stack_map),
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
ArrayRef<const LinkerPatch>(linker_patches));
@@ -895,12 +903,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
return nullptr;
}
- // When read barriers are enabled, do not attempt to compile for
- // instruction sets that have no read barrier support.
- if (kEmitCompilerReadBarrier && !InstructionSetSupportsReadBarrier(instruction_set)) {
- return nullptr;
- }
-
if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological);
return nullptr;
@@ -1091,13 +1093,10 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
if (kIsDebugBuild &&
IsCompilingWithCoreImage() &&
- IsInstructionSetSupported(compiler_driver->GetInstructionSet()) &&
- (!kEmitCompilerReadBarrier ||
- InstructionSetSupportsReadBarrier(compiler_driver->GetInstructionSet()))) {
+ IsInstructionSetSupported(compiler_driver->GetInstructionSet())) {
// For testing purposes, we put a special marker on method names
- // that should be compiled with this compiler (when the the
- // instruction set is supported -- and has support for read
- // barriers, if they are enabled). This makes sure we're not
+ // that should be compiled with this compiler (when the
+ // instruction set is supported). This makes sure we're not
// regressing.
std::string method_name = dex_file.PrettyMethod(method_idx);
bool shouldCompile = method_name.find("$opt$") != std::string::npos;
@@ -1191,7 +1190,9 @@ bool OptimizingCompiler::JitCompile(Thread* self,
}
}
- size_t stack_map_size = codegen->ComputeStackMapsSize();
+ size_t stack_map_size = 0;
+ size_t method_info_size = 0;
+ codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size);
size_t number_of_roots = codegen->GetNumberOfJitRoots();
ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
// We allocate an object array to ensure the JIT roots that we will collect in EmitJitRoots
@@ -1207,20 +1208,30 @@ bool OptimizingCompiler::JitCompile(Thread* self,
return false;
}
uint8_t* stack_map_data = nullptr;
+ uint8_t* method_info_data = nullptr;
uint8_t* roots_data = nullptr;
- uint32_t data_size = code_cache->ReserveData(
- self, stack_map_size, number_of_roots, method, &stack_map_data, &roots_data);
+ uint32_t data_size = code_cache->ReserveData(self,
+ stack_map_size,
+ method_info_size,
+ number_of_roots,
+ method,
+ &stack_map_data,
+ &method_info_data,
+ &roots_data);
if (stack_map_data == nullptr || roots_data == nullptr) {
return false;
}
MaybeRecordStat(MethodCompilationStat::kCompiled);
- codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
+ codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size),
+ MemoryRegion(method_info_data, method_info_size),
+ *code_item);
codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data);
const void* code = code_cache->CommitCode(
self,
method,
stack_map_data,
+ method_info_data,
roots_data,
codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
codegen->GetCoreSpillMask(),
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 7240d40d7f..a211c5472a 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -68,6 +68,24 @@ enum MethodCompilationStat {
kImplicitNullCheckGenerated,
kExplicitNullCheckGenerated,
kSimplifyIf,
+ kInstructionSunk,
+ kNotInlinedUnresolvedEntrypoint,
+ kNotInlinedDexCache,
+ kNotInlinedStackMaps,
+ kNotInlinedEnvironmentBudget,
+ kNotInlinedInstructionBudget,
+ kNotInlinedLoopWithoutExit,
+ kNotInlinedIrreducibleLoop,
+ kNotInlinedAlwaysThrows,
+ kNotInlinedInfiniteLoop,
+ kNotInlinedTryCatch,
+ kNotInlinedRegisterAllocator,
+ kNotInlinedCannotBuild,
+ kNotInlinedNotVerified,
+ kNotInlinedCodeItem,
+ kNotInlinedWont,
+ kNotInlinedRecursiveBudget,
+ kNotInlinedProxy,
kLastStat
};
@@ -166,6 +184,24 @@ class OptimizingCompilerStats {
case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break;
case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
case kSimplifyIf: name = "SimplifyIf"; break;
+ case kInstructionSunk: name = "InstructionSunk"; break;
+ case kNotInlinedUnresolvedEntrypoint: name = "NotInlinedUnresolvedEntrypoint"; break;
+ case kNotInlinedDexCache: name = "NotInlinedDexCache"; break;
+ case kNotInlinedStackMaps: name = "NotInlinedStackMaps"; break;
+ case kNotInlinedEnvironmentBudget: name = "NotInlinedEnvironmentBudget"; break;
+ case kNotInlinedInstructionBudget: name = "NotInlinedInstructionBudget"; break;
+ case kNotInlinedLoopWithoutExit: name = "NotInlinedLoopWithoutExit"; break;
+ case kNotInlinedIrreducibleLoop: name = "NotInlinedIrreducibleLoop"; break;
+ case kNotInlinedAlwaysThrows: name = "NotInlinedAlwaysThrows"; break;
+ case kNotInlinedInfiniteLoop: name = "NotInlinedInfiniteLoop"; break;
+ case kNotInlinedTryCatch: name = "NotInlinedTryCatch"; break;
+ case kNotInlinedRegisterAllocator: name = "NotInlinedRegisterAllocator"; break;
+ case kNotInlinedCannotBuild: name = "NotInlinedCannotBuild"; break;
+ case kNotInlinedNotVerified: name = "NotInlinedNotVerified"; break;
+ case kNotInlinedCodeItem: name = "NotInlinedCodeItem"; break;
+ case kNotInlinedWont: name = "NotInlinedWont"; break;
+ case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break;
+ case kNotInlinedProxy: name = "NotInlinedProxy"; break;
case kLastStat:
LOG(FATAL) << "invalid stat "
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index efbaf6c221..66bfea9860 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -40,6 +40,14 @@ void PrepareForRegisterAllocation::VisitDivZeroCheck(HDivZeroCheck* check) {
check->ReplaceWith(check->InputAt(0));
}
+void PrepareForRegisterAllocation::VisitDeoptimize(HDeoptimize* deoptimize) {
+ if (deoptimize->GuardsAnInput()) {
+ // Replace the uses with the actual guarded instruction.
+ deoptimize->ReplaceWith(deoptimize->GuardedInput());
+ deoptimize->RemoveGuard();
+ }
+}
+
void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
check->ReplaceWith(check->InputAt(0));
if (check->IsStringCharAt()) {
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index c128227654..7ffbe44ef6 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -44,6 +44,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
void VisitCondition(HCondition* condition) OVERRIDE;
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+ void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const;
bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 6e332ca59b..d5637b9b75 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -310,8 +310,8 @@ static void BoundTypeForClassCheck(HInstruction* check) {
BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti);
} else {
DCHECK(check->IsDeoptimize());
- if (compare->IsEqual()) {
- BoundTypeIn(receiver, check->GetBlock(), check, class_rti);
+ if (compare->IsEqual() && check->AsDeoptimize()->GuardsAnInput()) {
+ check->SetReferenceTypeInfo(class_rti);
}
}
}
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index 84a4bab1a9..0b49ce1a4c 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -29,7 +29,7 @@ namespace art {
*/
class ReferenceTypePropagationTest : public CommonCompilerTest {
public:
- ReferenceTypePropagationTest() : pool_(), allocator_(&pool_) {
+ ReferenceTypePropagationTest() : pool_(), allocator_(&pool_), propagation_(nullptr) {
graph_ = CreateGraph(&allocator_);
}
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 59523a93a0..c6a0b6a0d2 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -299,14 +299,17 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) {
// Currently, we unconditionally spill the current method in the code generators.
&& !interval->GetDefinedBy()->IsCurrentMethod()) {
// We spill eagerly, so move must be at definition.
- InsertMoveAfter(interval->GetDefinedBy(),
- interval->ToLocation(),
- interval->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
- : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+ Location loc;
+ switch (interval->NumberOfSpillSlotsNeeded()) {
+ case 1: loc = Location::StackSlot(interval->GetParent()->GetSpillSlot()); break;
+ case 2: loc = Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); break;
+ case 4: loc = Location::SIMDStackSlot(interval->GetParent()->GetSpillSlot()); break;
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+ }
+ InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc);
}
UsePosition* use = current->GetFirstUse();
- UsePosition* env_use = current->GetFirstEnvironmentUse();
+ EnvUsePosition* env_use = current->GetFirstEnvironmentUse();
// Walk over all siblings, updating locations of use positions, and
// connecting them when they are adjacent.
@@ -323,7 +326,6 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) {
use = use->GetNext();
}
while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
- DCHECK(!use->GetIsEnvironment());
DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
if (!use->IsSynthesized()) {
LocationSummary* locations = use->GetUser()->GetLocations();
@@ -460,9 +462,12 @@ void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
location_source = defined_by->GetLocations()->Out();
} else {
DCHECK(defined_by->IsCurrentMethod());
- location_source = parent->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(parent->GetSpillSlot())
- : Location::StackSlot(parent->GetSpillSlot());
+ switch (parent->NumberOfSpillSlotsNeeded()) {
+ case 1: location_source = Location::StackSlot(parent->GetSpillSlot()); break;
+ case 2: location_source = Location::DoubleStackSlot(parent->GetSpillSlot()); break;
+ case 4: location_source = Location::SIMDStackSlot(parent->GetSpillSlot()); break;
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+ }
}
} else {
DCHECK(source != nullptr);
@@ -493,7 +498,8 @@ static bool IsValidDestination(Location destination) {
|| destination.IsFpuRegister()
|| destination.IsFpuRegisterPair()
|| destination.IsStackSlot()
- || destination.IsDoubleStackSlot();
+ || destination.IsDoubleStackSlot()
+ || destination.IsSIMDStackSlot();
}
void RegisterAllocationResolver::AddMove(HParallelMove* move,
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 9064f865c3..87f709f63d 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1029,7 +1029,7 @@ void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* ins
interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
} else {
interval->SetSpillSlot(catch_phi_spill_slot_counter_);
- catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
}
}
}
@@ -1996,43 +1996,48 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in
bool is_interval_beginning;
size_t position;
std::tie(position, is_interval_beginning, parent_interval) = *it;
-
- bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+ size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
if (is_interval_beginning) {
DCHECK(!parent_interval->HasSpillSlot());
DCHECK_EQ(position, parent_interval->GetStart());
- // Find a free stack slot.
+ // Find the first contiguous run of free stack slots.
size_t slot = 0;
- for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
- // Skip taken slots.
+ for (; ; ++slot) {
+ bool found = true;
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ if (taken.IsBitSet(s)) {
+ found = false;
+ break; // failure
+ }
+ }
+ if (found) {
+ break; // success
+ }
}
+
parent_interval->SetSpillSlot(slot);
- *num_stack_slots_used = std::max(*num_stack_slots_used,
- needs_two_slots ? slot + 1 : slot + 2);
- if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+ *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
+ if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
// The parallel move resolver requires that there be an even number of spill slots
// allocated for pair value types.
++(*num_stack_slots_used);
}
- taken.SetBit(slot);
- if (needs_two_slots) {
- taken.SetBit(slot + 1);
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ taken.SetBit(s);
}
} else {
DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
DCHECK(parent_interval->HasSpillSlot());
- // Free up the stack slot used by this interval.
+ // Free up the stack slot(s) used by this interval.
size_t slot = parent_interval->GetSpillSlot();
- DCHECK(taken.IsBitSet(slot));
- DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
- taken.ClearBit(slot);
- if (needs_two_slots) {
- taken.ClearBit(slot + 1);
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ DCHECK(taken.IsBitSet(s));
+ taken.ClearBit(s);
}
}
}
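
A standalone sketch of the slot search introduced above (illustrative only; the allocator operates on an ArenaBitVector rather than std::vector<bool>): it scans for the first run of `needed` consecutive free slots, treating slots past the end of the bitmap as free.

#include <cstddef>
#include <vector>

// Returns the lowest index at which `needed` consecutive free slots start;
// slots beyond the end of `taken` are considered free.
static size_t FindContiguousFreeSlots(const std::vector<bool>& taken, size_t needed) {
  for (size_t slot = 0; ; ++slot) {
    bool found = true;
    for (size_t s = slot; s < slot + needed; ++s) {
      if (s < taken.size() && taken[s]) {
        found = false;
        break;
      }
    }
    if (found) {
      return slot;
    }
  }
}
// E.g. with taken = {1, 0, 1, 0, 0, 0, 0} and needed = 4 (a 128-bit SIMD
// interval), the search returns slot 3.
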
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 1a391ce9bb..ab8d540359 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -629,21 +629,21 @@ bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
HInputsRef inputs = defined_by->GetInputs();
for (size_t i = 0; i < inputs.size(); ++i) {
- // Take the last interval of the input. It is the location of that interval
- // that will be used at `defined_by`.
- LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
- // Note that interval may have not been processed yet.
- // TODO: Handle non-split intervals last in the work list.
- if (locations->InAt(i).IsValid()
- && interval->HasRegister()
- && interval->SameRegisterKind(*current)) {
- // The input must be live until the end of `defined_by`, to comply to
- // the linear scan algorithm. So we use `defined_by`'s end lifetime
- // position to check whether the input is dead or is inactive after
- // `defined_by`.
- DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
- size_t position = defined_by->GetLifetimePosition() + 1;
- FreeIfNotCoverAt(interval, position, free_until);
+ if (locations->InAt(i).IsValid()) {
+ // Take the last interval of the input. It is the location of that interval
+ // that will be used at `defined_by`.
+ LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
+ // Note that interval may have not been processed yet.
+ // TODO: Handle non-split intervals last in the work list.
+ if (interval->HasRegister() && interval->SameRegisterKind(*current)) {
+ // The input must be live until the end of `defined_by`, to comply to
+ // the linear scan algorithm. So we use `defined_by`'s end lifetime
+ // position to check whether the input is dead or is inactive after
+ // `defined_by`.
+ DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
+ size_t position = defined_by->GetLifetimePosition() + 1;
+ FreeIfNotCoverAt(interval, position, free_until);
+ }
}
}
}
@@ -1125,36 +1125,31 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
}
- // Find an available spill slot.
+ // Find the first contiguous run of available spill slots.
+ size_t number_of_spill_slots_needed = parent->NumberOfSpillSlotsNeeded();
size_t slot = 0;
for (size_t e = spill_slots->size(); slot < e; ++slot) {
- if ((*spill_slots)[slot] <= parent->GetStart()) {
- if (!parent->NeedsTwoSpillSlots()) {
- // One spill slot is sufficient.
- break;
- }
- if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
- // Two spill slots are available.
+ bool found = true;
+ for (size_t s = slot, u = std::min(slot + number_of_spill_slots_needed, e); s < u; s++) {
+ if ((*spill_slots)[s] > parent->GetStart()) {
+ found = false; // failure
break;
}
}
+ if (found) {
+ break; // success
+ }
}
+ // Need new spill slots?
+ size_t upper = slot + number_of_spill_slots_needed;
+ if (upper > spill_slots->size()) {
+ spill_slots->resize(upper);
+ }
+ // Set slots to end.
size_t end = interval->GetLastSibling()->GetEnd();
- if (parent->NeedsTwoSpillSlots()) {
- if (slot + 2u > spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->resize(slot + 2u, end);
- }
- (*spill_slots)[slot] = end;
- (*spill_slots)[slot + 1] = end;
- } else {
- if (slot == spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->push_back(end);
- } else {
- (*spill_slots)[slot] = end;
- }
+ for (size_t s = slot; s < upper; s++) {
+ (*spill_slots)[s] = end;
}
// Note that the exact spill slot location will be computed when we resolve,
@@ -1180,7 +1175,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
// TODO: Reuse spill slots when intervals of phis from different catch
// blocks do not overlap.
interval->SetSpillSlot(catch_phi_spill_slots_);
- catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ catch_phi_spill_slots_ += interval->NumberOfSpillSlotsNeeded();
}
}
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 2227872f76..667afb1ec3 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -912,9 +912,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) {
// Create an interval with lifetime holes.
static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}};
LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one);
- first->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, first->first_use_);
- first->first_use_ = new(&allocator) UsePosition(user, 0, false, 7, first->first_use_);
- first->first_use_ = new(&allocator) UsePosition(user, 0, false, 6, first->first_use_);
+ first->first_use_ = new(&allocator) UsePosition(user, false, 8, first->first_use_);
+ first->first_use_ = new(&allocator) UsePosition(user, false, 7, first->first_use_);
+ first->first_use_ = new(&allocator) UsePosition(user, false, 6, first->first_use_);
locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall);
locations->SetOut(Location::RequiresRegister());
@@ -934,9 +934,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) {
// before lifetime position 6 yet.
static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}};
LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three);
- third->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, third->first_use_);
- third->first_use_ = new(&allocator) UsePosition(user, 0, false, 4, third->first_use_);
- third->first_use_ = new(&allocator) UsePosition(user, 0, false, 3, third->first_use_);
+ third->first_use_ = new(&allocator) UsePosition(user, false, 8, third->first_use_);
+ third->first_use_ = new(&allocator) UsePosition(user, false, 4, third->first_use_);
+ third->first_use_ = new(&allocator) UsePosition(user, false, 3, third->first_use_);
locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall);
locations->SetOut(Location::RequiresRegister());
third = third->SplitAt(3);
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index ab0dad4300..9236a0e4fa 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -315,7 +315,10 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor {
// This class and its sub-classes will never be used to drive a visit of an
// `HGraph` but only to visit `HInstructions` one at a time, so we do not need
// to pass a valid graph to `HGraphDelegateVisitor()`.
- SchedulingLatencyVisitor() : HGraphDelegateVisitor(nullptr) {}
+ SchedulingLatencyVisitor()
+ : HGraphDelegateVisitor(nullptr),
+ last_visited_latency_(0),
+ last_visited_internal_latency_(0) {}
void VisitInstruction(HInstruction* instruction) OVERRIDE {
LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". "
@@ -413,6 +416,7 @@ class HScheduler {
selector_(selector),
only_optimize_loop_blocks_(true),
scheduling_graph_(this, arena),
+ cursor_(nullptr),
candidates_(arena_->Adapter(kArenaAllocScheduler)) {}
virtual ~HScheduler() {}
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index be400925d5..eedaf6e67e 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -41,7 +41,7 @@ void HSharpening::Run() {
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
if (instruction->IsInvokeStaticOrDirect()) {
- ProcessInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect());
+ SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_);
} else if (instruction->IsLoadString()) {
ProcessLoadString(instruction->AsLoadString());
}
@@ -65,12 +65,12 @@ static bool IsInBootImage(ArtMethod* method) {
}
static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options) {
- // Including patch information means the AOT code will be patched, which we don't
- // support in the compiler, and is anyways moving away b/33192586.
- return IsInBootImage(method) && !options.GetCompilePic() && !options.GetIncludePatchInformation();
+ return IsInBootImage(method) && !options.GetCompilePic();
}
-void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+
+void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+ CodeGenerator* codegen) {
if (invoke->IsStringInit()) {
// Not using the dex cache arrays. But we could still try to use a better dispatch...
// TODO: Use direct_method and direct_code for the appropriate StringFactory method.
@@ -97,12 +97,12 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
// We don't optimize for debuggable as it would prevent us from obsoleting the method in some
// situations.
- if (callee == codegen_->GetGraph()->GetArtMethod() && !codegen_->GetGraph()->IsDebuggable()) {
+ if (callee == codegen->GetGraph()->GetArtMethod() && !codegen->GetGraph()->IsDebuggable()) {
// Recursive call.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
} else if (Runtime::Current()->UseJitCompilation() ||
- AOTCanEmbedMethod(callee, codegen_->GetCompilerOptions())) {
+ AOTCanEmbedMethod(callee, codegen->GetCompilerOptions())) {
// JIT or on-device AOT compilation referencing a boot image method.
// Use the method address directly.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
@@ -111,13 +111,17 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
} else {
// Use PC-relative access to the dex cache arrays.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
- DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()),
- &graph_->GetDexFile());
+ // Note: we use the invoke's graph rather than the codegen graph, which can
+ // differ when inlining (the codegen graph is the outermost graph). The
+ // invoke's dex method index is relative to the dex file from which the
+ // invoke's graph was built.
+ DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen->GetInstructionSet()),
+ &invoke->GetBlock()->GetGraph()->GetDexFile());
method_load_data = layout.MethodOffset(invoke->GetDexMethodIndex());
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
}
- if (graph_->IsDebuggable()) {
+ if (codegen->GetGraph()->IsDebuggable()) {
// For debuggable apps always use the code pointer from ArtMethod
// so that we don't circumvent instrumentation stubs if installed.
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
@@ -127,14 +131,14 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
method_load_kind, code_ptr_location, method_load_data
};
HInvokeStaticOrDirect::DispatchInfo dispatch_info =
- codegen_->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke);
+ codegen->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke);
invoke->SetDispatchInfo(dispatch_info);
}
-HLoadClass::LoadKind HSharpening::SharpenClass(HLoadClass* load_class,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
- const DexCompilationUnit& dex_compilation_unit) {
+HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ const DexCompilationUnit& dex_compilation_unit) {
Handle<mirror::Class> klass = load_class->GetClass();
DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
@@ -255,7 +259,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
} else if (runtime->UseJitCompilation()) {
// TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
// DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
- string = class_linker->LookupString(dex_file, string_index, dex_cache);
+ string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
if (string != nullptr) {
if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
@@ -267,7 +271,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
}
} else {
// AOT app compilation. Try to lookup the string without allocating if not found.
- string = class_linker->LookupString(dex_file, string_index, dex_cache);
+ string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
if (string != nullptr &&
runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
!codegen_->GetCompilerOptions().GetCompilePic()) {
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 4240b2f339..10707c796f 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -48,14 +48,16 @@ class HSharpening : public HOptimization {
static constexpr const char* kSharpeningPassName = "sharpening";
// Used by the builder and the inliner.
- static HLoadClass::LoadKind SharpenClass(HLoadClass* load_class,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
- const DexCompilationUnit& dex_compilation_unit)
+ static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ const DexCompilationUnit& dex_compilation_unit)
REQUIRES_SHARED(Locks::mutator_lock_);
+ // Used by Sharpening and InstructionSimplifier.
+ static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, CodeGenerator* codegen);
+
private:
- void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
void ProcessLoadString(HLoadString* load_string);
CodeGenerator* codegen_;
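
Usage note (illustrative only): because the entry point is now static, any pass that holds a CodeGenerator* — such as the instruction simplifier, which now receives one in its constructor — can re-sharpen a call it has just rewritten without owning an HSharpening instance. `instruction` and `codegen_` are assumed to belong to that caller.

// Sketch of a caller-side re-sharpen, under the assumptions stated above.
if (instruction->IsInvokeStaticOrDirect()) {
  HSharpening::SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_);
}
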
diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h
index bac6088bf7..fea47e66d9 100644
--- a/compiler/optimizing/side_effects_analysis.h
+++ b/compiler/optimizing/side_effects_analysis.h
@@ -25,8 +25,8 @@ namespace art {
class SideEffectsAnalysis : public HOptimization {
public:
- explicit SideEffectsAnalysis(HGraph* graph)
- : HOptimization(graph, kSideEffectsAnalysisPassName),
+ SideEffectsAnalysis(HGraph* graph, const char* pass_name = kSideEffectsAnalysisPassName)
+ : HOptimization(graph, pass_name),
graph_(graph),
block_effects_(graph->GetBlocks().size(),
graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)),
@@ -41,7 +41,7 @@ class SideEffectsAnalysis : public HOptimization {
bool HasRun() const { return has_run_; }
- static constexpr const char* kSideEffectsAnalysisPassName = "SideEffects";
+ static constexpr const char* kSideEffectsAnalysisPassName = "side_effects";
private:
void UpdateLoopEffects(HLoopInformation* info, SideEffects effects);
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index e8e12e1a55..b538a89a06 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -469,8 +469,15 @@ bool LiveInterval::SameRegisterKind(Location other) const {
}
}
-bool LiveInterval::NeedsTwoSpillSlots() const {
- return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble;
+size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
+ // For a SIMD operation, compute the number of needed spill slots.
+ // TODO: do through vector type?
+ HInstruction* definition = GetParent()->GetDefinedBy();
+ if (definition != nullptr && definition->IsVecOperation()) {
+ return definition->AsVecOperation()->GetVectorNumberOfBytes() / kVRegSize;
+ }
+ // Return number of needed spill slots based on type.
+ return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1;
}
Location LiveInterval::ToLocation() const {
@@ -494,10 +501,11 @@ Location LiveInterval::ToLocation() const {
if (defined_by->IsConstant()) {
return defined_by->GetLocations()->Out();
} else if (GetParent()->HasSpillSlot()) {
- if (NeedsTwoSpillSlots()) {
- return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
- } else {
- return Location::StackSlot(GetParent()->GetSpillSlot());
+ switch (NumberOfSpillSlotsNeeded()) {
+ case 1: return Location::StackSlot(GetParent()->GetSpillSlot());
+ case 2: return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
+ case 4: return Location::SIMDStackSlot(GetParent()->GetSpillSlot());
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
}
} else {
return Location();
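The switch from NeedsTwoSpillSlots() to NumberOfSpillSlotsNeeded() above reduces spill sizing to arithmetic on the vector width. A minimal sketch of the same computation, assuming ART's 4-byte kVRegSize and a hypothetical 16-byte SIMD operation (the function name SpillSlotsFor is invented for illustration):

    #include <cstddef>

    // Assumption for illustration: a Dex virtual register is 4 bytes wide.
    constexpr std::size_t kVRegSize = 4;

    std::size_t SpillSlotsFor(std::size_t vector_num_bytes, bool is_long_or_double) {
      if (vector_num_bytes != 0u) {
        // SIMD case: e.g. a 16-byte vector needs 16 / 4 = 4 slots, mapped to
        // Location::SIMDStackSlot in ToLocation() above.
        return vector_num_bytes / kVRegSize;
      }
      // Scalar case: longs and doubles still take two vreg-sized slots, everything else one.
      return is_long_or_double ? 2u : 1u;
    }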
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index b62bf4e5f9..e9dffc1fac 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
#define ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
-#include "nodes.h"
#include <iostream>
+#include "nodes.h"
+
namespace art {
class CodeGenerator;
@@ -103,21 +104,20 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocSsaLiveness> {
*/
class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
public:
- UsePosition(HInstruction* user,
- HEnvironment* environment,
- size_t input_index,
- size_t position,
- UsePosition* next)
+ UsePosition(HInstruction* user, size_t input_index, size_t position, UsePosition* next)
: user_(user),
- environment_(environment),
input_index_(input_index),
position_(position),
next_(next) {
- DCHECK(environment == nullptr || user == nullptr);
DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
}
- static constexpr size_t kNoInput = -1;
+ explicit UsePosition(size_t position)
+ : user_(nullptr),
+ input_index_(kNoInput),
+ position_(dchecked_integral_cast<uint32_t>(position)),
+ next_(nullptr) {
+ }
size_t GetPosition() const { return position_; }
@@ -125,9 +125,7 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
void SetNext(UsePosition* next) { next_ = next; }
HInstruction* GetUser() const { return user_; }
- HEnvironment* GetEnvironment() const { return environment_; }
- bool GetIsEnvironment() const { return environment_ != nullptr; }
bool IsSynthesized() const { return user_ == nullptr; }
size_t GetInputIndex() const { return input_index_; }
@@ -142,20 +140,20 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
UsePosition* Dup(ArenaAllocator* allocator) const {
return new (allocator) UsePosition(
- user_, environment_, input_index_, position_,
+ user_, input_index_, position_,
next_ == nullptr ? nullptr : next_->Dup(allocator));
}
bool RequiresRegister() const {
- if (GetIsEnvironment()) return false;
if (IsSynthesized()) return false;
Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
return location.IsUnallocated() && location.RequiresRegisterKind();
}
private:
+ static constexpr uint32_t kNoInput = static_cast<uint32_t>(-1);
+
HInstruction* const user_;
- HEnvironment* const environment_;
const size_t input_index_;
const size_t position_;
UsePosition* next_;
@@ -163,6 +161,50 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
DISALLOW_COPY_AND_ASSIGN(UsePosition);
};
+/**
+ * An environment use position represents an environment use of a live interval at a given position.
+ */
+class EnvUsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
+ public:
+ EnvUsePosition(HEnvironment* environment,
+ size_t input_index,
+ size_t position,
+ EnvUsePosition* next)
+ : environment_(environment),
+ input_index_(input_index),
+ position_(position),
+ next_(next) {
+ DCHECK(environment != nullptr);
+ DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
+ }
+
+ size_t GetPosition() const { return position_; }
+
+ EnvUsePosition* GetNext() const { return next_; }
+ void SetNext(EnvUsePosition* next) { next_ = next; }
+
+ HEnvironment* GetEnvironment() const { return environment_; }
+ size_t GetInputIndex() const { return input_index_; }
+
+ void Dump(std::ostream& stream) const {
+ stream << position_;
+ }
+
+ EnvUsePosition* Dup(ArenaAllocator* allocator) const {
+ return new (allocator) EnvUsePosition(
+ environment_, input_index_, position_,
+ next_ == nullptr ? nullptr : next_->Dup(allocator));
+ }
+
+ private:
+ HEnvironment* const environment_;
+ const size_t input_index_;
+ const size_t position_;
+ EnvUsePosition* next_;
+
+ DISALLOW_COPY_AND_ASSIGN(EnvUsePosition);
+};
+
class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> {
public:
explicit SafepointPosition(HInstruction* instruction)
@@ -227,7 +269,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
DCHECK(first_env_use_ == nullptr) << "A temporary cannot have environment user";
size_t position = instruction->GetLifetimePosition();
first_use_ = new (allocator_) UsePosition(
- instruction, /* environment */ nullptr, temp_index, position, first_use_);
+ instruction, temp_index, position, first_use_);
AddRange(position, position + 1);
}
@@ -276,7 +318,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
}
DCHECK(first_use_->GetPosition() + 1 == position);
UsePosition* new_use = new (allocator_) UsePosition(
- instruction, nullptr /* environment */, input_index, position, cursor->GetNext());
+ instruction, input_index, position, cursor->GetNext());
cursor->SetNext(new_use);
if (first_range_->GetEnd() == first_use_->GetPosition()) {
first_range_->end_ = position;
@@ -285,11 +327,11 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
}
if (is_environment) {
- first_env_use_ = new (allocator_) UsePosition(
- nullptr /* instruction */, environment, input_index, position, first_env_use_);
+ first_env_use_ = new (allocator_) EnvUsePosition(
+ environment, input_index, position, first_env_use_);
} else {
first_use_ = new (allocator_) UsePosition(
- instruction, nullptr /* environment */, input_index, position, first_use_);
+ instruction, input_index, position, first_use_);
}
if (is_environment && !keep_alive) {
@@ -328,10 +370,10 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
AddBackEdgeUses(*block);
}
first_use_ = new (allocator_) UsePosition(
- instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_);
+ instruction, input_index, block->GetLifetimeEnd(), first_use_);
}
- void AddRange(size_t start, size_t end) {
+ ALWAYS_INLINE void AddRange(size_t start, size_t end) {
if (first_range_ == nullptr) {
first_range_ = last_range_ = range_search_start_ =
new (allocator_) LiveRange(start, end, first_range_);
@@ -538,7 +580,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
return first_use_;
}
- UsePosition* GetFirstEnvironmentUse() const {
+ EnvUsePosition* GetFirstEnvironmentUse() const {
return first_env_use_;
}
@@ -676,7 +718,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
current = current->GetNext();
}
stream << "}, uses: { ";
- UsePosition* use = first_use_;
+ const UsePosition* use = first_use_;
if (use != nullptr) {
do {
use->Dump(stream);
@@ -684,12 +726,12 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
} while ((use = use->GetNext()) != nullptr);
}
stream << "}, { ";
- use = first_env_use_;
- if (use != nullptr) {
+ const EnvUsePosition* env_use = first_env_use_;
+ if (env_use != nullptr) {
do {
- use->Dump(stream);
+ env_use->Dump(stream);
stream << " ";
- } while ((use = use->GetNext()) != nullptr);
+ } while ((env_use = env_use->GetNext()) != nullptr);
}
stream << "}";
stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
@@ -720,9 +762,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
// Returns kNoRegister otherwise.
int FindHintAtDefinition() const;
- // Returns whether the interval needs two (Dex virtual register size `kVRegSize`)
- // slots for spilling.
- bool NeedsTwoSpillSlots() const;
+ // Returns the number of required spill slots (measured as a multiple of the
+ // Dex virtual register size `kVRegSize`).
+ size_t NumberOfSpillSlotsNeeded() const;
bool IsFloatingPoint() const {
return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble;
@@ -1015,12 +1057,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
DCHECK(last_in_new_list == nullptr ||
back_edge_use_position > last_in_new_list->GetPosition());
- UsePosition* new_use = new (allocator_) UsePosition(
- /* user */ nullptr,
- /* environment */ nullptr,
- UsePosition::kNoInput,
- back_edge_use_position,
- /* next */ nullptr);
+ UsePosition* new_use = new (allocator_) UsePosition(back_edge_use_position);
if (last_in_new_list != nullptr) {
// Going outward. The latest created use needs to point to the new use.
@@ -1056,7 +1093,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
// Uses of this interval. Note that this linked list is shared amongst siblings.
UsePosition* first_use_;
- UsePosition* first_env_use_;
+ EnvUsePosition* first_env_use_;
// The instruction type this interval corresponds to.
const Primitive::Type type_;
@@ -1210,8 +1247,7 @@ class SsaLivenessAnalysis : public ValueObject {
// Returns whether `instruction` in an HEnvironment held by `env_holder`
// should be kept live by the HEnvironment.
- static bool ShouldBeLiveForEnvironment(HInstruction* env_holder,
- HInstruction* instruction) {
+ static bool ShouldBeLiveForEnvironment(HInstruction* env_holder, HInstruction* instruction) {
if (instruction == nullptr) return false;
// A value that's not live in compiled code may still be needed in interpreter,
// due to code motion, etc.
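Splitting environment uses into their own EnvUsePosition class means consumers now walk two lists instead of filtering one. A sketch of such a walk, assuming GetFirstUse() is the accessor returning first_use_ (only GetFirstEnvironmentUse() is visible in the hunks above):

    #include "ssa_liveness_analysis.h"

    namespace art {

    // Hypothetical consumer of the two now-separate use lists.
    void VisitUses(LiveInterval* interval) {
      for (const UsePosition* use = interval->GetFirstUse();
           use != nullptr;
           use = use->GetNext()) {
        if (use->RequiresRegister()) {
          // Only real instruction inputs can require a register; the environment
          // early-out disappeared from RequiresRegister() with the split.
        }
      }
      for (const EnvUsePosition* env_use = interval->GetFirstEnvironmentUse();
           env_use != nullptr;
           env_use = env_use->GetNext()) {
        // Environment uses only keep values alive for deoptimization and debugging.
      }
    }

    }  // namespace art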
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
new file mode 100644
index 0000000000..a1016d1d47
--- /dev/null
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
+#include "base/arena_allocator.h"
+#include "base/arena_containers.h"
+#include "driver/compiler_options.h"
+#include "code_generator.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+class SsaLivenessAnalysisTest : public testing::Test {
+ public:
+ SsaLivenessAnalysisTest()
+ : pool_(),
+ allocator_(&pool_),
+ graph_(CreateGraph(&allocator_)),
+ compiler_options_(),
+ instruction_set_(kRuntimeISA) {
+ std::string error_msg;
+ instruction_set_features_ =
+ InstructionSetFeatures::FromVariant(instruction_set_, "default", &error_msg);
+ codegen_ = CodeGenerator::Create(graph_,
+ instruction_set_,
+ *instruction_set_features_,
+ compiler_options_);
+ CHECK(codegen_ != nullptr) << instruction_set_ << " is not a supported target architecture.";
+ // Create entry block.
+ entry_ = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(entry_);
+ graph_->SetEntryBlock(entry_);
+ }
+
+ protected:
+ HBasicBlock* CreateSuccessor(HBasicBlock* block) {
+ HGraph* graph = block->GetGraph();
+ HBasicBlock* successor = new (&allocator_) HBasicBlock(graph);
+ graph->AddBlock(successor);
+ block->AddSuccessor(successor);
+ return successor;
+ }
+
+ ArenaPool pool_;
+ ArenaAllocator allocator_;
+ HGraph* graph_;
+ CompilerOptions compiler_options_;
+ InstructionSet instruction_set_;
+ std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
+ std::unique_ptr<CodeGenerator> codegen_;
+ HBasicBlock* entry_;
+};
+
+TEST_F(SsaLivenessAnalysisTest, TestReturnArg) {
+ HInstruction* arg = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
+ entry_->AddInstruction(arg);
+
+ HBasicBlock* block = CreateSuccessor(entry_);
+ HInstruction* ret = new (&allocator_) HReturn(arg);
+ block->AddInstruction(ret);
+ block->AddInstruction(new (&allocator_) HExit());
+
+ graph_->BuildDominatorTree();
+ SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get());
+ ssa_analysis.Analyze();
+
+ std::ostringstream arg_dump;
+ arg->GetLiveInterval()->Dump(arg_dump);
+ EXPECT_STREQ("ranges: { [2,6) }, uses: { 6 }, { } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+ arg_dump.str().c_str());
+}
+
+TEST_F(SsaLivenessAnalysisTest, TestAput) {
+ HInstruction* array = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
+ HInstruction* index = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt);
+ HInstruction* value = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(2), 2, Primitive::kPrimInt);
+ HInstruction* extra_arg1 = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(3), 3, Primitive::kPrimInt);
+ HInstruction* extra_arg2 = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(4), 4, Primitive::kPrimNot);
+ ArenaVector<HInstruction*> args({ array, index, value, extra_arg1, extra_arg2 },
+ allocator_.Adapter());
+ for (HInstruction* insn : args) {
+ entry_->AddInstruction(insn);
+ }
+
+ HBasicBlock* block = CreateSuccessor(entry_);
+ HInstruction* null_check = new (&allocator_) HNullCheck(array, 0);
+ block->AddInstruction(null_check);
+ HEnvironment* null_check_env = new (&allocator_) HEnvironment(&allocator_,
+ /* number_of_vregs */ 5,
+ /* method */ nullptr,
+ /* dex_pc */ 0u,
+ null_check);
+ null_check_env->CopyFrom(args);
+ null_check->SetRawEnvironment(null_check_env);
+ HInstruction* length = new (&allocator_) HArrayLength(array, 0);
+ block->AddInstruction(length);
+ HInstruction* bounds_check = new (&allocator_) HBoundsCheck(index, length, /* dex_pc */ 0u);
+ block->AddInstruction(bounds_check);
+ HEnvironment* bounds_check_env = new (&allocator_) HEnvironment(&allocator_,
+ /* number_of_vregs */ 5,
+ /* method */ nullptr,
+ /* dex_pc */ 0u,
+ bounds_check);
+ bounds_check_env->CopyFrom(args);
+ bounds_check->SetRawEnvironment(bounds_check_env);
+ HInstruction* array_set =
+ new (&allocator_) HArraySet(array, index, value, Primitive::kPrimInt, /* dex_pc */ 0);
+ block->AddInstruction(array_set);
+
+ graph_->BuildDominatorTree();
+ SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get());
+ ssa_analysis.Analyze();
+
+ EXPECT_FALSE(graph_->IsDebuggable());
+ EXPECT_EQ(18u, bounds_check->GetLifetimePosition());
+ static const char* const expected[] = {
+ "ranges: { [2,21) }, uses: { 15 17 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
+ "is_high: 0",
+ "ranges: { [4,21) }, uses: { 19 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
+ "is_high: 0",
+ "ranges: { [6,21) }, uses: { 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
+ "is_high: 0",
+ // Environment uses do not keep the non-reference argument alive.
+ "ranges: { [8,10) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+ // Environment uses keep the reference argument alive.
+ "ranges: { [10,19) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+ };
+ ASSERT_EQ(arraysize(expected), args.size());
+ size_t arg_index = 0u;
+ for (HInstruction* arg : args) {
+ std::ostringstream arg_dump;
+ arg->GetLiveInterval()->Dump(arg_dump);
+ EXPECT_STREQ(expected[arg_index], arg_dump.str().c_str()) << arg_index;
+ ++arg_index;
+ }
+}
+
+TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) {
+ HInstruction* array = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
+ HInstruction* index = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt);
+ HInstruction* value = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(2), 2, Primitive::kPrimInt);
+ HInstruction* extra_arg1 = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(3), 3, Primitive::kPrimInt);
+ HInstruction* extra_arg2 = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), dex::TypeIndex(4), 4, Primitive::kPrimNot);
+ ArenaVector<HInstruction*> args({ array, index, value, extra_arg1, extra_arg2 },
+ allocator_.Adapter());
+ for (HInstruction* insn : args) {
+ entry_->AddInstruction(insn);
+ }
+
+ HBasicBlock* block = CreateSuccessor(entry_);
+ HInstruction* null_check = new (&allocator_) HNullCheck(array, 0);
+ block->AddInstruction(null_check);
+ HEnvironment* null_check_env = new (&allocator_) HEnvironment(&allocator_,
+ /* number_of_vregs */ 5,
+ /* method */ nullptr,
+ /* dex_pc */ 0u,
+ null_check);
+ null_check_env->CopyFrom(args);
+ null_check->SetRawEnvironment(null_check_env);
+ HInstruction* length = new (&allocator_) HArrayLength(array, 0);
+ block->AddInstruction(length);
+ // Use HAboveOrEqual+HDeoptimize as the bounds check.
+ HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length);
+ block->AddInstruction(ae);
+ HInstruction* deoptimize =
+ new (&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u);
+ block->AddInstruction(deoptimize);
+ HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_,
+ /* number_of_vregs */ 5,
+ /* method */ nullptr,
+ /* dex_pc */ 0u,
+ deoptimize);
+ deoptimize_env->CopyFrom(args);
+ deoptimize->SetRawEnvironment(deoptimize_env);
+ HInstruction* array_set =
+ new (&allocator_) HArraySet(array, index, value, Primitive::kPrimInt, /* dex_pc */ 0);
+ block->AddInstruction(array_set);
+
+ graph_->BuildDominatorTree();
+ SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get());
+ ssa_analysis.Analyze();
+
+ EXPECT_FALSE(graph_->IsDebuggable());
+ EXPECT_EQ(20u, deoptimize->GetLifetimePosition());
+ static const char* const expected[] = {
+ "ranges: { [2,23) }, uses: { 15 17 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 "
+ "is_high: 0",
+ "ranges: { [4,23) }, uses: { 19 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 "
+ "is_high: 0",
+ "ranges: { [6,23) }, uses: { 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+ // Environment use in HDeoptimize keeps even the non-reference argument alive.
+ "ranges: { [8,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+ // Environment uses keep the reference argument alive.
+ "ranges: { [10,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+ };
+ ASSERT_EQ(arraysize(expected), args.size());
+ size_t arg_index = 0u;
+ for (HInstruction* arg : args) {
+ std::ostringstream arg_dump;
+ arg->GetLiveInterval()->Dump(arg_dump);
+ EXPECT_STREQ(expected[arg_index], arg_dump.str().c_str()) << arg_index;
+ ++arg_index;
+ }
+}
+
+} // namespace art
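The expected strings in these tests are produced by LiveInterval::Dump() as modified in the header diff above; reading one of them field by field:

    // "ranges: { [2,21) }, uses: { 15 17 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0"
    //   ranges         : live ranges as [start, end) lifetime positions.
    //   first  { ... } : positions of regular input uses (UsePosition list).
    //   second { ... } : positions of environment uses (the new EnvUsePosition list).
    //   trailing flags : fixed / split / low / high properties of the interval.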
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 4d12ad6eb6..b7840d73db 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -152,6 +152,9 @@ size_t StackMapStream::PrepareForFillIn() {
encoding.location_catalog.num_entries = location_catalog_entries_.size();
encoding.location_catalog.num_bytes = ComputeDexRegisterLocationCatalogSize();
encoding.inline_info.num_entries = inline_infos_.size();
+ // Must be done before calling ComputeInlineInfoEncoding, since it requires
+ // dex_method_index_idx to be filled in.
+ PrepareMethodIndices();
ComputeInlineInfoEncoding(&encoding.inline_info.encoding,
encoding.dex_register_map.num_bytes);
CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset();
@@ -245,7 +248,7 @@ void StackMapStream::ComputeInlineInfoEncoding(InlineInfoEncoding* encoding,
for (size_t j = 0; j < entry.inlining_depth; ++j) {
InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
if (inline_entry.method == nullptr) {
- method_index_max = std::max(method_index_max, inline_entry.method_index);
+ method_index_max = std::max(method_index_max, inline_entry.dex_method_index_idx);
extra_data_max = std::max(extra_data_max, 1u);
} else {
method_index_max = std::max(
@@ -288,7 +291,25 @@ size_t StackMapStream::MaybeCopyDexRegisterMap(DexRegisterMapEntry& entry,
return entry.offset;
}
-void StackMapStream::FillIn(MemoryRegion region) {
+void StackMapStream::FillInMethodInfo(MemoryRegion region) {
+ {
+ MethodInfo info(region.begin(), method_indices_.size());
+ for (size_t i = 0; i < method_indices_.size(); ++i) {
+ info.SetMethodIndex(i, method_indices_[i]);
+ }
+ }
+ if (kIsDebugBuild) {
+ // Check the data matches.
+ MethodInfo info(region.begin());
+ const size_t count = info.NumMethodIndices();
+ DCHECK_EQ(count, method_indices_.size());
+ for (size_t i = 0; i < count; ++i) {
+ DCHECK_EQ(info.GetMethodIndex(i), method_indices_[i]);
+ }
+ }
+}
+
+void StackMapStream::FillInCodeInfo(MemoryRegion region) {
DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn";
@@ -345,7 +366,7 @@ void StackMapStream::FillIn(MemoryRegion region) {
InvokeInfo invoke_info(code_info.GetInvokeInfo(encoding, invoke_info_idx));
invoke_info.SetNativePcCodeOffset(encoding.invoke_info.encoding, entry.native_pc_code_offset);
invoke_info.SetInvokeType(encoding.invoke_info.encoding, entry.invoke_type);
- invoke_info.SetMethodIndex(encoding.invoke_info.encoding, entry.dex_method_index);
+ invoke_info.SetMethodIndexIdx(encoding.invoke_info.encoding, entry.dex_method_index_idx);
++invoke_info_idx;
}
@@ -364,7 +385,7 @@ void StackMapStream::FillIn(MemoryRegion region) {
for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
if (inline_entry.method != nullptr) {
- inline_info.SetMethodIndexAtDepth(
+ inline_info.SetMethodIndexIdxAtDepth(
encoding.inline_info.encoding,
depth,
High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method)));
@@ -373,9 +394,9 @@ void StackMapStream::FillIn(MemoryRegion region) {
depth,
Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method)));
} else {
- inline_info.SetMethodIndexAtDepth(encoding.inline_info.encoding,
- depth,
- inline_entry.method_index);
+ inline_info.SetMethodIndexIdxAtDepth(encoding.inline_info.encoding,
+ depth,
+ inline_entry.dex_method_index_idx);
inline_info.SetExtraDataAtDepth(encoding.inline_info.encoding, depth, 1);
}
inline_info.SetDexPcAtDepth(encoding.inline_info.encoding, depth, inline_entry.dex_pc);
@@ -533,6 +554,29 @@ size_t StackMapStream::PrepareRegisterMasks() {
return dedupe.size();
}
+void StackMapStream::PrepareMethodIndices() {
+ CHECK(method_indices_.empty());
+ method_indices_.resize(stack_maps_.size() + inline_infos_.size());
+ ArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
+ for (StackMapEntry& stack_map : stack_maps_) {
+ const size_t index = dedupe.size();
+ const uint32_t method_index = stack_map.dex_method_index;
+ if (method_index != DexFile::kDexNoIndex) {
+ stack_map.dex_method_index_idx = dedupe.emplace(method_index, index).first->second;
+ method_indices_[index] = method_index;
+ }
+ }
+ for (InlineInfoEntry& inline_info : inline_infos_) {
+ const size_t index = dedupe.size();
+ const uint32_t method_index = inline_info.method_index;
+ CHECK_NE(method_index, DexFile::kDexNoIndex);
+ inline_info.dex_method_index_idx = dedupe.emplace(method_index, index).first->second;
+ method_indices_[index] = method_index;
+ }
+ method_indices_.resize(dedupe.size());
+}
+
+
size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
// Preallocate memory since we do not want it to move (the dedup map will point into it).
const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
@@ -590,7 +634,8 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
DCHECK_EQ(invoke_info.GetNativePcOffset(encoding.invoke_info.encoding, instruction_set_),
entry.native_pc_code_offset.Uint32Value(instruction_set_));
DCHECK_EQ(invoke_info.GetInvokeType(encoding.invoke_info.encoding), entry.invoke_type);
- DCHECK_EQ(invoke_info.GetMethodIndex(encoding.invoke_info.encoding), entry.dex_method_index);
+ DCHECK_EQ(invoke_info.GetMethodIndexIdx(encoding.invoke_info.encoding),
+ entry.dex_method_index_idx);
invoke_info_index++;
}
CheckDexRegisterMap(code_info,
@@ -615,8 +660,10 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
DCHECK_EQ(inline_info.GetArtMethodAtDepth(encoding.inline_info.encoding, d),
inline_entry.method);
} else {
- DCHECK_EQ(inline_info.GetMethodIndexAtDepth(encoding.inline_info.encoding, d),
- inline_entry.method_index);
+ const size_t method_index_idx =
+ inline_info.GetMethodIndexIdxAtDepth(encoding.inline_info.encoding, d);
+ DCHECK_EQ(method_index_idx, inline_entry.dex_method_index_idx);
+ DCHECK_EQ(method_indices_[method_index_idx], inline_entry.method_index);
}
CheckDexRegisterMap(code_info,
@@ -633,4 +680,9 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
}
}
+size_t StackMapStream::ComputeMethodInfoSize() const {
+ DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before " << __FUNCTION__;
+ return MethodInfo::ComputeSize(method_indices_.size());
+}
+
} // namespace art
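PrepareMethodIndices() above deduplicates method indices by letting emplace() either insert a fresh (method index → table slot) pair or return the existing one. A standalone, slightly simplified illustration of that idiom (names are invented; this is not the StackMapStream code itself):

    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    // Returns the deduplicated table and fills idx_out with, for each input,
    // the index of its entry in that table.
    std::vector<uint32_t> DeduplicateMethodIndices(const std::vector<uint32_t>& raw,
                                                   std::vector<std::size_t>* idx_out) {
      std::unordered_map<uint32_t, std::size_t> dedupe;
      std::vector<uint32_t> table;
      for (uint32_t method_index : raw) {
        auto result = dedupe.emplace(method_index, table.size());
        if (result.second) {
          table.push_back(method_index);           // First occurrence: append to the table.
        }
        idx_out->push_back(result.first->second);  // Index into the deduplicated table.
      }
      return table;
    }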
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 4225a875b9..e6471e1bc5 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -22,6 +22,7 @@
#include "base/hash_map.h"
#include "base/value_object.h"
#include "memory_region.h"
+#include "method_info.h"
#include "nodes.h"
#include "stack_map.h"
@@ -70,6 +71,7 @@ class StackMapStream : public ValueObject {
inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
+ method_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
stack_mask_max_(-1),
dex_pc_max_(0),
@@ -120,6 +122,7 @@ class StackMapStream : public ValueObject {
size_t dex_register_map_index;
InvokeType invoke_type;
uint32_t dex_method_index;
+ uint32_t dex_method_index_idx; // Index into dex method index table.
};
struct InlineInfoEntry {
@@ -128,6 +131,7 @@ class StackMapStream : public ValueObject {
uint32_t method_index;
DexRegisterMapEntry dex_register_entry;
size_t dex_register_map_index;
+ uint32_t dex_method_index_idx; // Index into the dex method index table.
};
void BeginStackMapEntry(uint32_t dex_pc,
@@ -164,7 +168,10 @@ class StackMapStream : public ValueObject {
// Prepares the stream to fill in a memory region. Must be called before FillIn.
// Returns the size (in bytes) needed to store this stream.
size_t PrepareForFillIn();
- void FillIn(MemoryRegion region);
+ void FillInCodeInfo(MemoryRegion region);
+ void FillInMethodInfo(MemoryRegion region);
+
+ size_t ComputeMethodInfoSize() const;
private:
size_t ComputeDexRegisterLocationCatalogSize() const;
@@ -180,6 +187,9 @@ class StackMapStream : public ValueObject {
// Returns the number of unique register masks.
size_t PrepareRegisterMasks();
+ // Prepare and deduplicate method indices.
+ void PrepareMethodIndices();
+
// Deduplicate entry if possible and return the corresponding index into dex_register_entries_
// array. If entry is not a duplicate, a new entry is added to dex_register_entries_.
size_t AddDexRegisterMapEntry(const DexRegisterMapEntry& entry);
@@ -232,6 +242,7 @@ class StackMapStream : public ValueObject {
ArenaVector<InlineInfoEntry> inline_infos_;
ArenaVector<uint8_t> stack_masks_;
ArenaVector<uint32_t> register_masks_;
+ ArenaVector<uint32_t> method_indices_;
ArenaVector<DexRegisterMapEntry> dex_register_entries_;
int stack_mask_max_;
uint32_t dex_pc_max_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 330f7f28b6..a842c6e452 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -60,7 +60,7 @@ TEST(StackMapTest, Test1) {
size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
- stream.FillIn(region);
+ stream.FillInCodeInfo(region);
CodeInfo code_info(region);
CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -173,7 +173,7 @@ TEST(StackMapTest, Test2) {
size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
- stream.FillIn(region);
+ stream.FillInCodeInfo(region);
CodeInfo code_info(region);
CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -433,7 +433,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) {
size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
- stream.FillIn(region);
+ stream.FillInCodeInfo(region);
CodeInfo code_info(region);
CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -519,7 +519,7 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
- stream.FillIn(region);
+ stream.FillInCodeInfo(region);
CodeInfo code_info(region);
CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -611,7 +611,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
- stream.FillIn(region);
+ stream.FillInCodeInfo(region);
CodeInfo code_info(region);
CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -672,7 +672,7 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
- stream.FillIn(region);
+ stream.FillInCodeInfo(region);
CodeInfo ci(region);
CodeInfoEncoding encoding = ci.ExtractEncoding();
@@ -721,7 +721,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
- stream.FillIn(region);
+ stream.FillInCodeInfo(region);
CodeInfo code_info(region);
CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -823,7 +823,7 @@ TEST(StackMapTest, InlineTest) {
size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
- stream.FillIn(region);
+ stream.FillInCodeInfo(region);
CodeInfo ci(region);
CodeInfoEncoding encoding = ci.ExtractEncoding();
@@ -950,7 +950,7 @@ TEST(StackMapTest, TestDeduplicateStackMask) {
size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
- stream.FillIn(region);
+ stream.FillInCodeInfo(region);
CodeInfo code_info(region);
CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -979,11 +979,16 @@ TEST(StackMapTest, TestInvokeInfo) {
stream.AddInvoke(kDirect, 65535);
stream.EndStackMapEntry();
- const size_t size = stream.PrepareForFillIn();
- MemoryRegion region(arena.Alloc(size, kArenaAllocMisc), size);
- stream.FillIn(region);
+ const size_t code_info_size = stream.PrepareForFillIn();
+ MemoryRegion code_info_region(arena.Alloc(code_info_size, kArenaAllocMisc), code_info_size);
+ stream.FillInCodeInfo(code_info_region);
- CodeInfo code_info(region);
+ const size_t method_info_size = stream.ComputeMethodInfoSize();
+ MemoryRegion method_info_region(arena.Alloc(method_info_size, kArenaAllocMisc), method_info_size);
+ stream.FillInMethodInfo(method_info_region);
+
+ CodeInfo code_info(code_info_region);
+ MethodInfo method_info(method_info_region.begin());
CodeInfoEncoding encoding = code_info.ExtractEncoding();
ASSERT_EQ(3u, code_info.GetNumberOfStackMaps(encoding));
@@ -996,13 +1001,13 @@ TEST(StackMapTest, TestInvokeInfo) {
EXPECT_TRUE(invoke2.IsValid());
EXPECT_TRUE(invoke3.IsValid());
EXPECT_EQ(invoke1.GetInvokeType(encoding.invoke_info.encoding), kSuper);
- EXPECT_EQ(invoke1.GetMethodIndex(encoding.invoke_info.encoding), 1u);
+ EXPECT_EQ(invoke1.GetMethodIndex(encoding.invoke_info.encoding, method_info), 1u);
EXPECT_EQ(invoke1.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 4u);
EXPECT_EQ(invoke2.GetInvokeType(encoding.invoke_info.encoding), kStatic);
- EXPECT_EQ(invoke2.GetMethodIndex(encoding.invoke_info.encoding), 3u);
+ EXPECT_EQ(invoke2.GetMethodIndex(encoding.invoke_info.encoding, method_info), 3u);
EXPECT_EQ(invoke2.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 8u);
EXPECT_EQ(invoke3.GetInvokeType(encoding.invoke_info.encoding), kDirect);
- EXPECT_EQ(invoke3.GetMethodIndex(encoding.invoke_info.encoding), 65535u);
+ EXPECT_EQ(invoke3.GetMethodIndex(encoding.invoke_info.encoding, method_info), 65535u);
EXPECT_EQ(invoke3.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 16u);
}