Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/builder.h | 11
-rw-r--r--  compiler/optimizing/code_generator.cc | 304
-rw-r--r--  compiler/optimizing/code_generator.h | 31
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 135
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 8
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 151
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 5
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 70
-rw-r--r--  compiler/optimizing/code_generator_mips.h | 5
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 31
-rw-r--r--  compiler/optimizing/code_generator_mips64.h | 5
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 96
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 8
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 73
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 7
-rw-r--r--  compiler/optimizing/codegen_test.cc | 701
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc | 12
-rw-r--r--  compiler/optimizing/dominator_test.cc | 5
-rw-r--r--  compiler/optimizing/graph_checker.cc | 20
-rw-r--r--  compiler/optimizing/graph_test.cc | 4
-rw-r--r--  compiler/optimizing/inliner.cc | 20
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc | 9
-rw-r--r--  compiler/optimizing/intrinsics.h | 6
-rw-r--r--  compiler/optimizing/nodes.cc | 45
-rw-r--r--  compiler/optimizing/nodes.h | 18
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 155
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc | 2
-rw-r--r--  compiler/optimizing/register_allocator.cc | 3
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 55
-rw-r--r--  compiler/optimizing/ssa_builder.h | 4
30 files changed, 692 insertions, 1307 deletions
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 26bf1cbc75..1d604e7135 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -56,7 +56,6 @@ class HGraphBuilder : public ValueObject {
return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])),
code_start_(nullptr),
latest_result_(nullptr),
- can_use_baseline_for_string_init_(true),
compilation_stats_(compiler_stats),
interpreter_metadata_(interpreter_metadata),
dex_cache_(dex_cache) {}
@@ -77,7 +76,6 @@ class HGraphBuilder : public ValueObject {
return_type_(return_type),
code_start_(nullptr),
latest_result_(nullptr),
- can_use_baseline_for_string_init_(true),
compilation_stats_(nullptr),
interpreter_metadata_(nullptr),
null_dex_cache_(),
@@ -85,10 +83,6 @@ class HGraphBuilder : public ValueObject {
bool BuildGraph(const DexFile::CodeItem& code);
- bool CanUseBaselineForStringInit() const {
- return can_use_baseline_for_string_init_;
- }
-
static constexpr const char* kBuilderPassName = "builder";
// The number of entries in a packed switch before we use a jump table or specified
@@ -363,11 +357,6 @@ class HGraphBuilder : public ValueObject {
// used by move-result instructions.
HInstruction* latest_result_;
- // We need to know whether we have built a graph that has calls to StringFactory
- // and hasn't gone through the verifier. If the following flag is `false`, then
- // we cannot compile with baseline.
- bool can_use_baseline_for_string_init_;
-
OptimizingCompilerStats* compilation_stats_;
const uint8_t* interpreter_metadata_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ea0b9eca9a..a3bbfdbd27 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -142,23 +142,6 @@ size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
return pointer_size * index;
}
-void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
- Initialize();
- if (!is_leaf) {
- MarkNotLeaf();
- }
- const bool is_64_bit = Is64BitInstructionSet(GetInstructionSet());
- InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs()
- + GetGraph()->GetTemporariesVRegSlots()
- + 1 /* filler */,
- 0, /* the baseline compiler does not have live registers at slow path */
- 0, /* the baseline compiler does not have live registers at slow path */
- GetGraph()->GetMaximumNumberOfOutVRegs()
- + (is_64_bit ? 2 : 1) /* current method */,
- GetGraph()->GetBlocks());
- CompileInternal(allocator, /* is_baseline */ true);
-}
-
bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
DCHECK_EQ((*block_order_)[current_block_index_], current);
return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
@@ -220,8 +203,12 @@ void CodeGenerator::GenerateSlowPaths() {
current_slow_path_ = nullptr;
}
-void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
- is_baseline_ = is_baseline;
+void CodeGenerator::Compile(CodeAllocator* allocator) {
+ // The register allocator already called `InitializeCodeGeneration`,
+ // where the frame size has been computed.
+ DCHECK(block_order_ != nullptr);
+ Initialize();
+
HGraphVisitor* instruction_visitor = GetInstructionVisitor();
DCHECK_EQ(current_block_index_, 0u);
@@ -242,9 +229,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline)
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* current = it.Current();
DisassemblyScope disassembly_scope(current, *this);
- if (is_baseline) {
- InitLocationsBaseline(current);
- }
DCHECK(CheckTypeConsistency(current));
current->Accept(instruction_visitor);
}
@@ -254,7 +238,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline)
// Emit catch stack maps at the end of the stack map stream as expected by the
// runtime exception handler.
- if (!is_baseline && graph_->HasTryCatch()) {
+ if (graph_->HasTryCatch()) {
RecordCatchBlockInfo();
}
@@ -262,14 +246,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline)
Finalize(allocator);
}
-void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
- // The register allocator already called `InitializeCodeGeneration`,
- // where the frame size has been computed.
- DCHECK(block_order_ != nullptr);
- Initialize();
- CompileInternal(allocator, /* is_baseline */ false);
-}
-
void CodeGenerator::Finalize(CodeAllocator* allocator) {
size_t code_size = GetAssembler()->CodeSize();
uint8_t* buffer = allocator->Allocate(code_size);
@@ -282,29 +258,6 @@ void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches A
// No linker patches by default.
}
-size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) {
- for (size_t i = 0; i < length; ++i) {
- if (!array[i]) {
- array[i] = true;
- return i;
- }
- }
- LOG(FATAL) << "Could not find a register in baseline register allocator";
- UNREACHABLE();
-}
-
-size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) {
- for (size_t i = 0; i < length - 1; i += 2) {
- if (!array[i] && !array[i + 1]) {
- array[i] = true;
- array[i + 1] = true;
- return i;
- }
- }
- LOG(FATAL) << "Could not find a register in baseline register allocator";
- UNREACHABLE();
-}
-
void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
size_t maximum_number_of_live_core_registers,
size_t maximum_number_of_live_fpu_registers,
@@ -592,123 +545,6 @@ void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
}
}
-void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
- LocationSummary* locations = instruction->GetLocations();
- if (locations == nullptr) return;
-
- for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
- blocked_core_registers_[i] = false;
- }
-
- for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
- blocked_fpu_registers_[i] = false;
- }
-
- for (size_t i = 0, e = number_of_register_pairs_; i < e; ++i) {
- blocked_register_pairs_[i] = false;
- }
-
- // Mark all fixed input, temp and output registers as used.
- for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
- BlockIfInRegister(locations->InAt(i));
- }
-
- for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
- Location loc = locations->GetTemp(i);
- BlockIfInRegister(loc);
- }
- Location result_location = locations->Out();
- if (locations->OutputCanOverlapWithInputs()) {
- BlockIfInRegister(result_location, /* is_out */ true);
- }
-
- SetupBlockedRegisters(/* is_baseline */ true);
-
- // Allocate all unallocated input locations.
- for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
- Location loc = locations->InAt(i);
- HInstruction* input = instruction->InputAt(i);
- if (loc.IsUnallocated()) {
- if ((loc.GetPolicy() == Location::kRequiresRegister)
- || (loc.GetPolicy() == Location::kRequiresFpuRegister)) {
- loc = AllocateFreeRegister(input->GetType());
- } else {
- DCHECK_EQ(loc.GetPolicy(), Location::kAny);
- HLoadLocal* load = input->AsLoadLocal();
- if (load != nullptr) {
- loc = GetStackLocation(load);
- } else {
- loc = AllocateFreeRegister(input->GetType());
- }
- }
- locations->SetInAt(i, loc);
- }
- }
-
- // Allocate all unallocated temp locations.
- for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
- Location loc = locations->GetTemp(i);
- if (loc.IsUnallocated()) {
- switch (loc.GetPolicy()) {
- case Location::kRequiresRegister:
- // Allocate a core register (large enough to fit a 32-bit integer).
- loc = AllocateFreeRegister(Primitive::kPrimInt);
- break;
-
- case Location::kRequiresFpuRegister:
- // Allocate a core register (large enough to fit a 64-bit double).
- loc = AllocateFreeRegister(Primitive::kPrimDouble);
- break;
-
- default:
- LOG(FATAL) << "Unexpected policy for temporary location "
- << loc.GetPolicy();
- }
- locations->SetTempAt(i, loc);
- }
- }
- if (result_location.IsUnallocated()) {
- switch (result_location.GetPolicy()) {
- case Location::kAny:
- case Location::kRequiresRegister:
- case Location::kRequiresFpuRegister:
- result_location = AllocateFreeRegister(instruction->GetType());
- break;
- case Location::kSameAsFirstInput:
- result_location = locations->InAt(0);
- break;
- }
- locations->UpdateOut(result_location);
- }
-}
-
-void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) {
- AllocateLocations(instruction);
- if (instruction->GetLocations() == nullptr) {
- if (instruction->IsTemporary()) {
- HInstruction* previous = instruction->GetPrevious();
- Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
- Move(previous, temp_location, instruction);
- }
- return;
- }
- AllocateRegistersLocally(instruction);
- for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
- Location location = instruction->GetLocations()->InAt(i);
- HInstruction* input = instruction->InputAt(i);
- if (location.IsValid()) {
- // Move the input to the desired location.
- if (input->GetNext()->IsTemporary()) {
- // If the input was stored in a temporary, use that temporary to
- // perform the move.
- Move(input->GetNext(), location, instruction);
- } else {
- Move(input, location, instruction);
- }
- }
- }
-}
-
void CodeGenerator::AllocateLocations(HInstruction* instruction) {
instruction->Accept(GetLocationBuilder());
DCHECK(CheckTypeConsistency(instruction));
@@ -789,132 +625,6 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
}
}
-void CodeGenerator::BuildNativeGCMap(
- ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const {
- const std::vector<uint8_t>& gc_map_raw =
- compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx())
- ->GetDexGcMap();
- verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
-
- uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
-
- size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
- GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth());
- for (size_t i = 0; i != num_stack_maps; ++i) {
- const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
- uint32_t native_offset = stack_map_entry.native_pc_offset;
- uint32_t dex_pc = stack_map_entry.dex_pc;
- const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
- CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
- builder.AddEntry(native_offset, references);
- }
-}
-
-void CodeGenerator::BuildMappingTable(ArenaVector<uint8_t>* data) const {
- uint32_t pc2dex_data_size = 0u;
- uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
- uint32_t pc2dex_offset = 0u;
- int32_t pc2dex_dalvik_offset = 0;
- uint32_t dex2pc_data_size = 0u;
- uint32_t dex2pc_entries = 0u;
- uint32_t dex2pc_offset = 0u;
- int32_t dex2pc_dalvik_offset = 0;
-
- for (size_t i = 0; i < pc2dex_entries; i++) {
- const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
- pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset);
- pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset);
- pc2dex_offset = stack_map_entry.native_pc_offset;
- pc2dex_dalvik_offset = stack_map_entry.dex_pc;
- }
-
- // Walk over the blocks and find which ones correspond to catch block entries.
- for (HBasicBlock* block : graph_->GetBlocks()) {
- if (block->IsCatchBlock()) {
- intptr_t native_pc = GetAddressOf(block);
- ++dex2pc_entries;
- dex2pc_data_size += UnsignedLeb128Size(native_pc - dex2pc_offset);
- dex2pc_data_size += SignedLeb128Size(block->GetDexPc() - dex2pc_dalvik_offset);
- dex2pc_offset = native_pc;
- dex2pc_dalvik_offset = block->GetDexPc();
- }
- }
-
- uint32_t total_entries = pc2dex_entries + dex2pc_entries;
- uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries);
- uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size;
- data->resize(data_size);
-
- uint8_t* data_ptr = &(*data)[0];
- uint8_t* write_pos = data_ptr;
-
- write_pos = EncodeUnsignedLeb128(write_pos, total_entries);
- write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries);
- DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size);
- uint8_t* write_pos2 = write_pos + pc2dex_data_size;
-
- pc2dex_offset = 0u;
- pc2dex_dalvik_offset = 0u;
- dex2pc_offset = 0u;
- dex2pc_dalvik_offset = 0u;
-
- for (size_t i = 0; i < pc2dex_entries; i++) {
- const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
- DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset);
- write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset);
- write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset);
- pc2dex_offset = stack_map_entry.native_pc_offset;
- pc2dex_dalvik_offset = stack_map_entry.dex_pc;
- }
-
- for (HBasicBlock* block : graph_->GetBlocks()) {
- if (block->IsCatchBlock()) {
- intptr_t native_pc = GetAddressOf(block);
- write_pos2 = EncodeUnsignedLeb128(write_pos2, native_pc - dex2pc_offset);
- write_pos2 = EncodeSignedLeb128(write_pos2, block->GetDexPc() - dex2pc_dalvik_offset);
- dex2pc_offset = native_pc;
- dex2pc_dalvik_offset = block->GetDexPc();
- }
- }
-
-
- DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size + pc2dex_data_size);
- DCHECK_EQ(static_cast<size_t>(write_pos2 - data_ptr), data_size);
-
- if (kIsDebugBuild) {
- // Verify the encoded table holds the expected data.
- MappingTable table(data_ptr);
- CHECK_EQ(table.TotalSize(), total_entries);
- CHECK_EQ(table.PcToDexSize(), pc2dex_entries);
- auto it = table.PcToDexBegin();
- auto it2 = table.DexToPcBegin();
- for (size_t i = 0; i < pc2dex_entries; i++) {
- const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
- CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset());
- CHECK_EQ(stack_map_entry.dex_pc, it.DexPc());
- ++it;
- }
- for (HBasicBlock* block : graph_->GetBlocks()) {
- if (block->IsCatchBlock()) {
- CHECK_EQ(GetAddressOf(block), it2.NativePcOffset());
- CHECK_EQ(block->GetDexPc(), it2.DexPc());
- ++it2;
- }
- }
- CHECK(it == table.PcToDexEnd());
- CHECK(it2 == table.DexToPcEnd());
- }
-}
-
-void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
- Leb128Encoder<ArenaVector<uint8_t>> vmap_encoder(data);
- // We currently don't use callee-saved registers.
- size_t size = 0 + 1 /* marker */ + 0;
- vmap_encoder.Reserve(size + 1u); // All values are likely to be one byte in ULEB128 (<128).
- vmap_encoder.PushBackUnsigned(size);
- vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-}
-
size_t CodeGenerator::ComputeStackMapsSize() {
return stack_map_stream_.PrepareForFillIn();
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 5958cd89bc..4f8f146753 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -158,10 +158,8 @@ class FieldAccessCallingConvention {
class CodeGenerator {
public:
- // Compiles the graph to executable instructions. Returns whether the compilation
- // succeeded.
- void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
- void CompileOptimized(CodeAllocator* allocator);
+ // Compiles the graph to executable instructions.
+ void Compile(CodeAllocator* allocator);
static CodeGenerator* Create(HGraph* graph,
InstructionSet instruction_set,
const InstructionSetFeatures& isa_features,
@@ -214,7 +212,7 @@ class CodeGenerator {
size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
- virtual void SetupBlockedRegisters(bool is_baseline) const = 0;
+ virtual void SetupBlockedRegisters() const = 0;
virtual void ComputeSpillMask() {
core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
@@ -290,17 +288,9 @@ class CodeGenerator {
slow_paths_.push_back(slow_path);
}
- void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
- void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
- void BuildNativeGCMap(
- ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const;
void BuildStackMaps(MemoryRegion region);
size_t ComputeStackMapsSize();
- bool IsBaseline() const {
- return is_baseline_;
- }
-
bool IsLeafMethod() const {
return is_leaf_;
}
@@ -489,7 +479,6 @@ class CodeGenerator {
fpu_callee_save_mask_(fpu_callee_save_mask),
stack_map_stream_(graph->GetArena()),
block_order_(nullptr),
- is_baseline_(false),
disasm_info_(nullptr),
stats_(stats),
graph_(graph),
@@ -502,15 +491,6 @@ class CodeGenerator {
slow_paths_.reserve(8);
}
- // Register allocation logic.
- void AllocateRegistersLocally(HInstruction* instruction) const;
-
- // Backend specific implementation for allocating a register.
- virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;
-
- static size_t FindFreeEntry(bool* array, size_t length);
- static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);
-
virtual Location GetStackLocation(HLoadLocal* load) const = 0;
virtual HGraphVisitor* GetLocationBuilder() = 0;
@@ -593,16 +573,11 @@ class CodeGenerator {
// The order to use for code generation.
const ArenaVector<HBasicBlock*>* block_order_;
- // Whether we are using baseline.
- bool is_baseline_;
-
DisassemblyInformation* disasm_info_;
private:
- void InitLocationsBaseline(HInstruction* instruction);
size_t GetStackOffsetOfSavedRegister(size_t index);
void GenerateSlowPaths();
- void CompileInternal(CodeAllocator* allocator, bool is_baseline);
void BlockIfInRegister(Location location, bool is_out = false) const;
void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a11ceb9bd9..272579219f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -47,9 +47,7 @@ static bool ExpectedPairLayout(Location location) {
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = R0;
-// We unconditionally allocate R5 to ensure we can do long operations
-// with baseline.
-static constexpr Register kCoreSavedRegisterForBaseline = R5;
+static constexpr Register kCoreAlwaysSpillRegister = R5;
static constexpr Register kCoreCalleeSaves[] =
{ R5, R6, R7, R8, R10, R11, LR };
static constexpr SRegister kFpuCalleeSaves[] =
@@ -728,6 +726,24 @@ inline Condition ARMUnsignedCondition(IfCondition cond) {
UNREACHABLE();
}
+inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
+ // The ARM condition codes can express all the necessary branches, see the
+ // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual.
+ // There is no dex instruction or HIR that would need the missing conditions
+ // "equal or unordered" or "not equal".
+ switch (cond) {
+ case kCondEQ: return EQ;
+ case kCondNE: return NE /* unordered */;
+ case kCondLT: return gt_bias ? CC : LT /* unordered */;
+ case kCondLE: return gt_bias ? LS : LE /* unordered */;
+ case kCondGT: return gt_bias ? HI /* unordered */ : GT;
+ case kCondGE: return gt_bias ? CS /* unordered */ : GE;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+ }
+}
+
void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
stream << Register(reg);
}
@@ -815,58 +831,7 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) {
CodeGenerator::Finalize(allocator);
}
-Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
- switch (type) {
- case Primitive::kPrimLong: {
- size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
- ArmManagedRegister pair =
- ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
- blocked_core_registers_[pair.AsRegisterPairLow()] = true;
- blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
- UpdateBlockedPairRegisters();
- return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
- }
-
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
- // Block all register pairs that contain `reg`.
- for (int i = 0; i < kNumberOfRegisterPairs; i++) {
- ArmManagedRegister current =
- ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
- if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
- blocked_register_pairs_[i] = true;
- }
- }
- return Location::RegisterLocation(reg);
- }
-
- case Primitive::kPrimFloat: {
- int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfSRegisters);
- return Location::FpuRegisterLocation(reg);
- }
-
- case Primitive::kPrimDouble: {
- int reg = FindTwoFreeConsecutiveAlignedEntries(blocked_fpu_registers_, kNumberOfSRegisters);
- DCHECK_EQ(reg % 2, 0);
- return Location::FpuRegisterPairLocation(reg, reg + 1);
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- return Location::NoLocation();
-}
-
-void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM::SetupBlockedRegisters() const {
// Don't allocate the dalvik style register pair passing.
blocked_register_pairs_[R1_R2] = true;
@@ -881,15 +846,7 @@ void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
// Reserve temp register.
blocked_core_registers_[IP] = true;
- if (is_baseline) {
- for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
- blocked_core_registers_[kCoreCalleeSaves[i]] = true;
- }
-
- blocked_core_registers_[kCoreSavedRegisterForBaseline] = false;
- }
-
- if (is_baseline || GetGraph()->IsDebuggable()) {
+ if (GetGraph()->IsDebuggable()) {
// Stubs do not save callee-save floating point registers. If the graph
// is debuggable, we need to deal with these registers differently. For
// now, just block them.
@@ -919,11 +876,10 @@ InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGene
void CodeGeneratorARM::ComputeSpillMask() {
core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
- // Save one extra register for baseline. Note that on thumb2, there is no easy
- // instruction to restore just the PC, so this actually helps both baseline
- // and non-baseline to save and restore at least two registers at entry and exit.
- core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline);
DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+ // There is no easy instruction to restore just the PC on thumb2. We spill and
+ // restore another arbitrary register.
+ core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister);
fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
// We use vpush and vpop for saving and restoring floating point registers, which take
// a SRegister and the number of registers to save/restore after that SRegister. We
@@ -1416,15 +1372,9 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
Label* true_label,
- Label* false_label) {
+ Label* false_label ATTRIBUTE_UNUSED) {
__ vmstat(); // transfer FP status register to ARM APSR.
- // TODO: merge into a single branch (except "equal or unordered" and "not equal")
- if (cond->IsFPConditionTrueIfNaN()) {
- __ b(true_label, VS); // VS for unordered.
- } else if (cond->IsFPConditionFalseIfNaN()) {
- __ b(false_label, VS); // VS for unordered.
- }
- __ b(true_label, ARMCondition(cond->GetCondition()));
+ __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
}
void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
@@ -1972,9 +1922,9 @@ void InstructionCodeGeneratorARM::VisitInvokeUnresolved(HInvokeUnresolved* invok
}
void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
codegen_->GetAssembler(),
@@ -2004,9 +1954,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen)
}
void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
@@ -3803,6 +3753,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
Label less, greater, done;
Primitive::Type type = compare->InputAt(0)->GetType();
+ Condition less_cond;
switch (type) {
case Primitive::kPrimLong: {
__ cmp(left.AsRegisterPairHigh<Register>(),
@@ -3813,6 +3764,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
__ LoadImmediate(out, 0);
__ cmp(left.AsRegisterPairLow<Register>(),
ShifterOperand(right.AsRegisterPairLow<Register>())); // Unsigned compare.
+ less_cond = LO;
break;
}
case Primitive::kPrimFloat:
@@ -3825,14 +3777,15 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
}
__ vmstat(); // transfer FP status register to ARM APSR.
- __ b(compare->IsGtBias() ? &greater : &less, VS); // VS for unordered.
+ less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
break;
}
default:
LOG(FATAL) << "Unexpected compare type " << type;
+ UNREACHABLE();
}
__ b(&done, EQ);
- __ b(&less, LO); // LO is for both: unsigned compare for longs and 'less than' for floats.
+ __ b(&less, less_cond);
__ Bind(&greater);
__ LoadImmediate(out, 1);
@@ -5530,7 +5483,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck: {
// Note that we indeed only call on slow path, but we always go
- // into the slow path for the unresolved & interface check
+ // into the slow path for the unresolved and interface check
// cases.
//
// We cannot directly call the InstanceofNonTrivial runtime
@@ -5740,8 +5693,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- // We always go into the type check slow path for the unresolved &
- // interface check cases.
+ // We always go into the type check slow path for the unresolved
+ // and interface check cases.
//
// We cannot directly call the CheckCast runtime entry point
// without resorting to a type checking slow path here (i.e. by
@@ -6027,6 +5980,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
codegen_->AddSlowPath(slow_path);
+ // IP = Thread::Current()->GetIsGcMarking()
__ LoadFromOffset(
kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
__ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
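[Editor's note: as context for the "IP = Thread::Current()->GetIsGcMarking()" comment added in the hunk above, the following is a minimal, self-contained C++ model of a Baker-style GC root load: a plain load on the fast path, with a call into the marking slow path only while the collector is concurrently marking. The names below (gIsGcMarking, MarkReference, LoadGcRoot) are illustrative stand-ins, not ART APIs.]

    #include <atomic>

    struct Object {};
    static std::atomic<bool> gIsGcMarking{false};  // models Thread::Current()->GetIsGcMarking()

    static Object* MarkReference(Object* ref) {
      // Stand-in for the ReadBarrierMark slow path; a real concurrent-copying
      // collector would return the marked/forwarded reference here.
      return ref;
    }

    static Object* LoadGcRoot(Object** slot) {
      Object* root = *slot;                               // fast path: plain load
      if (gIsGcMarking.load(std::memory_order_relaxed)) {
        root = MarkReference(root);                       // slow path, only while marking
      }
      return root;
    }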
@@ -6105,11 +6059,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// }
//
// Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as:
- // - it implements the load-load fence using a data dependency on
- // the high-bits of rb_state, which are expected to be all zeroes;
- // - it performs additional checks that we do not do here for
- // performance reasons.
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
Register ref_reg = ref.AsRegister<Register>();
Register temp_reg = temp.AsRegister<Register>();
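[Editor's note: the ARMFPCondition helper introduced in this file folds the dex cmpg/cmpl NaN bias directly into the ARM condition code, so the separate unordered branch removed above is no longer needed. As a reference for the intended semantics, here is a small self-contained C++ sketch of how gt_bias resolves an unordered (NaN) comparison: cmpg treats it as "greater", cmpl as "less". The function name CompareFloats is illustrative only.]

    #include <cmath>
    #include <cstdio>

    static int CompareFloats(float lhs, float rhs, bool gt_bias) {
      if (std::isnan(lhs) || std::isnan(rhs)) {
        // Unordered result: cmpg-float (gt_bias) yields 1, cmpl-float yields -1.
        return gt_bias ? 1 : -1;
      }
      if (lhs == rhs) return 0;
      return lhs < rhs ? -1 : 1;
    }

    int main() {
      std::printf("%d\n", CompareFloats(1.0f, NAN, /* gt_bias= */ true));   // prints 1
      std::printf("%d\n", CompareFloats(1.0f, NAN, /* gt_bias= */ false));  // prints -1
      return 0;
    }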
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 26d6d63b31..d45ea973f9 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -340,9 +340,7 @@ class CodeGeneratorARM : public CodeGenerator {
return GetLabelOf(block)->Position();
}
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
@@ -444,7 +442,7 @@ class CodeGeneratorARM : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
Register obj,
uint32_t offset,
Location temp,
@@ -452,7 +450,7 @@ class CodeGeneratorARM : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
Register obj,
uint32_t data_offset,
Location index,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5e905fc9aa..2cb2741b17 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -93,6 +93,24 @@ inline Condition ARM64Condition(IfCondition cond) {
UNREACHABLE();
}
+inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
+ // The ARM64 condition codes can express all the necessary branches, see the
+ // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
+ // There is no dex instruction or HIR that would need the missing conditions
+ // "equal or unordered" or "not equal".
+ switch (cond) {
+ case kCondEQ: return eq;
+ case kCondNE: return ne /* unordered */;
+ case kCondLT: return gt_bias ? cc : lt /* unordered */;
+ case kCondLE: return gt_bias ? ls : le /* unordered */;
+ case kCondGT: return gt_bias ? hi /* unordered */ : gt;
+ case kCondGE: return gt_bias ? cs /* unordered */ : ge;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+ }
+}
+
Location ARM64ReturnLocation(Primitive::Type return_type) {
// Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
// same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
@@ -604,30 +622,13 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(!instruction_->IsInvoke() ||
(instruction_->IsInvokeStaticOrDirect() &&
instruction_->GetLocations()->Intrinsified()));
+ // The read barrier instrumentation does not support the
+ // HArm64IntermediateAddress instruction yet.
+ DCHECK(!(instruction_->IsArrayGet() &&
+ instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()));
__ Bind(GetEntryLabel());
- // Note: In the case of a HArrayGet instruction, when the base
- // address is a HArm64IntermediateAddress instruction, it does not
- // point to the array object itself, but to an offset within this
- // object. However, the read barrier entry point needs the array
- // object address to be passed as first argument. So we
- // temporarily set back `obj_` to that address, and restore its
- // initial value later.
- if (instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
- if (kIsDebugBuild) {
- HArm64IntermediateAddress* intermediate_address =
- instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
- uint32_t intermediate_address_offset =
- intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
- DCHECK_EQ(intermediate_address_offset, offset_);
- DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
- }
- Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
- __ Sub(obj_reg, obj_reg, offset_);
- }
-
SaveLiveRegisters(codegen, locations);
// We may have to change the index's value, but as `index_` is a
@@ -728,22 +729,6 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
RestoreLiveRegisters(codegen, locations);
- // Restore the value of `obj_` when it corresponds to a
- // HArm64IntermediateAddress instruction.
- if (instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
- if (kIsDebugBuild) {
- HArm64IntermediateAddress* intermediate_address =
- instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
- uint32_t intermediate_address_offset =
- intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
- DCHECK_EQ(intermediate_address_offset, offset_);
- DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
- }
- Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
- __ Add(obj_reg, obj_reg, offset_);
- }
-
__ B(GetExitLabel());
}
@@ -1127,7 +1112,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_
}
}
-void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM64::SetupBlockedRegisters() const {
// Blocked core registers:
// lr : Runtime reserved.
// tr : Runtime reserved.
@@ -1148,40 +1133,17 @@ void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
}
- if (is_baseline) {
- CPURegList reserved_core_baseline_registers = callee_saved_core_registers;
- while (!reserved_core_baseline_registers.IsEmpty()) {
- blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true;
- }
- }
-
- if (is_baseline || GetGraph()->IsDebuggable()) {
+ if (GetGraph()->IsDebuggable()) {
// Stubs do not save callee-save floating point registers. If the graph
// is debuggable, we need to deal with these registers differently. For
// now, just block them.
- CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers;
- while (!reserved_fp_baseline_registers.IsEmpty()) {
- blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true;
+ CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
+ while (!reserved_fp_registers_debuggable.IsEmpty()) {
+ blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true;
}
}
}
-Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
- if (type == Primitive::kPrimVoid) {
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- if (Primitive::IsFloatingPointType(type)) {
- ssize_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfAllocatableFPRegisters);
- DCHECK_NE(reg, -1);
- return Location::FpuRegisterLocation(reg);
- } else {
- ssize_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfAllocatableRegisters);
- DCHECK_NE(reg, -1);
- return Location::RegisterLocation(reg);
- }
-}
-
size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
__ Str(reg, MemOperand(sp, stack_index));
@@ -1970,6 +1932,9 @@ void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
}
void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
+ // The read barrier instrumentation does not support the
+ // HArm64IntermediateAddress instruction yet.
+ DCHECK(!kEmitCompilerReadBarrier);
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
@@ -1979,6 +1944,9 @@ void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddr
void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
HArm64IntermediateAddress* instruction) {
+ // The read barrier instrumentation does not support the
+ // HArm64IntermediateAddress instruction yet.
+ DCHECK(!kEmitCompilerReadBarrier);
__ Add(OutputRegister(instruction),
InputRegisterAt(instruction, 0),
Operand(InputOperandAt(instruction, 1)));
@@ -2067,6 +2035,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
} else {
Register temp = temps.AcquireSameSizeAs(obj);
if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+ // The read barrier instrumentation does not support the
+ // HArm64IntermediateAddress instruction yet.
+ DCHECK(!kEmitCompilerReadBarrier);
// We do not need to compute the intermediate address from the array: the
// input instruction has done it already. See the comment in
// `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
@@ -2093,11 +2064,6 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
} else {
- // Note: when `obj_loc` is a HArm64IntermediateAddress, it does
- // not contain the base address of the array object, which is
- // needed by the read barrier entry point. So the read barrier
- // slow path will temporarily set back `obj_loc` to the right
- // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode).
codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
}
}
@@ -2161,6 +2127,9 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireSameSizeAs(array);
if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+ // The read barrier instrumentation does not support the
+ // HArm64IntermediateAddress instruction yet.
+ DCHECK(!kEmitCompilerReadBarrier);
// We do not need to compute the intermediate address from the array: the
// input instruction has done it already. See the comment in
// `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
@@ -2407,12 +2376,8 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
} else {
__ Fcmp(left, InputFPRegisterAt(compare, 1));
}
- if (compare->IsGtBias()) {
- __ Cset(result, ne);
- } else {
- __ Csetm(result, ne);
- }
- __ Cneg(result, result, compare->IsGtBias() ? mi : gt);
+ __ Cset(result, ne);
+ __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
break;
}
default:
@@ -2448,7 +2413,6 @@ void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
LocationSummary* locations = instruction->GetLocations();
Register res = RegisterFrom(locations->Out(), instruction->GetType());
IfCondition if_cond = instruction->GetCondition();
- Condition arm64_cond = ARM64Condition(if_cond);
if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
FPRegister lhs = InputFPRegisterAt(instruction, 0);
@@ -2459,20 +2423,13 @@ void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
} else {
__ Fcmp(lhs, InputFPRegisterAt(instruction, 1));
}
- __ Cset(res, arm64_cond);
- if (instruction->IsFPConditionTrueIfNaN()) {
- // res = IsUnordered(arm64_cond) ? 1 : res <=> res = IsNotUnordered(arm64_cond) ? res : 1
- __ Csel(res, res, Operand(1), vc); // VC for "not unordered".
- } else if (instruction->IsFPConditionFalseIfNaN()) {
- // res = IsUnordered(arm64_cond) ? 0 : res <=> res = IsNotUnordered(arm64_cond) ? res : 0
- __ Csel(res, res, Operand(0), vc); // VC for "not unordered".
- }
+ __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
} else {
// Integer cases.
Register lhs = InputRegisterAt(instruction, 0);
Operand rhs = InputOperandAt(instruction, 1);
__ Cmp(lhs, rhs);
- __ Cset(res, arm64_cond);
+ __ Cset(res, ARM64Condition(if_cond));
}
}
@@ -2842,15 +2799,11 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
} else {
__ Fcmp(lhs, InputFPRegisterAt(condition, 1));
}
- if (condition->IsFPConditionTrueIfNaN()) {
- __ B(vs, true_target == nullptr ? &fallthrough_target : true_target);
- } else if (condition->IsFPConditionFalseIfNaN()) {
- __ B(vs, false_target == nullptr ? &fallthrough_target : false_target);
- }
if (true_target == nullptr) {
- __ B(ARM64Condition(condition->GetOppositeCondition()), false_target);
+ IfCondition opposite_condition = condition->GetOppositeCondition();
+ __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
} else {
- __ B(ARM64Condition(condition->GetCondition()), true_target);
+ __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
}
} else {
// Integer cases.
@@ -3488,9 +3441,9 @@ void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
}
void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
if (intrinsic.TryDispatch(invoke)) {
@@ -3738,9 +3691,9 @@ vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
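[Editor's note: several hunks above assert that the read barrier instrumentation does not yet support HArm64IntermediateAddress. For readers unfamiliar with that node, here is a minimal sketch (assumed semantics, illustrative names) of the address split it models, as produced by InstructionSimplifierArm64::TryExtractArrayAccessAddress: the constant data offset is added to the array base once, and each access then applies only the scaled index.]

    #include <cstddef>
    #include <cstdint>

    static int32_t LoadIntElement(const uint8_t* array_base, size_t data_offset, size_t index) {
      // `intermediate` plays the role of HArm64IntermediateAddress: computed once,
      // and shareable across several accesses to the same array.
      const uint8_t* intermediate = array_base + data_offset;
      // Each access then maps onto ARM64's "base plus scaled register" addressing mode.
      return *reinterpret_cast<const int32_t*>(intermediate + (index << 2));
    }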
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f2ff89488e..8eb9fcc558 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -339,10 +339,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Register allocation.
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
- // AllocateFreeRegister() is only used when allocating registers locally
- // during CompileBaseline().
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index e34767cecd..5bd136a3f0 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1042,7 +1042,7 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
__ Bind(&done);
}
-void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorMIPS::SetupBlockedRegisters() const {
// Don't allocate the dalvik style register pair passing.
blocked_register_pairs_[A1_A2] = true;
@@ -1072,16 +1072,6 @@ void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
blocked_fpu_registers_[i] = true;
}
- if (is_baseline) {
- for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
- blocked_core_registers_[kCoreCalleeSaves[i]] = true;
- }
-
- for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
- blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
- }
- }
-
UpdateBlockedPairRegisters();
}
@@ -1096,52 +1086,6 @@ void CodeGeneratorMIPS::UpdateBlockedPairRegisters() const {
}
}
-Location CodeGeneratorMIPS::AllocateFreeRegister(Primitive::Type type) const {
- switch (type) {
- case Primitive::kPrimLong: {
- size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
- MipsManagedRegister pair =
- MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
- blocked_core_registers_[pair.AsRegisterPairLow()] = true;
- blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
- UpdateBlockedPairRegisters();
- return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
- }
-
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
- // Block all register pairs that contain `reg`.
- for (int i = 0; i < kNumberOfRegisterPairs; i++) {
- MipsManagedRegister current =
- MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
- if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
- blocked_register_pairs_[i] = true;
- }
- }
- return Location::RegisterLocation(reg);
- }
-
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble: {
- int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFRegisters);
- return Location::FpuRegisterLocation(reg);
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- UNREACHABLE();
-}
-
size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
__ StoreToOffset(kStoreWord, Register(reg_id), SP, stack_index);
return kMipsWordSize;
@@ -3835,9 +3779,9 @@ void LocationsBuilderMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
}
void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -3973,9 +3917,9 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
}
void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c3d4851ee9..2cde0ed90b 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -290,10 +290,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
// Register allocation.
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
- // AllocateFreeRegister() is only used when allocating registers locally
- // during CompileBaseline().
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 79cd56d698..05054867fe 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -979,7 +979,7 @@ void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) {
__ Bind(&done);
}
-void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+void CodeGeneratorMIPS64::SetupBlockedRegisters() const {
// ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
blocked_core_registers_[ZERO] = true;
blocked_core_registers_[K0] = true;
@@ -1003,8 +1003,7 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSE
// TODO: review; anything else?
- // TODO: make these two for's conditional on is_baseline once
- // all the issues with register saving/restoring are sorted out.
+ // TODO: remove once all the issues with register saving/restoring are sorted out.
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
blocked_core_registers_[kCoreCalleeSaves[i]] = true;
}
@@ -1014,20 +1013,6 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSE
}
}
-Location CodeGeneratorMIPS64::AllocateFreeRegister(Primitive::Type type) const {
- if (type == Primitive::kPrimVoid) {
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- if (Primitive::IsFloatingPointType(type)) {
- size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFpuRegisters);
- return Location::FpuRegisterLocation(reg);
- } else {
- size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfGpuRegisters);
- return Location::RegisterLocation(reg);
- }
-}
-
size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
__ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
return kMips64WordSize;
@@ -3031,9 +3016,9 @@ void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
}
void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -3182,9 +3167,9 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
}
void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 7182e8e987..140ff95f14 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -289,10 +289,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
// Register allocation.
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
- // AllocateFreeRegister() is only used when allocating registers locally
- // during CompileBaseline().
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6259acded3..f7ccdd8b8f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -817,65 +817,13 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
-Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
- switch (type) {
- case Primitive::kPrimLong: {
- size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
- X86ManagedRegister pair =
- X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
- blocked_core_registers_[pair.AsRegisterPairLow()] = true;
- blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
- UpdateBlockedPairRegisters();
- return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
- }
-
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- Register reg = static_cast<Register>(
- FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters));
- // Block all register pairs that contain `reg`.
- for (int i = 0; i < kNumberOfRegisterPairs; i++) {
- X86ManagedRegister current =
- X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
- if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
- blocked_register_pairs_[i] = true;
- }
- }
- return Location::RegisterLocation(reg);
- }
-
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble: {
- return Location::FpuRegisterLocation(
- FindFreeEntry(blocked_fpu_registers_, kNumberOfXmmRegisters));
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- return Location::NoLocation();
-}
-
-void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86::SetupBlockedRegisters() const {
// Don't allocate the dalvik style register pair passing.
blocked_register_pairs_[ECX_EDX] = true;
// Stack register is always reserved.
blocked_core_registers_[ESP] = true;
- if (is_baseline) {
- blocked_core_registers_[EBP] = true;
- blocked_core_registers_[ESI] = true;
- blocked_core_registers_[EDI] = true;
- }
-
UpdateBlockedPairRegisters();
}
@@ -1981,9 +1929,9 @@ void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invok
}
void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderX86 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -1999,17 +1947,6 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
if (invoke->HasPcRelativeDexCache()) {
invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
}
-
- if (codegen_->IsBaseline()) {
- // Baseline does not have enough registers if the current method also
- // needs a register. We therefore do not require a register for it, and let
- // the code generation of the invoke handle it.
- LocationSummary* locations = invoke->GetLocations();
- Location location = locations->InAt(invoke->GetSpecialInputIndex());
- if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
- locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
- }
- }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
@@ -2022,9 +1959,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen)
}
void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
@@ -4286,7 +4223,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
if (current_method.IsRegister()) {
method_reg = current_method.AsRegister<Register>();
} else {
- DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified());
+ DCHECK(invoke->GetLocations()->Intrinsified());
DCHECK(!current_method.IsValid());
method_reg = reg;
__ movl(reg, Address(ESP, kCurrentMethodStackOffset));
@@ -5076,11 +5013,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
}
void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
- // This location builder might end up asking to up to four registers, which is
- // not currently possible for baseline. The situation in which we need four
- // registers cannot be met by baseline though, because it has not run any
- // optimization.
-
Primitive::Type value_type = instruction->GetComponentType();
bool needs_write_barrier =
@@ -6077,7 +6009,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck: {
// Note that we indeed only call on slow path, but we always go
- // into the slow path for the unresolved & interface check
+ // into the slow path for the unresolved and interface check
// cases.
//
// We cannot directly call the InstanceofNonTrivial runtime
@@ -6308,8 +6240,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- // We always go into the type check slow path for the unresolved &
- // interface check cases.
+ // We always go into the type check slow path for the unresolved
+ // and interface check cases.
//
// We cannot directly call the CheckCast runtime entry point
// without resorting to a type checking slow path here (i.e. by
@@ -6588,6 +6520,8 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct
// Plain GC root load with no read barrier.
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
__ movl(root_reg, Address(obj, offset));
+ // Note that GC roots are not affected by heap poisoning, thus we
+ // do not have to unpoison `root_reg` here.
}
}
@@ -6650,7 +6584,9 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Note: the original implementation in ReadBarrier::Barrier is
// slightly more complex as:
// - it implements the load-load fence using a data dependency on
- // the high-bits of rb_state, which are expected to be all zeroes;
+ // the high-bits of rb_state, which are expected to be all zeroes
+ // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
+ // which is a no-op thanks to the x86 memory model);
// - it performs additional checks that we do not do here for
// performance reasons.
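
The comment in the last hunk describes the Baker read barrier fast path only in prose. A rough pseudocode sketch of the sequence it refers to (a paraphrase of the comment, not code lifted from the file; names such as rb_state and Mark follow the comment's wording):

    // Baker-style read barrier fast path for a heap reference load (sketch).
    uint32_t rb_state = obj->monitor_.ReadBarrierState();
    // Load-load ordering between rb_state and the reference load: the runtime
    // implementation uses a data dependency on the high bits of rb_state; the
    // generated code relies on CodeGeneratorX86::GenerateMemoryBarrier(), which
    // the x86 memory model lets it turn into a no-op.
    ref = *(obj + offset);                    // The actual reference load.
    if (rb_state == ReadBarrier::gray_ptr_) {
      ref = ReadBarrier::Mark(ref);           // Slow path: mark/forward the reference.
    }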
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index c65c423eae..43e9543e41 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -359,9 +359,7 @@ class CodeGeneratorX86 : public CodeGenerator {
return GetLabelOf(block)->Position();
}
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
@@ -453,7 +451,7 @@ class CodeGeneratorX86 : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
Register obj,
uint32_t offset,
Location temp,
@@ -461,7 +459,7 @@ class CodeGeneratorX86 : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
Register obj,
uint32_t data_offset,
Location index,
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e024ce2b6c..2ce2d91502 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1002,47 +1002,12 @@ InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
-Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const {
- switch (type) {
- case Primitive::kPrimLong:
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters);
- return Location::RegisterLocation(reg);
- }
-
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble: {
- size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFloatRegisters);
- return Location::FpuRegisterLocation(reg);
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- return Location::NoLocation();
-}
-
-void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86_64::SetupBlockedRegisters() const {
// Stack register is always reserved.
blocked_core_registers_[RSP] = true;
// Block the register used as TMP.
blocked_core_registers_[TMP] = true;
-
- if (is_baseline) {
- for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
- blocked_core_registers_[kCoreCalleeSaves[i]] = true;
- }
- for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
- blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
- }
- }
}
static dwarf::Reg DWARFReg(Register reg) {
@@ -2161,9 +2126,9 @@ void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* in
}
void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -2183,9 +2148,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codeg
}
void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
@@ -4698,13 +4663,13 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
- bool may_need_runtime_call = instruction->NeedsTypeCheck();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool object_array_set_with_read_barrier =
kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- (may_need_runtime_call || object_array_set_with_read_barrier) ?
+ (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
@@ -4733,7 +4698,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
Location index = locations->InAt(1);
Location value = locations->InAt(2);
Primitive::Type value_type = instruction->GetComponentType();
- bool may_need_runtime_call = instruction->NeedsTypeCheck();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -4785,7 +4750,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
__ movl(address, Immediate(0));
codegen_->MaybeRecordImplicitNullCheck(instruction);
DCHECK(!needs_write_barrier);
- DCHECK(!may_need_runtime_call);
+ DCHECK(!may_need_runtime_call_for_type_check);
break;
}
@@ -4794,7 +4759,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
NearLabel done, not_null, do_put;
SlowPathCode* slow_path = nullptr;
CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- if (may_need_runtime_call) {
+ if (may_need_runtime_call_for_type_check) {
slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
codegen_->AddSlowPath(slow_path);
if (instruction->GetValueCanBeNull()) {
@@ -4872,7 +4837,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
} else {
__ movl(address, register_value);
}
- if (!may_need_runtime_call) {
+ if (!may_need_runtime_call_for_type_check) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
@@ -5661,7 +5626,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck: {
// Note that we indeed only call on slow path, but we always go
- // into the slow path for the unresolved & interface check
+ // into the slow path for the unresolved and interface check
// cases.
//
// We cannot directly call the InstanceofNonTrivial runtime
@@ -5892,8 +5857,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- // We always go into the type check slow path for the unresolved &
- // interface check cases.
+ // We always go into the type check slow path for the unresolved
+ // and interface check cases.
//
// We cannot directly call the CheckCast runtime entry point
// without resorting to a type checking slow path here (i.e. by
@@ -6155,6 +6120,8 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr
// Plain GC root load with no read barrier.
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
__ movl(root_reg, Address(obj, offset));
+ // Note that GC roots are not affected by heap poisoning, thus we
+ // do not have to unpoison `root_reg` here.
}
}
@@ -6217,7 +6184,9 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// Note: the original implementation in ReadBarrier::Barrier is
// slightly more complex as:
// - it implements the load-load fence using a data dependency on
- // the high-bits of rb_state, which are expected to be all zeroes;
+ // the high-bits of rb_state, which are expected to be all zeroes
+ // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
+ // here, which is a no-op thanks to the x86-64 memory model);
// - it performs additional checks that we do not do here for
// performance reasons.
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 505c9dcdad..82aabb04d3 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -347,8 +347,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -401,7 +400,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
CpuRegister obj,
uint32_t offset,
Location temp,
@@ -409,7 +408,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
CpuRegister obj,
uint32_t data_offset,
Location index,
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index d970704368..19d63de499 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -40,6 +40,7 @@
#include "dex_file.h"
#include "dex_instruction.h"
#include "driver/compiler_options.h"
+#include "graph_checker.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "prepare_for_register_allocation.h"
@@ -70,8 +71,8 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM {
AddAllocatedRegister(Location::RegisterLocation(arm::R7));
}
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
- arm::CodeGeneratorARM::SetupBlockedRegisters(is_baseline);
+ void SetupBlockedRegisters() const OVERRIDE {
+ arm::CodeGeneratorARM::SetupBlockedRegisters();
blocked_core_registers_[arm::R4] = true;
blocked_core_registers_[arm::R6] = false;
blocked_core_registers_[arm::R7] = false;
@@ -90,8 +91,8 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
AddAllocatedRegister(Location::RegisterLocation(x86::EDI));
}
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
- x86::CodeGeneratorX86::SetupBlockedRegisters(is_baseline);
+ void SetupBlockedRegisters() const OVERRIDE {
+ x86::CodeGeneratorX86::SetupBlockedRegisters();
// ebx is a callee-save register in C, but caller-save for ART.
blocked_core_registers_[x86::EBX] = true;
blocked_register_pairs_[x86::EAX_EBX] = true;
@@ -200,259 +201,228 @@ static void Run(const InternalCodeAllocator& allocator,
}
template <typename Expected>
-static void RunCodeBaseline(InstructionSet target_isa,
- HGraph* graph,
- bool has_result,
- Expected expected) {
- InternalCodeAllocator allocator;
-
- CompilerOptions compiler_options;
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
- // We avoid doing a stack overflow check that requires the runtime being setup,
- // by making sure the compiler knows the methods we are running are leaf methods.
- codegenX86.CompileBaseline(&allocator, true);
- if (target_isa == kX86) {
- Run(allocator, codegenX86, has_result, expected);
- }
+static void RunCode(CodeGenerator* codegen,
+ HGraph* graph,
+ std::function<void(HGraph*)> hook_before_codegen,
+ bool has_result,
+ Expected expected) {
+ ASSERT_TRUE(graph->IsInSsaForm());
- std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
- ArmInstructionSetFeatures::FromCppDefines());
- TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
- codegenARM.CompileBaseline(&allocator, true);
- if (target_isa == kArm || target_isa == kThumb2) {
- Run(allocator, codegenARM, has_result, expected);
- }
-
- std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
- X86_64InstructionSetFeatures::FromCppDefines());
- x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
- codegenX86_64.CompileBaseline(&allocator, true);
- if (target_isa == kX86_64) {
- Run(allocator, codegenX86_64, has_result, expected);
- }
-
- std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
- Arm64InstructionSetFeatures::FromCppDefines());
- arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
- codegenARM64.CompileBaseline(&allocator, true);
- if (target_isa == kArm64) {
- Run(allocator, codegenARM64, has_result, expected);
- }
-
- std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
- MipsInstructionSetFeatures::FromCppDefines());
- mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
- codegenMIPS.CompileBaseline(&allocator, true);
- if (kRuntimeISA == kMips) {
- Run(allocator, codegenMIPS, has_result, expected);
- }
-
- std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
- Mips64InstructionSetFeatures::FromCppDefines());
- mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
- codegenMIPS64.CompileBaseline(&allocator, true);
- if (target_isa == kMips64) {
- Run(allocator, codegenMIPS64, has_result, expected);
- }
-}
+ SSAChecker graph_checker(graph);
+ graph_checker.Run();
+ ASSERT_TRUE(graph_checker.IsValid());
-template <typename Expected>
-static void RunCodeOptimized(CodeGenerator* codegen,
- HGraph* graph,
- std::function<void(HGraph*)> hook_before_codegen,
- bool has_result,
- Expected expected) {
- // Tests may have already computed it.
- if (graph->GetReversePostOrder().empty()) {
- graph->BuildDominatorTree();
- }
SsaLivenessAnalysis liveness(graph, codegen);
- liveness.Analyze();
- RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness);
- register_allocator.AllocateRegisters();
+ PrepareForRegisterAllocation(graph).Run();
+ liveness.Analyze();
+ RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
hook_before_codegen(graph);
InternalCodeAllocator allocator;
- codegen->CompileOptimized(&allocator);
+ codegen->Compile(&allocator);
Run(allocator, *codegen, has_result, expected);
}
template <typename Expected>
-static void RunCodeOptimized(InstructionSet target_isa,
- HGraph* graph,
- std::function<void(HGraph*)> hook_before_codegen,
- bool has_result,
- Expected expected) {
+static void RunCode(InstructionSet target_isa,
+ HGraph* graph,
+ std::function<void(HGraph*)> hook_before_codegen,
+ bool has_result,
+ Expected expected) {
CompilerOptions compiler_options;
if (target_isa == kArm || target_isa == kThumb2) {
std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
ArmInstructionSetFeatures::FromCppDefines());
TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
- RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected);
} else if (target_isa == kArm64) {
std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
Arm64InstructionSetFeatures::FromCppDefines());
arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
- RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected);
} else if (target_isa == kX86) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
- RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected);
} else if (target_isa == kX86_64) {
std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
X86_64InstructionSetFeatures::FromCppDefines());
x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
- RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
} else if (target_isa == kMips) {
std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
MipsInstructionSetFeatures::FromCppDefines());
mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
- RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
} else if (target_isa == kMips64) {
std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
Mips64InstructionSetFeatures::FromCppDefines());
mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
- RunCodeOptimized(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
}
}
-static void TestCode(InstructionSet target_isa,
- const uint16_t* data,
+static ::std::vector<InstructionSet> GetTargetISAs() {
+ ::std::vector<InstructionSet> v;
+ // Add all ISAs that are executable on hardware or on simulator.
+ const ::std::vector<InstructionSet> executable_isa_candidates = {
+ kArm,
+ kArm64,
+ kThumb2,
+ kX86,
+ kX86_64,
+ kMips,
+ kMips64
+ };
+
+ for (auto target_isa : executable_isa_candidates) {
+ if (CanExecute(target_isa)) {
+ v.push_back(target_isa);
+ }
+ }
+
+ return v;
+}
+
+static void TestCode(const uint16_t* data,
bool has_result = false,
int32_t expected = 0) {
- ArenaPool pool;
- ArenaAllocator arena(&pool);
- HGraph* graph = CreateGraph(&arena);
- HGraphBuilder builder(graph);
- const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
- bool graph_built = builder.BuildGraph(*item);
- ASSERT_TRUE(graph_built);
- // Remove suspend checks, they cannot be executed in this context.
- RemoveSuspendChecks(graph);
- RunCodeBaseline(target_isa, graph, has_result, expected);
-}
-
-static void TestCodeLong(InstructionSet target_isa,
- const uint16_t* data,
+ for (InstructionSet target_isa : GetTargetISAs()) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ HGraph* graph = CreateGraph(&arena);
+ HGraphBuilder builder(graph);
+ const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+ bool graph_built = builder.BuildGraph(*item);
+ ASSERT_TRUE(graph_built);
+ // Remove suspend checks, they cannot be executed in this context.
+ RemoveSuspendChecks(graph);
+ TransformToSsa(graph);
+ RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+ }
+}
+
+static void TestCodeLong(const uint16_t* data,
bool has_result,
int64_t expected) {
- ArenaPool pool;
- ArenaAllocator arena(&pool);
- HGraph* graph = CreateGraph(&arena);
- HGraphBuilder builder(graph, Primitive::kPrimLong);
- const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
- bool graph_built = builder.BuildGraph(*item);
- ASSERT_TRUE(graph_built);
- // Remove suspend checks, they cannot be executed in this context.
- RemoveSuspendChecks(graph);
- RunCodeBaseline(target_isa, graph, has_result, expected);
+ for (InstructionSet target_isa : GetTargetISAs()) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ HGraph* graph = CreateGraph(&arena);
+ HGraphBuilder builder(graph, Primitive::kPrimLong);
+ const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+ bool graph_built = builder.BuildGraph(*item);
+ ASSERT_TRUE(graph_built);
+ // Remove suspend checks, they cannot be executed in this context.
+ RemoveSuspendChecks(graph);
+ TransformToSsa(graph);
+ RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+ }
}
-class CodegenTest: public ::testing::TestWithParam<InstructionSet> {};
+class CodegenTest : public CommonCompilerTest {};
-TEST_P(CodegenTest, ReturnVoid) {
+TEST_F(CodegenTest, ReturnVoid) {
const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, CFG1) {
+TEST_F(CodegenTest, CFG1) {
const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x100,
Instruction::RETURN_VOID);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, CFG2) {
+TEST_F(CodegenTest, CFG2) {
const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x100,
Instruction::GOTO | 0x100,
Instruction::RETURN_VOID);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, CFG3) {
+TEST_F(CodegenTest, CFG3) {
const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x200,
Instruction::RETURN_VOID,
Instruction::GOTO | 0xFF00);
- TestCode(GetParam(), data1);
+ TestCode(data1);
const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_16, 3,
Instruction::RETURN_VOID,
Instruction::GOTO_16, 0xFFFF);
- TestCode(GetParam(), data2);
+ TestCode(data2);
const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_32, 4, 0,
Instruction::RETURN_VOID,
Instruction::GOTO_32, 0xFFFF, 0xFFFF);
- TestCode(GetParam(), data3);
+ TestCode(data3);
}
-TEST_P(CodegenTest, CFG4) {
+TEST_F(CodegenTest, CFG4) {
const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
Instruction::RETURN_VOID,
Instruction::GOTO | 0x100,
Instruction::GOTO | 0xFE00);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, CFG5) {
+TEST_F(CodegenTest, CFG5) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x100,
Instruction::RETURN_VOID);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, IntConstant) {
+TEST_F(CodegenTest, IntConstant) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN_VOID);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, Return1) {
+TEST_F(CodegenTest, Return1) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN | 0);
- TestCode(GetParam(), data, true, 0);
+ TestCode(data, true, 0);
}
-TEST_P(CodegenTest, Return2) {
+TEST_F(CodegenTest, Return2) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 0 | 1 << 8,
Instruction::RETURN | 1 << 8);
- TestCode(GetParam(), data, true, 0);
+ TestCode(data, true, 0);
}
-TEST_P(CodegenTest, Return3) {
+TEST_F(CodegenTest, Return3) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 1 << 8 | 1 << 12,
Instruction::RETURN | 1 << 8);
- TestCode(GetParam(), data, true, 1);
+ TestCode(data, true, 1);
}
-TEST_P(CodegenTest, ReturnIf1) {
+TEST_F(CodegenTest, ReturnIf1) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -460,10 +430,10 @@ TEST_P(CodegenTest, ReturnIf1) {
Instruction::RETURN | 0 << 8,
Instruction::RETURN | 1 << 8);
- TestCode(GetParam(), data, true, 1);
+ TestCode(data, true, 1);
}
-TEST_P(CodegenTest, ReturnIf2) {
+TEST_F(CodegenTest, ReturnIf2) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -471,12 +441,12 @@ TEST_P(CodegenTest, ReturnIf2) {
Instruction::RETURN | 0 << 8,
Instruction::RETURN | 1 << 8);
- TestCode(GetParam(), data, true, 0);
+ TestCode(data, true, 0);
}
// Exercise bit-wise (one's complement) not-int instruction.
#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
-TEST_P(CodegenTest, TEST_NAME) { \
+TEST_F(CodegenTest, TEST_NAME) { \
const int32_t input = INPUT; \
const uint16_t input_lo = Low16Bits(input); \
const uint16_t input_hi = High16Bits(input); \
@@ -485,7 +455,7 @@ TEST_P(CodegenTest, TEST_NAME) { \
Instruction::NOT_INT | 1 << 8 | 0 << 12 , \
Instruction::RETURN | 1 << 8); \
\
- TestCode(GetParam(), data, true, EXPECTED_OUTPUT); \
+ TestCode(data, true, EXPECTED_OUTPUT); \
}
NOT_INT_TEST(ReturnNotIntMinus2, -2, 1)
@@ -501,7 +471,7 @@ NOT_INT_TEST(ReturnNotIntINT32_MAX, 2147483647, -2147483648) // -(2^31)
// Exercise bit-wise (one's complement) not-long instruction.
#define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
-TEST_P(CodegenTest, TEST_NAME) { \
+TEST_F(CodegenTest, TEST_NAME) { \
const int64_t input = INPUT; \
const uint16_t word0 = Low16Bits(Low32Bits(input)); /* LSW. */ \
const uint16_t word1 = High16Bits(Low32Bits(input)); \
@@ -512,7 +482,7 @@ TEST_P(CodegenTest, TEST_NAME) { \
Instruction::NOT_LONG | 2 << 8 | 0 << 12, \
Instruction::RETURN_WIDE | 2 << 8); \
\
- TestCodeLong(GetParam(), data, true, EXPECTED_OUTPUT); \
+ TestCodeLong(data, true, EXPECTED_OUTPUT); \
}
NOT_LONG_TEST(ReturnNotLongMinus2, INT64_C(-2), INT64_C(1))
@@ -551,7 +521,7 @@ NOT_LONG_TEST(ReturnNotLongINT64_MAX,
#undef NOT_LONG_TEST
-TEST_P(CodegenTest, IntToLongOfLongToInt) {
+TEST_F(CodegenTest, IntToLongOfLongToInt) {
const int64_t input = INT64_C(4294967296); // 2^32
const uint16_t word0 = Low16Bits(Low32Bits(input)); // LSW.
const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -565,192 +535,146 @@ TEST_P(CodegenTest, IntToLongOfLongToInt) {
Instruction::INT_TO_LONG | 2 << 8 | 4 << 12,
Instruction::RETURN_WIDE | 2 << 8);
- TestCodeLong(GetParam(), data, true, 1);
+ TestCodeLong(data, true, 1);
}
-TEST_P(CodegenTest, ReturnAdd1) {
+TEST_F(CodegenTest, ReturnAdd1) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::ADD_INT, 1 << 8 | 0,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 7);
+ TestCode(data, true, 7);
}
-TEST_P(CodegenTest, ReturnAdd2) {
+TEST_F(CodegenTest, ReturnAdd2) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::ADD_INT_2ADDR | 1 << 12,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 7);
+ TestCode(data, true, 7);
}
-TEST_P(CodegenTest, ReturnAdd3) {
+TEST_F(CodegenTest, ReturnAdd3) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::ADD_INT_LIT8, 3 << 8 | 0,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 7);
+ TestCode(data, true, 7);
}
-TEST_P(CodegenTest, ReturnAdd4) {
+TEST_F(CodegenTest, ReturnAdd4) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::ADD_INT_LIT16, 3,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 7);
-}
-
-TEST_P(CodegenTest, NonMaterializedCondition) {
- ArenaPool pool;
- ArenaAllocator allocator(&pool);
-
- HGraph* graph = CreateGraph(&allocator);
- HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(entry);
- graph->SetEntryBlock(entry);
- entry->AddInstruction(new (&allocator) HGoto());
-
- HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(first_block);
- entry->AddSuccessor(first_block);
- HIntConstant* constant0 = graph->GetIntConstant(0);
- HIntConstant* constant1 = graph->GetIntConstant(1);
- HEqual* equal = new (&allocator) HEqual(constant0, constant0);
- first_block->AddInstruction(equal);
- first_block->AddInstruction(new (&allocator) HIf(equal));
-
- HBasicBlock* then = new (&allocator) HBasicBlock(graph);
- HBasicBlock* else_ = new (&allocator) HBasicBlock(graph);
- HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
-
- graph->AddBlock(then);
- graph->AddBlock(else_);
- graph->AddBlock(exit);
- first_block->AddSuccessor(then);
- first_block->AddSuccessor(else_);
- then->AddSuccessor(exit);
- else_->AddSuccessor(exit);
-
- exit->AddInstruction(new (&allocator) HExit());
- then->AddInstruction(new (&allocator) HReturn(constant0));
- else_->AddInstruction(new (&allocator) HReturn(constant1));
-
- ASSERT_TRUE(equal->NeedsMaterialization());
- graph->BuildDominatorTree();
- PrepareForRegisterAllocation(graph).Run();
- ASSERT_FALSE(equal->NeedsMaterialization());
-
- auto hook_before_codegen = [](HGraph* graph_in) {
- HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
- HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
- block->InsertInstructionBefore(move, block->GetLastInstruction());
- };
-
- RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, 0);
+ TestCode(data, true, 7);
}
-TEST_P(CodegenTest, ReturnMulInt) {
+TEST_F(CodegenTest, ReturnMulInt) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::MUL_INT, 1 << 8 | 0,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 12);
+ TestCode(data, true, 12);
}
-TEST_P(CodegenTest, ReturnMulInt2addr) {
+TEST_F(CodegenTest, ReturnMulInt2addr) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::MUL_INT_2ADDR | 1 << 12,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 12);
+ TestCode(data, true, 12);
}
-TEST_P(CodegenTest, ReturnMulLong) {
+TEST_F(CodegenTest, ReturnMulLong) {
const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
- Instruction::CONST_4 | 3 << 12 | 0,
- Instruction::CONST_4 | 0 << 12 | 1 << 8,
- Instruction::CONST_4 | 4 << 12 | 2 << 8,
- Instruction::CONST_4 | 0 << 12 | 3 << 8,
+ Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+ Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
Instruction::MUL_LONG, 2 << 8 | 0,
Instruction::RETURN_WIDE);
- TestCodeLong(GetParam(), data, true, 12);
+ TestCodeLong(data, true, 12);
}
-TEST_P(CodegenTest, ReturnMulLong2addr) {
+TEST_F(CodegenTest, ReturnMulLong2addr) {
const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
- Instruction::CONST_4 | 3 << 12 | 0 << 8,
- Instruction::CONST_4 | 0 << 12 | 1 << 8,
- Instruction::CONST_4 | 4 << 12 | 2 << 8,
- Instruction::CONST_4 | 0 << 12 | 3 << 8,
+ Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+ Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
Instruction::MUL_LONG_2ADDR | 2 << 12,
Instruction::RETURN_WIDE);
- TestCodeLong(GetParam(), data, true, 12);
+ TestCodeLong(data, true, 12);
}
-TEST_P(CodegenTest, ReturnMulIntLit8) {
+TEST_F(CodegenTest, ReturnMulIntLit8) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::MUL_INT_LIT8, 3 << 8 | 0,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 12);
+ TestCode(data, true, 12);
}
-TEST_P(CodegenTest, ReturnMulIntLit16) {
+TEST_F(CodegenTest, ReturnMulIntLit16) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::MUL_INT_LIT16, 3,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 12);
+ TestCode(data, true, 12);
}
-TEST_P(CodegenTest, MaterializedCondition1) {
- // Check that condition are materialized correctly. A materialized condition
- // should yield `1` if it evaluated to true, and `0` otherwise.
- // We force the materialization of comparisons for different combinations of
- // inputs and check the results.
-
- int lhs[] = {1, 2, -1, 2, 0xabc};
- int rhs[] = {2, 1, 2, -1, 0xabc};
-
- for (size_t i = 0; i < arraysize(lhs); i++) {
+TEST_F(CodegenTest, NonMaterializedCondition) {
+ for (InstructionSet target_isa : GetTargetISAs()) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = CreateGraph(&allocator);
- HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(entry_block);
- graph->SetEntryBlock(entry_block);
- entry_block->AddInstruction(new (&allocator) HGoto());
- HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(code_block);
+ HGraph* graph = CreateGraph(&allocator);
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ entry->AddInstruction(new (&allocator) HGoto());
+
+ HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(first_block);
+ entry->AddSuccessor(first_block);
+ HIntConstant* constant0 = graph->GetIntConstant(0);
+ HIntConstant* constant1 = graph->GetIntConstant(1);
+ HEqual* equal = new (&allocator) HEqual(constant0, constant0);
+ first_block->AddInstruction(equal);
+ first_block->AddInstruction(new (&allocator) HIf(equal));
+
+ HBasicBlock* then_block = new (&allocator) HBasicBlock(graph);
+ HBasicBlock* else_block = new (&allocator) HBasicBlock(graph);
HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+ graph->SetExitBlock(exit_block);
+
+ graph->AddBlock(then_block);
+ graph->AddBlock(else_block);
graph->AddBlock(exit_block);
- exit_block->AddInstruction(new (&allocator) HExit());
+ first_block->AddSuccessor(then_block);
+ first_block->AddSuccessor(else_block);
+ then_block->AddSuccessor(exit_block);
+ else_block->AddSuccessor(exit_block);
- entry_block->AddSuccessor(code_block);
- code_block->AddSuccessor(exit_block);
- graph->SetExitBlock(exit_block);
+ exit_block->AddInstruction(new (&allocator) HExit());
+ then_block->AddInstruction(new (&allocator) HReturn(constant0));
+ else_block->AddInstruction(new (&allocator) HReturn(constant1));
- HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
- HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
- HLessThan cmp_lt(cst_lhs, cst_rhs);
- code_block->AddInstruction(&cmp_lt);
- HReturn ret(&cmp_lt);
- code_block->AddInstruction(&ret);
+ ASSERT_TRUE(equal->NeedsMaterialization());
+ TransformToSsa(graph);
+ PrepareForRegisterAllocation(graph).Run();
+ ASSERT_FALSE(equal->NeedsMaterialization());
auto hook_before_codegen = [](HGraph* graph_in) {
HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
@@ -758,93 +682,143 @@ TEST_P(CodegenTest, MaterializedCondition1) {
block->InsertInstructionBefore(move, block->GetLastInstruction());
};
- RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+ RunCode(target_isa, graph, hook_before_codegen, true, 0);
}
}
-TEST_P(CodegenTest, MaterializedCondition2) {
- // Check that HIf correctly interprets a materialized condition.
- // We force the materialization of comparisons for different combinations of
- // inputs. An HIf takes the materialized combination as input and returns a
- // value that we verify.
-
- int lhs[] = {1, 2, -1, 2, 0xabc};
- int rhs[] = {2, 1, 2, -1, 0xabc};
-
-
- for (size_t i = 0; i < arraysize(lhs); i++) {
- ArenaPool pool;
- ArenaAllocator allocator(&pool);
- HGraph* graph = CreateGraph(&allocator);
-
- HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(entry_block);
- graph->SetEntryBlock(entry_block);
- entry_block->AddInstruction(new (&allocator) HGoto());
-
- HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(if_block);
- HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(if_true_block);
- HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(if_false_block);
- HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(exit_block);
- exit_block->AddInstruction(new (&allocator) HExit());
-
- graph->SetEntryBlock(entry_block);
- entry_block->AddSuccessor(if_block);
- if_block->AddSuccessor(if_true_block);
- if_block->AddSuccessor(if_false_block);
- if_true_block->AddSuccessor(exit_block);
- if_false_block->AddSuccessor(exit_block);
- graph->SetExitBlock(exit_block);
-
- HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
- HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
- HLessThan cmp_lt(cst_lhs, cst_rhs);
- if_block->AddInstruction(&cmp_lt);
- // We insert a temporary to separate the HIf from the HLessThan and force
- // the materialization of the condition.
- HTemporary force_materialization(0);
- if_block->AddInstruction(&force_materialization);
- HIf if_lt(&cmp_lt);
- if_block->AddInstruction(&if_lt);
-
- HIntConstant* cst_lt = graph->GetIntConstant(1);
- HReturn ret_lt(cst_lt);
- if_true_block->AddInstruction(&ret_lt);
- HIntConstant* cst_ge = graph->GetIntConstant(0);
- HReturn ret_ge(cst_ge);
- if_false_block->AddInstruction(&ret_ge);
-
- auto hook_before_codegen = [](HGraph* graph_in) {
- HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
- HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
- block->InsertInstructionBefore(move, block->GetLastInstruction());
- };
+TEST_F(CodegenTest, MaterializedCondition1) {
+ for (InstructionSet target_isa : GetTargetISAs()) {
+    // Check that conditions are materialized correctly. A materialized condition
+ // should yield `1` if it evaluated to true, and `0` otherwise.
+ // We force the materialization of comparisons for different combinations of

+ // inputs and check the results.
+
+ int lhs[] = {1, 2, -1, 2, 0xabc};
+ int rhs[] = {2, 1, 2, -1, 0xabc};
+
+ for (size_t i = 0; i < arraysize(lhs); i++) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = CreateGraph(&allocator);
+
+ HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry_block);
+ graph->SetEntryBlock(entry_block);
+ entry_block->AddInstruction(new (&allocator) HGoto());
+ HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(code_block);
+ HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(exit_block);
+ exit_block->AddInstruction(new (&allocator) HExit());
+
+ entry_block->AddSuccessor(code_block);
+ code_block->AddSuccessor(exit_block);
+ graph->SetExitBlock(exit_block);
+
+ HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+ HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+ HLessThan cmp_lt(cst_lhs, cst_rhs);
+ code_block->AddInstruction(&cmp_lt);
+ HReturn ret(&cmp_lt);
+ code_block->AddInstruction(&ret);
+
+ TransformToSsa(graph);
+ auto hook_before_codegen = [](HGraph* graph_in) {
+ HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+ HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+ block->InsertInstructionBefore(move, block->GetLastInstruction());
+ };
+ RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+ }
+ }
+}
- RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+TEST_F(CodegenTest, MaterializedCondition2) {
+ for (InstructionSet target_isa : GetTargetISAs()) {
+ // Check that HIf correctly interprets a materialized condition.
+ // We force the materialization of comparisons for different combinations of
+ // inputs. An HIf takes the materialized combination as input and returns a
+ // value that we verify.
+
+ int lhs[] = {1, 2, -1, 2, 0xabc};
+ int rhs[] = {2, 1, 2, -1, 0xabc};
+
+
+ for (size_t i = 0; i < arraysize(lhs); i++) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = CreateGraph(&allocator);
+
+ HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry_block);
+ graph->SetEntryBlock(entry_block);
+ entry_block->AddInstruction(new (&allocator) HGoto());
+
+ HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(if_block);
+ HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(if_true_block);
+ HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(if_false_block);
+ HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(exit_block);
+ exit_block->AddInstruction(new (&allocator) HExit());
+
+ graph->SetEntryBlock(entry_block);
+ entry_block->AddSuccessor(if_block);
+ if_block->AddSuccessor(if_true_block);
+ if_block->AddSuccessor(if_false_block);
+ if_true_block->AddSuccessor(exit_block);
+ if_false_block->AddSuccessor(exit_block);
+ graph->SetExitBlock(exit_block);
+
+ HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+ HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+ HLessThan cmp_lt(cst_lhs, cst_rhs);
+ if_block->AddInstruction(&cmp_lt);
+ // We insert a temporary to separate the HIf from the HLessThan and force
+ // the materialization of the condition.
+ HTemporary force_materialization(0);
+ if_block->AddInstruction(&force_materialization);
+ HIf if_lt(&cmp_lt);
+ if_block->AddInstruction(&if_lt);
+
+ HIntConstant* cst_lt = graph->GetIntConstant(1);
+ HReturn ret_lt(cst_lt);
+ if_true_block->AddInstruction(&ret_lt);
+ HIntConstant* cst_ge = graph->GetIntConstant(0);
+ HReturn ret_ge(cst_ge);
+ if_false_block->AddInstruction(&ret_ge);
+
+ TransformToSsa(graph);
+ auto hook_before_codegen = [](HGraph* graph_in) {
+ HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+ HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+ block->InsertInstructionBefore(move, block->GetLastInstruction());
+ };
+ RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+ }
}
}
-TEST_P(CodegenTest, ReturnDivIntLit8) {
+TEST_F(CodegenTest, ReturnDivIntLit8) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::DIV_INT_LIT8, 3 << 8 | 0,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 1);
+ TestCode(data, true, 1);
}
-TEST_P(CodegenTest, ReturnDivInt2Addr) {
+TEST_F(CodegenTest, ReturnDivInt2Addr) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0,
Instruction::CONST_4 | 2 << 12 | 1 << 8,
Instruction::DIV_INT_2ADDR | 1 << 12,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 2);
+ TestCode(data, true, 2);
}
// Helper method.
@@ -933,80 +907,55 @@ static void TestComparison(IfCondition condition,
block->AddInstruction(comparison);
block->AddInstruction(new (&allocator) HReturn(comparison));
- auto hook_before_codegen = [](HGraph*) {
- };
- RunCodeOptimized(target_isa, graph, hook_before_codegen, true, expected_result);
-}
-
-TEST_P(CodegenTest, ComparisonsInt) {
- const InstructionSet target_isa = GetParam();
- for (int64_t i = -1; i <= 1; i++) {
- for (int64_t j = -1; j <= 1; j++) {
- TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+ TransformToSsa(graph);
+ RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result);
+}
+
+TEST_F(CodegenTest, ComparisonsInt) {
+ for (InstructionSet target_isa : GetTargetISAs()) {
+ for (int64_t i = -1; i <= 1; i++) {
+ for (int64_t j = -1; j <= 1; j++) {
+ TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+ }
}
}
}
-TEST_P(CodegenTest, ComparisonsLong) {
+TEST_F(CodegenTest, ComparisonsLong) {
// TODO: make MIPS work for long
if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
return;
}
- const InstructionSet target_isa = GetParam();
- if (target_isa == kMips || target_isa == kMips64) {
- return;
- }
-
- for (int64_t i = -1; i <= 1; i++) {
- for (int64_t j = -1; j <= 1; j++) {
- TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+ for (InstructionSet target_isa : GetTargetISAs()) {
+ if (target_isa == kMips || target_isa == kMips64) {
+ continue;
}
- }
-}
-static ::std::vector<InstructionSet> GetTargetISAs() {
- ::std::vector<InstructionSet> v;
- // Add all ISAs that are executable on hardware or on simulator.
- const ::std::vector<InstructionSet> executable_isa_candidates = {
- kArm,
- kArm64,
- kThumb2,
- kX86,
- kX86_64,
- kMips,
- kMips64
- };
-
- for (auto target_isa : executable_isa_candidates) {
- if (CanExecute(target_isa)) {
- v.push_back(target_isa);
+ for (int64_t i = -1; i <= 1; i++) {
+ for (int64_t j = -1; j <= 1; j++) {
+ TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+ }
}
}
-
- return v;
}
-INSTANTIATE_TEST_CASE_P(MultipleTargets,
- CodegenTest,
- ::testing::ValuesIn(GetTargetISAs()));
-
} // namespace art
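
To make the shape of the refactored harness concrete, a hypothetical new test written against it would look as follows (illustrative only; ReturnConst is not part of this change):

    TEST_F(CodegenTest, ReturnConst) {
      const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
          Instruction::CONST_4 | 5 << 12 | 0 << 8,  // v0 = 5
          Instruction::RETURN | 0);                 // return v0
      // TestCode() builds the graph, transforms it to SSA and runs codegen on
      // every ISA returned by GetTargetISAs(), so TEST_P/GetParam() is no longer needed.
      TestCode(data, true, 5);
    }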
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 86a695b152..e170e37bdd 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -89,15 +89,18 @@ void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
}
void HDeadCodeElimination::RemoveDeadBlocks() {
+ if (graph_->HasIrreducibleLoops()) {
+ // Do not eliminate dead blocks if the graph has irreducible loops. We could
+ // support it, but that would require changes in our loop representation to handle
+ // multiple entry points. We decided it was not worth the complexity.
+ return;
+ }
// Classify blocks as reachable/unreachable.
ArenaAllocator* allocator = graph_->GetArena();
ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false);
MarkReachableBlocks(graph_, &live_blocks);
bool removed_one_or_more_blocks = false;
- // If the graph has irreducible loops we need to reset all graph analysis we have done
- // before: the irreducible loop can be turned into a reducible one.
- // For simplicity, we do the full computation regardless of the type of the loops.
bool rerun_dominance_and_loop_analysis = false;
// Remove all dead blocks. Iterate in post order because removal needs the
@@ -105,9 +108,6 @@ void HDeadCodeElimination::RemoveDeadBlocks() {
// inside out.
for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
- if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
- rerun_dominance_and_loop_analysis = true;
- }
int id = block->GetBlockId();
if (!live_blocks.IsBitSet(id)) {
MaybeRecordDeadBlock(block);
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 91e4a997fd..feb8b2092a 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -133,8 +133,9 @@ TEST(OptimizerTest, CFG4) {
const uint32_t dominators[] = {
kInvalidBlockId,
- 0,
- kInvalidBlockId
+ 3,
+ kInvalidBlockId,
+ 0
};
TestCode(data1, dominators, sizeof(dominators) / sizeof(int));
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 9439ba0c8d..31136772c7 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -484,6 +484,18 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
loop_information->GetPreHeader()->GetSuccessors().size()));
}
+ if (loop_information->GetSuspendCheck() == nullptr) {
+ AddError(StringPrintf(
+ "Loop with header %d does not have a suspend check.",
+ loop_header->GetBlockId()));
+ }
+
+ if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) {
+ AddError(StringPrintf(
+ "Loop header %d does not have the loop suspend check as the first instruction.",
+ loop_header->GetBlockId()));
+ }
+
// Ensure the loop header has only one incoming branch and the remaining
// predecessors are back edges.
size_t num_preds = loop_header->GetPredecessors().size();
@@ -589,6 +601,14 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) {
}
}
+ if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) {
+ AddError(StringPrintf("Instruction %s:%d in block %d requires an environment "
+ "but does not have one.",
+ instruction->DebugName(),
+ instruction->GetId(),
+ current_block_->GetBlockId()));
+ }
+
// Ensure an instruction having an environment is dominated by the
// instructions contained in the environment.
for (HEnvironment* environment = instruction->GetEnvironment();
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index d4b9b71952..d5305646a8 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -164,7 +164,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) {
// Ensure there is only one back edge.
ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
- ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+ ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
ASSERT_NE(if_block->GetPredecessors()[1], if_block);
// Ensure the new block is the back edge.
@@ -199,7 +199,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) {
// Ensure there is only one back edge.
ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
- ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+ ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
ASSERT_NE(if_block->GetPredecessors()[1], if_block);
// Ensure the new block is the back edge.
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 293282edbb..2e79df1b84 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -356,12 +356,12 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
compare, invoke_instruction->GetDexPc());
// TODO: Extend reference type propagation to understand the guard.
if (cursor != nullptr) {
- bb_cursor->InsertInstructionAfter(load_class, cursor);
+ bb_cursor->InsertInstructionAfter(field_get, cursor);
} else {
- bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction());
+ bb_cursor->InsertInstructionBefore(field_get, bb_cursor->GetFirstInstruction());
}
- bb_cursor->InsertInstructionAfter(field_get, load_class);
- bb_cursor->InsertInstructionAfter(compare, field_get);
+ bb_cursor->InsertInstructionAfter(load_class, field_get);
+ bb_cursor->InsertInstructionAfter(compare, load_class);
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
@@ -419,7 +419,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do
size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
VLOG(compiler) << "Method " << PrettyMethod(method)
- << " is too big to inline";
+ << " is too big to inline: "
+ << code_item->insns_size_in_code_units_
+ << " > "
+ << inline_max_code_units;
return false;
}
@@ -639,9 +642,12 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
for (; !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
- if (block->IsLoopHeader()) {
+
+ if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
+ // Don't inline methods with irreducible loops: they could prevent some
+ // optimizations from running.
VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
- << " could not be inlined because it contains a loop";
+ << " could not be inlined because it contains an irreducible loop";
return false;
}
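The irreducible-loop bail-out above is easier to picture with a concrete shape. A minimal sketch (illustrative C++, not from this change): a cycle that can be entered at two different points has no header that dominates every block in it, which is exactly what makes it irreducible; in dex such control flow can come straight from the bytecode, while in C++ it takes a goto.

#include <cstdio>

// Hypothetical example of an irreducible loop: the cycle below is entered
// either at `body` (fall-through) or at `test` (via the goto), so no single
// block dominates the whole cycle.
int Count(bool enter_at_test, int n) {
  int i = 0;
  if (enter_at_test) goto test;  // second entry into the cycle
body:
  ++i;
test:
  if (i < n) goto body;          // back edge reachable from both entries
  return i;
}

int main() {
  std::printf("%d %d\n", Count(false, 3), Count(true, 3));  // prints: 3 3
  return 0;
}

A shape like this defeats analyses that assume a single loop header, which is why the inliner now rejects only irreducible loops instead of rejecting any method containing a loop.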
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 6bbc751bee..4bcfc54791 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -30,6 +30,15 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio
HInstruction* array,
HInstruction* index,
int access_size) {
+ if (kEmitCompilerReadBarrier) {
+ // The read barrier instrumentation does not support the
+ // HArm64IntermediateAddress instruction yet.
+ //
+ // TODO: Handle this case properly in the ARM64 code generator and
+ // re-enable this optimization; otherwise, remove this TODO.
+ // b/26601270
+ return;
+ }
if (index->IsConstant() ||
(index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
// When the index is a constant all the addressing can be fitted in the
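For context, here is a rough sketch of what the disabled extraction normally buys (plain C++ standing in for the HIR rewrite, with an assumed array layout, not actual compiler output): the constant data offset is folded into a single add so each access is just base plus scaled index, which maps onto the AArch64 [base, index, LSL #shift] addressing mode.

#include <cstdint>
#include <cstddef>

// Illustrative only: `intermediate` plays the role of HArm64IntermediateAddress,
// computed once and shared by the accesses that follow.
int32_t LoadElement(const uint8_t* array, size_t data_offset, size_t index) {
  const uint8_t* intermediate = array + data_offset;  // array base + data offset, hoisted
  return *reinterpret_cast<const int32_t*>(intermediate + index * sizeof(int32_t));
}

With read barriers enabled the pass now returns before doing any of this, since the instrumentation does not yet support HArm64IntermediateAddress (see the TODO above).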
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 9f50d1814e..3bf3f7ffae 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -85,9 +85,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
InvokeDexCallingConventionVisitor* calling_convention_visitor) {
if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been
+ // pruned by art::PrepareForRegisterAllocation.
+ DCHECK(!invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
}
if (invoke->GetNumberOfArguments() == 0) {
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 854d92a409..adf8734214 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -167,11 +167,7 @@ void HGraph::ClearDominanceInformation() {
void HGraph::ClearLoopInformation() {
SetHasIrreducibleLoops(false);
for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
- if (current->IsLoopHeader()) {
- current->RemoveInstruction(current->GetLoopInformation()->GetSuspendCheck());
- }
- current->SetLoopInformation(nullptr);
+ it.Current()->SetLoopInformation(nullptr);
}
}
@@ -180,6 +176,14 @@ void HBasicBlock::ClearDominanceInformation() {
dominator_ = nullptr;
}
+HInstruction* HBasicBlock::GetFirstInstructionDisregardMoves() const {
+ HInstruction* instruction = GetFirstInstruction();
+ while (instruction->IsParallelMove()) {
+ instruction = instruction->GetNext();
+ }
+ return instruction;
+}
+
void HGraph::ComputeDominanceInformation() {
DCHECK(reverse_post_order_.empty());
reverse_post_order_.reserve(blocks_.size());
@@ -284,9 +288,10 @@ void HGraph::SimplifyLoop(HBasicBlock* header) {
// Make sure the loop has only one pre header. This simplifies SSA building by having
// to just look at the pre header to know which locals are initialized at entry of the
- // loop.
+ // loop. Also, don't allow the entry block to be a pre header: this simplifies
+ // inlining this graph into a caller.
size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges();
- if (number_of_incomings != 1) {
+ if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) {
HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
AddBlock(pre_header);
pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc()));
@@ -457,6 +462,10 @@ void HGraph::SimplifyCFG() {
}
if (block->IsLoopHeader()) {
SimplifyLoop(block);
+ } else if (!block->IsEntryBlock() && block->GetFirstInstruction()->IsSuspendCheck()) {
+ // We are being called by the dead code elimination pass, and what used to be
+ // a loop got dismantled. Just remove the suspend check.
+ block->RemoveInstruction(block->GetFirstInstruction());
}
}
}
@@ -1829,6 +1838,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
DCHECK(GetBlocks()[0]->IsEntryBlock());
DCHECK(GetBlocks()[2]->IsExitBlock());
DCHECK(!body->IsExitBlock());
+ DCHECK(!body->IsInLoop());
HInstruction* last = body->GetLastInstruction();
invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions());
@@ -1887,7 +1897,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
// Update the meta information surrounding blocks:
// (1) the graph they are now in,
// (2) the reverse post order of that graph,
- // (3) the potential loop information they are now in,
+ // (3) their potential loop information, inner and outer,
// (4) try block membership.
// Note that we do not need to update catch phi inputs because they
// correspond to the register file of the outer method which the inlinee
@@ -1916,15 +1926,24 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
HBasicBlock* current = it.Current();
if (current != exit_block_ && current != entry_block_ && current != first) {
- DCHECK(!current->IsInLoop());
DCHECK(current->GetTryCatchInformation() == nullptr);
DCHECK(current->GetGraph() == this);
current->SetGraph(outer_graph);
outer_graph->AddBlock(current);
outer_graph->reverse_post_order_[++index_of_at] = current;
- if (loop_info != nullptr) {
+ if (!current->IsInLoop()) {
current->SetLoopInformation(loop_info);
- for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
+ } else if (current->IsLoopHeader()) {
+ // Clear the information of which blocks are contained in that loop. Since the
+ // information is stored as a bit vector based on block ids, we have to update
+ // it, as those block ids were specific to the callee graph and we are now adding
+ // these blocks to the caller graph.
+ current->GetLoopInformation()->ClearAllBlocks();
+ }
+ if (current->IsInLoop()) {
+ for (HLoopInformationOutwardIterator loop_it(*current);
+ !loop_it.Done();
+ loop_it.Advance()) {
loop_it.Current()->Add(current);
}
}
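The ClearAllBlocks() call above exists because a loop's block membership is a bit vector indexed by block id, and ids are only meaningful within the graph that assigned them. A minimal standalone sketch (toy types, not ART's HLoopInformation) of why the stale bits have to be dropped before the blocks are re-added under their caller-graph ids:

#include <cstdio>
#include <vector>

// Toy loop info: blocks[id] == true means "the block with this id is in the loop".
struct ToyLoopInfo {
  std::vector<bool> blocks;
  void Add(size_t id) {
    if (id >= blocks.size()) blocks.resize(id + 1, false);
    blocks[id] = true;
  }
  void ClearAllBlocks() { blocks.assign(blocks.size(), false); }
};

int main() {
  ToyLoopInfo loop;
  loop.Add(3);             // the loop body had id 3 in the callee graph
  // After inlining, that block gets a fresh id in the caller graph (say 42).
  // Keeping bit 3 set would tag an unrelated caller block as part of the loop.
  loop.ClearAllBlocks();
  loop.Add(42);
  std::printf("%d %d\n", (int)loop.blocks[3], (int)loop.blocks[42]);  // prints: 0 1
  return 0;
}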
@@ -1937,7 +1956,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
outer_graph->AddBlock(to);
outer_graph->reverse_post_order_[++index_of_at] = to;
if (loop_info != nullptr) {
- to->SetLoopInformation(loop_info);
+ if (!to->IsInLoop()) {
+ to->SetLoopInformation(loop_info);
+ }
for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
loop_it.Current()->Add(to);
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 859d570b29..5246fd1f05 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -689,6 +689,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
void Add(HBasicBlock* block);
void Remove(HBasicBlock* block);
+ void ClearAllBlocks() {
+ blocks_.ClearAllBits();
+ }
+
private:
// Internal recursive implementation of `Populate`.
void PopulateRecursive(HBasicBlock* block);
@@ -860,6 +864,8 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
HInstruction* GetLastPhi() const { return phis_.last_instruction_; }
const HInstructionList& GetPhis() const { return phis_; }
+ HInstruction* GetFirstInstructionDisregardMoves() const;
+
void AddSuccessor(HBasicBlock* block) {
successors_.push_back(block);
block->predecessors_.push_back(this);
@@ -3687,19 +3693,13 @@ class HInvokeStaticOrDirect : public HInvoke {
DCHECK(!IsStaticWithExplicitClinitCheck());
}
- HNewInstance* GetThisArgumentOfStringInit() const {
- DCHECK(IsStringInit());
- size_t index = InputCount() - 1;
- DCHECK(InputAt(index)->IsNewInstance());
- return InputAt(index)->AsNewInstance();
- }
-
- void RemoveThisArgumentOfStringInit() {
+ HInstruction* GetAndRemoveThisArgumentOfStringInit() {
DCHECK(IsStringInit());
size_t index = InputCount() - 1;
- DCHECK(InputAt(index)->IsNewInstance());
+ HInstruction* input = InputAt(index);
RemoveAsUserOfInput(index);
inputs_.pop_back();
+ return input;
}
// Is this a call to a static method whose declaring class has an
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bb840eabdd..fffd00535c 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -127,7 +127,7 @@ class PassObserver : public ValueObject {
timing_logger_enabled_(compiler_driver->GetDumpPasses()),
timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
disasm_info_(graph->GetArena()),
- visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
+ visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()),
visualizer_(visualizer_output, graph, *codegen),
graph_in_bad_state_(false) {
if (timing_logger_enabled_ || visualizer_enabled_) {
@@ -305,30 +305,19 @@ class OptimizingCompiler FINAL : public Compiler {
SHARED_REQUIRES(Locks::mutator_lock_);
private:
- // Whether we should run any optimization or register allocation. If false, will
- // just run the code generation after the graph was built.
- const bool run_optimizations_;
-
// Create a 'CompiledMethod' for an optimized graph.
- CompiledMethod* EmitOptimized(ArenaAllocator* arena,
- CodeVectorAllocator* code_allocator,
- CodeGenerator* codegen,
- CompilerDriver* driver) const;
-
- // Create a 'CompiledMethod' for a non-optimized graph.
- CompiledMethod* EmitBaseline(ArenaAllocator* arena,
- CodeVectorAllocator* code_allocator,
- CodeGenerator* codegen,
- CompilerDriver* driver) const;
+ CompiledMethod* Emit(ArenaAllocator* arena,
+ CodeVectorAllocator* code_allocator,
+ CodeGenerator* codegen,
+ CompilerDriver* driver) const;
// Try compiling a method and return the code generator used for
// compiling it.
// This method:
// 1) Builds the graph. Returns null if it failed to build it.
- // 2) If `run_optimizations_` is set:
- // 2.1) Transform the graph to SSA. Returns null if it failed.
- // 2.2) Run optimizations on the graph, including register allocator.
- // 3) Generate code with the `code_allocator` provided.
+ // 2) Transforms the graph to SSA. Returns null if it failed.
+ // 3) Runs optimizations on the graph, including register allocator.
+ // 4) Generates code with the `code_allocator` provided.
CodeGenerator* TryCompile(ArenaAllocator* arena,
CodeVectorAllocator* code_allocator,
const DexFile::CodeItem* code_item,
@@ -350,21 +339,19 @@ class OptimizingCompiler FINAL : public Compiler {
static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
- : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
- run_optimizations_(
- driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) {}
+ : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {}
void OptimizingCompiler::Init() {
// Enable C1visualizer output. Must be done in Init() because the compiler
// driver is not fully initialized when passed to the compiler's constructor.
CompilerDriver* driver = GetCompilerDriver();
- const std::string cfg_file_name = driver->GetDumpCfgFileName();
+ const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName();
if (!cfg_file_name.empty()) {
CHECK_EQ(driver->GetThreadCount(), 1U)
<< "Graph visualizer requires the compiler to run single-threaded. "
<< "Invoke the compiler with '-j1'.";
std::ios_base::openmode cfg_file_mode =
- driver->GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
+ driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode));
}
if (driver->GetDumpStats()) {
@@ -577,17 +564,6 @@ static void RunOptimizations(HGraph* graph,
AllocateRegisters(graph, codegen, pass_observer);
}
-// The stack map we generate must be 4-byte aligned on ARM. Since existing
-// maps are generated alongside these stack maps, we must also align them.
-static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) {
- size_t size = vector.size();
- size_t aligned_size = RoundUp(size, 4);
- for (; size < aligned_size; ++size) {
- vector.push_back(0);
- }
- return ArrayRef<const uint8_t>(vector);
-}
-
static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
codegen->EmitLinkerPatches(&linker_patches);
@@ -601,10 +577,10 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen)
return linker_patches;
}
-CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena,
- CodeVectorAllocator* code_allocator,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver) const {
+CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
+ CodeVectorAllocator* code_allocator,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver) const {
ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
stack_map.resize(codegen->ComputeStackMapsSize());
@@ -630,39 +606,6 @@ CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena,
return compiled_method;
}
-CompiledMethod* OptimizingCompiler::EmitBaseline(
- ArenaAllocator* arena,
- CodeVectorAllocator* code_allocator,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver) const {
- ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
-
- ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps));
- codegen->BuildMappingTable(&mapping_table);
- ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps));
- codegen->BuildVMapTable(&vmap_table);
- ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
- codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
-
- CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
- compiler_driver,
- codegen->GetInstructionSet(),
- ArrayRef<const uint8_t>(code_allocator->GetMemory()),
- // Follow Quick's behavior and set the frame size to zero if it is
- // considered "empty" (see the definition of
- // art::CodeGenerator::HasEmptyFrame).
- codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
- codegen->GetCoreSpillMask(),
- codegen->GetFpuSpillMask(),
- ArrayRef<const SrcMapElem>(),
- AlignVectorSize(mapping_table),
- AlignVectorSize(vmap_table),
- AlignVectorSize(gc_map),
- ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
- ArrayRef<const LinkerPatch>(linker_patches));
- return compiled_method;
-}
-
CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
CodeVectorAllocator* code_allocator,
const DexFile::CodeItem* code_item,
@@ -775,41 +718,37 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
- if (run_optimizations_) {
- ScopedObjectAccess soa(Thread::Current());
- StackHandleScopeCollection handles(soa.Self());
- ScopedThreadSuspension sts(soa.Self(), kNative);
-
- {
- PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
- GraphAnalysisResult result = graph->TryBuildingSsa(&handles);
- if (result != kAnalysisSuccess) {
- switch (result) {
- case kAnalysisFailThrowCatchLoop:
- MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
- break;
- case kAnalysisFailAmbiguousArrayOp:
- MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
- break;
- case kAnalysisSuccess:
- UNREACHABLE();
- }
- pass_observer.SetGraphInBadState();
- return nullptr;
+ ScopedObjectAccess soa(Thread::Current());
+ StackHandleScopeCollection handles(soa.Self());
+ ScopedThreadSuspension sts(soa.Self(), kNative);
+
+ {
+ PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
+ GraphAnalysisResult result = graph->TryBuildingSsa(&handles);
+ if (result != kAnalysisSuccess) {
+ switch (result) {
+ case kAnalysisFailThrowCatchLoop:
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+ break;
+ case kAnalysisFailAmbiguousArrayOp:
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+ break;
+ case kAnalysisSuccess:
+ UNREACHABLE();
}
+ pass_observer.SetGraphInBadState();
+ return nullptr;
}
-
- RunOptimizations(graph,
- codegen.get(),
- compiler_driver,
- compilation_stats_.get(),
- dex_compilation_unit,
- &pass_observer,
- &handles);
- codegen->CompileOptimized(code_allocator);
- } else {
- codegen->CompileBaseline(code_allocator);
}
+
+ RunOptimizations(graph,
+ codegen.get(),
+ compiler_driver,
+ compilation_stats_.get(),
+ dex_compilation_unit,
+ &pass_observer,
+ &handles);
+ codegen->Compile(code_allocator);
pass_observer.DumpDisassembly();
if (kArenaAllocatorCountAllocations) {
@@ -861,11 +800,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
dex_cache));
if (codegen.get() != nullptr) {
MaybeRecordStat(MethodCompilationStat::kCompiled);
- if (run_optimizations_) {
- method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
- } else {
- method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver);
- }
+ method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver);
}
} else {
if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
@@ -928,8 +863,6 @@ bool OptimizingCompiler::JitCompile(Thread* self,
{
// Go to native so that we don't block GC during compilation.
ScopedThreadSuspension sts(self, kNative);
-
- DCHECK(run_optimizations_);
codegen.reset(
TryCompile(&arena,
&code_allocator,
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 9d136f3ae6..be470ccb7d 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -504,7 +504,7 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
// This function is used to reduce the dependencies in the graph after
// (from -> to) has been performed. Since we ensure there is no move with the same
- // destination, (to -> X) can not be blocked while (from -> X) might still be
+ // destination, (to -> X) cannot be blocked while (from -> X) might still be
// blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
// (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
// a dependency between the two. If we update the source location from 1 to 2, we
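To make the example in that comment concrete, here is a toy version of the bookkeeping (hypothetical pair representation, not the resolver's real MoveOperands):

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Pending moves as (from, to) location pairs: (0 -> 1), (1 -> 2), (1 -> 3).
  std::vector<std::pair<int, int>> moves = {{0, 1}, {1, 2}, {1, 3}};
  // (1 -> 2) has just been performed; drop it and retarget remaining readers
  // of location 1 to read the freshly written copy in location 2 instead.
  moves.erase(moves.begin() + 1);
  for (auto& m : moves) {
    if (m.first == 1) m.first = 2;  // (1 -> 3) becomes (2 -> 3)
  }
  // The dependency is gone: (0 -> 1) no longer has to wait for anything,
  // because nothing reads location 1 anymore.
  for (const auto& m : moves) std::printf("(%d -> %d) ", m.first, m.second);
  std::printf("\n");  // prints: (0 -> 1) (2 -> 3)
  return 0;
}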
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 2bae4bc5c8..a966b62b4f 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -72,8 +72,7 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
- static constexpr bool kIsBaseline = false;
- codegen->SetupBlockedRegisters(kIsBaseline);
+ codegen->SetupBlockedRegisters();
physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
// Always reserve for the current method and the graph's max out registers.
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 7494e336b1..165d09d1a5 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -422,6 +422,34 @@ bool SsaBuilder::FixAmbiguousArrayOps() {
return true;
}
+void SsaBuilder::RemoveRedundantUninitializedStrings() {
+ if (GetGraph()->IsDebuggable()) {
+ // Do not perform the optimization for consistency with the interpreter
+ // which always allocates an object for new-instance of String.
+ return;
+ }
+
+ for (HNewInstance* new_instance : uninitialized_strings_) {
+ DCHECK(new_instance->IsStringAlloc());
+
+ // Replace NewInstance of String with NullConstant if not used prior to
+ // calling StringFactory. In case of deoptimization, the interpreter is
+ // expected to skip the null check on the `this` argument of the StringFactory call.
+ if (!new_instance->HasNonEnvironmentUses()) {
+ new_instance->ReplaceWith(GetGraph()->GetNullConstant());
+ new_instance->GetBlock()->RemoveInstruction(new_instance);
+
+ // Remove LoadClass if not needed any more.
+ HLoadClass* load_class = new_instance->InputAt(0)->AsLoadClass();
+ DCHECK(load_class != nullptr);
+ DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible";
+ if (!load_class->HasUses()) {
+ load_class->GetBlock()->RemoveInstruction(load_class);
+ }
+ }
+ }
+}
+
GraphAnalysisResult SsaBuilder::BuildSsa() {
// 1) Visit in reverse post order. We need to have all predecessors of a block
// visited (with the exception of loops) in order to create the right environment
@@ -487,7 +515,15 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
// input types.
dead_phi_elimimation.EliminateDeadPhis();
- // 11) Clear locals.
+ // 11) Step 1) replaced uses of NewInstances of String with the results of
+ // their corresponding StringFactory calls. Unless the String objects are used
+ // before they are initialized, they can be replaced with NullConstant.
+ // Note that this optimization is valid only if unsimplified code does not use
+ // the uninitialized value because we assume execution can be deoptimized at
+ // any safepoint. We must therefore perform it before any other optimizations.
+ RemoveRedundantUninitializedStrings();
+
+ // 12) Clear locals.
for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
!it.Done();
it.Advance()) {
@@ -891,12 +927,21 @@ void SsaBuilder::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
if (invoke->IsStringInit()) {
// This is a StringFactory call which acts as a String constructor. Its
// result replaces the empty String pre-allocated by NewInstance.
- HNewInstance* new_instance = invoke->GetThisArgumentOfStringInit();
- invoke->RemoveThisArgumentOfStringInit();
+ HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit();
+
+ // Replacing the NewInstance might render it redundant. Keep a list of these
+ // to be visited once it is clear whether it has remaining uses.
+ if (arg_this->IsNewInstance()) {
+ uninitialized_strings_.push_back(arg_this->AsNewInstance());
+ } else {
+ DCHECK(arg_this->IsPhi());
+ // NewInstance is not the direct input of the StringFactory call. It might
+ // be redundant but optimizing this case is not worth the effort.
+ }
- // Walk over all vregs and replace any occurrence of `new_instance` with `invoke`.
+ // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
- if ((*current_locals_)[vreg] == new_instance) {
+ if ((*current_locals_)[vreg] == arg_this) {
(*current_locals_)[vreg] = invoke;
}
}
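Taken together, the two ssa_builder.cc changes target the usual string-init shape; a simplified sketch of the intended rewrite (hand-drawn HIR with an assumed factory method name, not an actual compiler dump):

// Source:   String s = new String(chars);
//
// Before:   n: NewInstance [java.lang.String]            // pre-allocated, uninitialized
//           i: InvokeStaticOrDirect StringFactory.newStringFromChars(chars, n)
//
// VisitInvokeStaticOrDirect() pops `n` (or a phi over it) off the invoke's
// inputs and rewrites every vreg that held `n` to hold `i` instead. If `n` is
// left with no non-environment uses, RemoveRedundantUninitializedStrings()
// later replaces it with the null constant and drops its LoadClass input once
// that too is unused.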
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 28eef6a40c..ccef8ea380 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -57,6 +57,7 @@ class SsaBuilder : public HGraphVisitor {
loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
+ uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
locals_for_(graph->GetBlocks().size(),
ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
@@ -105,6 +106,8 @@ class SsaBuilder : public HGraphVisitor {
HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
+ void RemoveRedundantUninitializedStrings();
+
StackHandleScopeCollection* const handles_;
// True if types of ambiguous ArrayGets have been resolved.
@@ -119,6 +122,7 @@ class SsaBuilder : public HGraphVisitor {
ArenaVector<HArrayGet*> ambiguous_agets_;
ArenaVector<HArraySet*> ambiguous_asets_;
+ ArenaVector<HNewInstance*> uninitialized_strings_;
// HEnvironment for each block.
ArenaVector<ArenaVector<HInstruction*>> locals_for_;