Diffstat (limited to 'compiler/optimizing')
59 files changed, 11820 insertions, 1128 deletions
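One easy-to-miss detail in the builder changes below: CONST_WIDE_16 and CONST_WIDE_32 now sign-extend their 16- or 32-bit payload to 64 bits before creating the HLongConstant. The following is a minimal standalone C++ sketch of that shift-based sign-extension idiom, illustrative only and not ART code (the function name is invented here); like the diff itself, it relies on two's-complement arithmetic right shifts.

#include <cassert>
#include <cstdint>

// Keep only the low 16 bits of `raw`, replicating the sign bit into the
// upper 48 bits. Mirrors the `value <<= 48; value >>= 48;` pattern used
// for CONST_WIDE_16 in builder.cc.
static int64_t SignExtend16To64(int64_t raw) {
  int64_t value = raw;
  value <<= 48;
  value >>= 48;  // Arithmetic right shift propagates the sign bit.
  return value;
}

int main() {
  assert(SignExtend16To64(0xFFFF) == -1);       // All-ones 16-bit pattern reads as -1.
  assert(SignExtend16To64(0x7FFF) == 0x7FFF);   // Positive values pass through unchanged.
  assert(SignExtend16To64(0x8000) == -0x8000);  // Sign bit set makes the result negative.
  return 0;
}

The CONST_WIDE_32 case in the diff is the same idea with a shift amount of 32 instead of 48.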
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 521992ad3a..5015bd06d9 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -15,16 +15,60 @@ * limitations under the License. */ +#include "builder.h" + +#include "class_linker.h" #include "dex_file.h" #include "dex_file-inl.h" #include "dex_instruction.h" #include "dex_instruction-inl.h" -#include "builder.h" +#include "driver/compiler_driver-inl.h" +#include "mirror/art_field.h" +#include "mirror/art_field-inl.h" +#include "mirror/class_loader.h" +#include "mirror/dex_cache.h" #include "nodes.h" #include "primitive.h" +#include "scoped_thread_state_change.h" +#include "thread.h" namespace art { +/** + * Helper class to add HTemporary instructions. This class is used when + * converting a DEX instruction to multiple HInstruction, and where those + * instructions do not die at the following instruction, but instead spans + * multiple instructions. + */ +class Temporaries : public ValueObject { + public: + Temporaries(HGraph* graph, size_t count) : graph_(graph), count_(count), index_(0) { + graph_->UpdateNumberOfTemporaries(count_); + } + + void Add(HInstruction* instruction) { + // We currently only support vreg size temps. + DCHECK(instruction->GetType() != Primitive::kPrimLong + && instruction->GetType() != Primitive::kPrimDouble); + HInstruction* temp = new (graph_->GetArena()) HTemporary(index_++); + instruction->GetBlock()->AddInstruction(temp); + DCHECK(temp->GetPrevious() == instruction); + } + + private: + HGraph* const graph_; + + // The total number of temporaries that will be used. + const size_t count_; + + // Current index in the temporary stack, updated by `Add`. + size_t index_; +}; + +static bool IsTypeSupported(Primitive::Type type) { + return type != Primitive::kPrimFloat && type != Primitive::kPrimDouble; +} + void HGraphBuilder::InitializeLocals(uint16_t count) { graph_->SetNumberOfVRegs(count); locals_.SetSize(count); @@ -93,15 +137,34 @@ static bool CanHandleCodeItem(const DexFile::CodeItem& code_item) { } template<typename T> -void HGraphBuilder::If_22t(const Instruction& instruction, int32_t dex_offset, bool is_not) { +void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_offset) { + int32_t target_offset = instruction.GetTargetOffset(); + PotentiallyAddSuspendCheck(target_offset, dex_offset); HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); - current_block_->AddInstruction(new (arena_) T(first, second)); - if (is_not) { - current_block_->AddInstruction(new (arena_) HNot(current_block_->GetLastInstruction())); - } - current_block_->AddInstruction(new (arena_) HIf(current_block_->GetLastInstruction())); - HBasicBlock* target = FindBlockStartingAt(instruction.GetTargetOffset() + dex_offset); + T* comparison = new (arena_) T(first, second); + current_block_->AddInstruction(comparison); + HInstruction* ifinst = new (arena_) HIf(comparison); + current_block_->AddInstruction(ifinst); + HBasicBlock* target = FindBlockStartingAt(dex_offset + target_offset); + DCHECK(target != nullptr); + current_block_->AddSuccessor(target); + target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits()); + DCHECK(target != nullptr); + current_block_->AddSuccessor(target); + current_block_ = nullptr; +} + +template<typename T> +void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_offset) { + int32_t target_offset = 
instruction.GetTargetOffset(); + PotentiallyAddSuspendCheck(target_offset, dex_offset); + HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); + T* comparison = new (arena_) T(value, GetIntConstant(0)); + current_block_->AddInstruction(comparison); + HInstruction* ifinst = new (arena_) HIf(comparison); + current_block_->AddInstruction(ifinst); + HBasicBlock* target = FindBlockStartingAt(dex_offset + target_offset); DCHECK(target != nullptr); current_block_->AddSuccessor(target); target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits()); @@ -120,9 +183,9 @@ HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // Setup the graph with the entry block and exit block. graph_ = new (arena_) HGraph(arena_); - entry_block_ = new (arena_) HBasicBlock(graph_); + entry_block_ = new (arena_) HBasicBlock(graph_, 0); graph_->AddBlock(entry_block_); - exit_block_ = new (arena_) HBasicBlock(graph_); + exit_block_ = new (arena_) HBasicBlock(graph_, kNoDexPc); graph_->SetEntryBlock(entry_block_); graph_->SetExitBlock(exit_block_); @@ -150,6 +213,8 @@ HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // Add the exit block at the end to give it the highest id. graph_->AddBlock(exit_block_); exit_block_->AddInstruction(new (arena_) HExit()); + // Add the suspend check to the entry block. + entry_block_->AddInstruction(new (arena_) HSuspendCheck(0)); entry_block_->AddInstruction(new (arena_) HGoto()); return graph_; } @@ -176,7 +241,7 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_ branch_targets_.SetSize(code_end - code_ptr); // Create the first block for the dex instructions, single successor of the entry block. - HBasicBlock* block = new (arena_) HBasicBlock(graph_); + HBasicBlock* block = new (arena_) HBasicBlock(graph_, 0); branch_targets_.Put(0, block); entry_block_->AddSuccessor(block); @@ -189,13 +254,13 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_ int32_t target = instruction.GetTargetOffset() + dex_offset; // Create a block for the target instruction. 
if (FindBlockStartingAt(target) == nullptr) { - block = new (arena_) HBasicBlock(graph_); + block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(target, block); } dex_offset += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); if ((code_ptr < code_end) && (FindBlockStartingAt(dex_offset) == nullptr)) { - block = new (arena_) HBasicBlock(graph_); + block = new (arena_) HBasicBlock(graph_, dex_offset); branch_targets_.Put(dex_offset, block); } } else { @@ -211,7 +276,7 @@ HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const { } template<typename T> -void HGraphBuilder::Binop_32x(const Instruction& instruction, Primitive::Type type) { +void HGraphBuilder::Binop_23x(const Instruction& instruction, Primitive::Type type) { HInstruction* first = LoadLocal(instruction.VRegB(), type); HInstruction* second = LoadLocal(instruction.VRegC(), type); current_block_->AddInstruction(new (arena_) T(type, first, second)); @@ -266,23 +331,70 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, bool is_range, uint32_t* args, uint32_t register_index) { + Instruction::Code opcode = instruction.Opcode(); + InvokeType invoke_type; + switch (opcode) { + case Instruction::INVOKE_STATIC: + case Instruction::INVOKE_STATIC_RANGE: + invoke_type = kStatic; + break; + case Instruction::INVOKE_DIRECT: + case Instruction::INVOKE_DIRECT_RANGE: + invoke_type = kDirect; + break; + case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_VIRTUAL_RANGE: + invoke_type = kVirtual; + break; + case Instruction::INVOKE_INTERFACE: + case Instruction::INVOKE_INTERFACE_RANGE: + invoke_type = kInterface; + break; + case Instruction::INVOKE_SUPER_RANGE: + case Instruction::INVOKE_SUPER: + invoke_type = kSuper; + break; + default: + LOG(FATAL) << "Unexpected invoke op: " << opcode; + return false; + } + const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx); const DexFile::ProtoId& proto_id = dex_file_->GetProtoId(method_id.proto_idx_); const char* descriptor = dex_file_->StringDataByIdx(proto_id.shorty_idx_); Primitive::Type return_type = Primitive::GetType(descriptor[0]); - bool is_instance_call = - instruction.Opcode() != Instruction::INVOKE_STATIC - && instruction.Opcode() != Instruction::INVOKE_STATIC_RANGE; + bool is_instance_call = invoke_type != kStatic; const size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1); - // Treat invoke-direct like static calls for now. - HInvoke* invoke = new (arena_) HInvokeStatic( - arena_, number_of_arguments, return_type, dex_offset, method_idx); + HInvoke* invoke = nullptr; + if (invoke_type == kVirtual) { + MethodReference target_method(dex_file_, method_idx); + uintptr_t direct_code; + uintptr_t direct_method; + int vtable_index; + // TODO: Add devirtualization support. + compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_offset, true, true, + &invoke_type, &target_method, &vtable_index, + &direct_code, &direct_method); + if (vtable_index == -1) { + return false; + } + invoke = new (arena_) HInvokeVirtual( + arena_, number_of_arguments, return_type, dex_offset, vtable_index); + } else { + // Treat invoke-direct like static calls for now. + invoke = new (arena_) HInvokeStatic( + arena_, number_of_arguments, return_type, dex_offset, method_idx); + } size_t start_index = 0; + Temporaries temps(graph_, is_instance_call ? 1 : 0); if (is_instance_call) { HInstruction* arg = LoadLocal(is_range ? 
register_index : args[0], Primitive::kPrimNot); - invoke->SetArgumentAt(0, arg); + HNullCheck* null_check = new (arena_) HNullCheck(arg, dex_offset); + current_block_->AddInstruction(null_check); + temps.Add(null_check); + invoke->SetArgumentAt(0, null_check); start_index = 1; } @@ -290,28 +402,23 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, uint32_t argument_index = start_index; for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) { Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]); - switch (type) { - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - return false; - - default: { - if (!is_range && type == Primitive::kPrimLong && args[i] + 1 != args[i + 1]) { - LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol() - << " at " << dex_offset; - // We do not implement non sequential register pair. - return false; - } - HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type); - invoke->SetArgumentAt(argument_index, arg); - if (type == Primitive::kPrimLong) { - i++; - } - } + if (!IsTypeSupported(type)) { + return false; + } + if (!is_range && type == Primitive::kPrimLong && args[i] + 1 != args[i + 1]) { + LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol() + << " at " << dex_offset; + // We do not implement non sequential register pair. + return false; + } + HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type); + invoke->SetArgumentAt(argument_index, arg); + if (type == Primitive::kPrimLong) { + i++; } } - if (return_type == Primitive::kPrimDouble || return_type == Primitive::kPrimFloat) { + if (!IsTypeSupported(return_type)) { return false; } @@ -320,7 +427,99 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, return true; } -bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_t dex_offset) { +bool HGraphBuilder::BuildFieldAccess(const Instruction& instruction, + uint32_t dex_offset, + bool is_put) { + uint32_t source_or_dest_reg = instruction.VRegA_22c(); + uint32_t obj_reg = instruction.VRegB_22c(); + uint16_t field_index = instruction.VRegC_22c(); + + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + Handle<mirror::ArtField> resolved_field(hs.NewHandle( + compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa))); + + if (resolved_field.Get() == nullptr) { + return false; + } + if (resolved_field->IsVolatile()) { + return false; + } + + Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType(); + if (!IsTypeSupported(field_type)) { + return false; + } + + HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot); + current_block_->AddInstruction(new (arena_) HNullCheck(object, dex_offset)); + if (is_put) { + Temporaries temps(graph_, 1); + HInstruction* null_check = current_block_->GetLastInstruction(); + // We need one temporary for the null check. 
+ temps.Add(null_check); + HInstruction* value = LoadLocal(source_or_dest_reg, field_type); + current_block_->AddInstruction(new (arena_) HInstanceFieldSet( + null_check, + value, + field_type, + resolved_field->GetOffset())); + } else { + current_block_->AddInstruction(new (arena_) HInstanceFieldGet( + current_block_->GetLastInstruction(), + field_type, + resolved_field->GetOffset())); + + UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); + } + return true; +} + +void HGraphBuilder::BuildArrayAccess(const Instruction& instruction, + uint32_t dex_offset, + bool is_put, + Primitive::Type anticipated_type) { + uint8_t source_or_dest_reg = instruction.VRegA_23x(); + uint8_t array_reg = instruction.VRegB_23x(); + uint8_t index_reg = instruction.VRegC_23x(); + + DCHECK(IsTypeSupported(anticipated_type)); + + // We need one temporary for the null check, one for the index, and one for the length. + Temporaries temps(graph_, 3); + + HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot); + object = new (arena_) HNullCheck(object, dex_offset); + current_block_->AddInstruction(object); + temps.Add(object); + + HInstruction* length = new (arena_) HArrayLength(object); + current_block_->AddInstruction(length); + temps.Add(length); + HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt); + index = new (arena_) HBoundsCheck(index, length, dex_offset); + current_block_->AddInstruction(index); + temps.Add(index); + if (is_put) { + HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type); + // TODO: Insert a type check node if the type is Object. + current_block_->AddInstruction(new (arena_) HArraySet( + object, index, value, anticipated_type, dex_offset)); + } else { + current_block_->AddInstruction(new (arena_) HArrayGet(object, index, anticipated_type)); + UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); + } +} + +void HGraphBuilder::PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_offset) { + if (target_offset <= 0) { + // Unconditionnally add a suspend check to backward branches. We can remove + // them after we recognize loops in the graph. + current_block_->AddInstruction(new (arena_) HSuspendCheck(dex_offset)); + } +} + +bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_offset) { if (current_block_ == nullptr) { return true; // Dead code } @@ -340,16 +539,38 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ break; } + case Instruction::CONST: { + int32_t register_index = instruction.VRegA(); + HIntConstant* constant = GetIntConstant(instruction.VRegB_31i()); + UpdateLocal(register_index, constant); + break; + } + + case Instruction::CONST_HIGH16: { + int32_t register_index = instruction.VRegA(); + HIntConstant* constant = GetIntConstant(instruction.VRegB_21h() << 16); + UpdateLocal(register_index, constant); + break; + } + case Instruction::CONST_WIDE_16: { int32_t register_index = instruction.VRegA(); - HLongConstant* constant = GetLongConstant(instruction.VRegB_21s()); + // Get 16 bits of constant value, sign extended to 64 bits. + int64_t value = instruction.VRegB_21s(); + value <<= 48; + value >>= 48; + HLongConstant* constant = GetLongConstant(value); UpdateLocal(register_index, constant); break; } case Instruction::CONST_WIDE_32: { int32_t register_index = instruction.VRegA(); - HLongConstant* constant = GetLongConstant(instruction.VRegB_31i()); + // Get 32 bits of constant value, sign extended to 64 bits. 
+ int64_t value = instruction.VRegB_31i(); + value <<= 32; + value >>= 32; + HLongConstant* constant = GetLongConstant(value); UpdateLocal(register_index, constant); break; } @@ -361,31 +582,64 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ break; } - case Instruction::MOVE: { + case Instruction::CONST_WIDE_HIGH16: { + int32_t register_index = instruction.VRegA(); + int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48; + HLongConstant* constant = GetLongConstant(value); + UpdateLocal(register_index, constant); + break; + } + + // TODO: these instructions are also used to move floating point values, so what is + // the type (int or float)? + case Instruction::MOVE: + case Instruction::MOVE_FROM16: + case Instruction::MOVE_16: { HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); UpdateLocal(instruction.VRegA(), value); break; } - case Instruction::RETURN_VOID: { - BuildReturn(instruction, Primitive::kPrimVoid); + // TODO: these instructions are also used to move floating point values, so what is + // the type (long or double)? + case Instruction::MOVE_WIDE: + case Instruction::MOVE_WIDE_FROM16: + case Instruction::MOVE_WIDE_16: { + HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong); + UpdateLocal(instruction.VRegA(), value); break; } - case Instruction::IF_EQ: { - If_22t<HEqual>(instruction, dex_offset, false); + case Instruction::MOVE_OBJECT: + case Instruction::MOVE_OBJECT_16: + case Instruction::MOVE_OBJECT_FROM16: { + HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot); + UpdateLocal(instruction.VRegA(), value); break; } - case Instruction::IF_NE: { - If_22t<HEqual>(instruction, dex_offset, true); + case Instruction::RETURN_VOID: { + BuildReturn(instruction, Primitive::kPrimVoid); break; } +#define IF_XX(comparison, cond) \ + case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_offset); break; \ + case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_offset); break + + IF_XX(HEqual, EQ); + IF_XX(HNotEqual, NE); + IF_XX(HLessThan, LT); + IF_XX(HLessThanOrEqual, LE); + IF_XX(HGreaterThan, GT); + IF_XX(HGreaterThanOrEqual, GE); + case Instruction::GOTO: case Instruction::GOTO_16: case Instruction::GOTO_32: { - HBasicBlock* target = FindBlockStartingAt(instruction.GetTargetOffset() + dex_offset); + int32_t offset = instruction.GetTargetOffset(); + PotentiallyAddSuspendCheck(offset, dex_offset); + HBasicBlock* target = FindBlockStartingAt(offset + dex_offset); DCHECK(target != nullptr); current_block_->AddInstruction(new (arena_) HGoto()); current_block_->AddSuccessor(target); @@ -409,7 +663,8 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ } case Instruction::INVOKE_STATIC: - case Instruction::INVOKE_DIRECT: { + case Instruction::INVOKE_DIRECT: + case Instruction::INVOKE_VIRTUAL: { uint32_t method_idx = instruction.VRegB_35c(); uint32_t number_of_vreg_arguments = instruction.VRegA_35c(); uint32_t args[5]; @@ -421,7 +676,8 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ } case Instruction::INVOKE_STATIC_RANGE: - case Instruction::INVOKE_DIRECT_RANGE: { + case Instruction::INVOKE_DIRECT_RANGE: + case Instruction::INVOKE_VIRTUAL_RANGE: { uint32_t method_idx = instruction.VRegB_3rc(); uint32_t number_of_vreg_arguments = instruction.VRegA_3rc(); uint32_t register_index = instruction.VRegC(); @@ -433,22 +689,22 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& 
instruction, int32_ } case Instruction::ADD_INT: { - Binop_32x<HAdd>(instruction, Primitive::kPrimInt); + Binop_23x<HAdd>(instruction, Primitive::kPrimInt); break; } case Instruction::ADD_LONG: { - Binop_32x<HAdd>(instruction, Primitive::kPrimLong); + Binop_23x<HAdd>(instruction, Primitive::kPrimLong); break; } case Instruction::SUB_INT: { - Binop_32x<HSub>(instruction, Primitive::kPrimInt); + Binop_23x<HSub>(instruction, Primitive::kPrimInt); break; } case Instruction::SUB_LONG: { - Binop_32x<HSub>(instruction, Primitive::kPrimLong); + Binop_23x<HSub>(instruction, Primitive::kPrimLong); break; } @@ -500,14 +756,70 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ } case Instruction::MOVE_RESULT: - case Instruction::MOVE_RESULT_WIDE: { + case Instruction::MOVE_RESULT_WIDE: + case Instruction::MOVE_RESULT_OBJECT: UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); break; + + case Instruction::CMP_LONG: { + Binop_23x<HCompare>(instruction, Primitive::kPrimLong); + break; } case Instruction::NOP: break; + case Instruction::IGET: + case Instruction::IGET_WIDE: + case Instruction::IGET_OBJECT: + case Instruction::IGET_BOOLEAN: + case Instruction::IGET_BYTE: + case Instruction::IGET_CHAR: + case Instruction::IGET_SHORT: { + if (!BuildFieldAccess(instruction, dex_offset, false)) { + return false; + } + break; + } + + case Instruction::IPUT: + case Instruction::IPUT_WIDE: + case Instruction::IPUT_OBJECT: + case Instruction::IPUT_BOOLEAN: + case Instruction::IPUT_BYTE: + case Instruction::IPUT_CHAR: + case Instruction::IPUT_SHORT: { + if (!BuildFieldAccess(instruction, dex_offset, true)) { + return false; + } + break; + } + +#define ARRAY_XX(kind, anticipated_type) \ + case Instruction::AGET##kind: { \ + BuildArrayAccess(instruction, dex_offset, false, anticipated_type); \ + break; \ + } \ + case Instruction::APUT##kind: { \ + BuildArrayAccess(instruction, dex_offset, true, anticipated_type); \ + break; \ + } + + ARRAY_XX(, Primitive::kPrimInt); + ARRAY_XX(_WIDE, Primitive::kPrimLong); + ARRAY_XX(_OBJECT, Primitive::kPrimNot); + ARRAY_XX(_BOOLEAN, Primitive::kPrimBoolean); + ARRAY_XX(_BYTE, Primitive::kPrimByte); + ARRAY_XX(_CHAR, Primitive::kPrimChar); + ARRAY_XX(_SHORT, Primitive::kPrimShort); + + case Instruction::ARRAY_LENGTH: { + HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot); + current_block_->AddInstruction(new (arena_) HArrayLength(object)); + UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction()); + break; + } + default: return false; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 108514a632..e143786be7 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -18,27 +18,23 @@ #define ART_COMPILER_OPTIMIZING_BUILDER_H_ #include "dex_file.h" +#include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "primitive.h" #include "utils/allocation.h" #include "utils/growable_array.h" +#include "nodes.h" namespace art { -class ArenaAllocator; class Instruction; -class HBasicBlock; -class HGraph; -class HIntConstant; -class HLongConstant; -class HInstruction; -class HLocal; class HGraphBuilder : public ValueObject { public: HGraphBuilder(ArenaAllocator* arena, DexCompilationUnit* dex_compilation_unit = nullptr, - const DexFile* dex_file = nullptr) + const DexFile* dex_file = nullptr, + CompilerDriver* driver = nullptr) : arena_(arena), branch_targets_(arena, 0), locals_(arena, 0), @@ -49,7 +45,8 @@ class 
HGraphBuilder : public ValueObject { constant0_(nullptr), constant1_(nullptr), dex_file_(dex_file), - dex_compilation_unit_(dex_compilation_unit) { } + dex_compilation_unit_(dex_compilation_unit), + compiler_driver_(driver) {} HGraph* BuildGraph(const DexFile::CodeItem& code); @@ -57,7 +54,7 @@ class HGraphBuilder : public ValueObject { // Analyzes the dex instruction and adds HInstruction to the graph // to execute that instruction. Returns whether the instruction can // be handled. - bool AnalyzeDexInstruction(const Instruction& instruction, int32_t dex_offset); + bool AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_offset); // Finds all instructions that start a new block, and populates branch_targets_ with // the newly created blocks. @@ -73,13 +70,14 @@ class HGraphBuilder : public ValueObject { HLocal* GetLocalAt(int register_index) const; void UpdateLocal(int register_index, HInstruction* instruction) const; HInstruction* LoadLocal(int register_index, Primitive::Type type) const; + void PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_offset); // Temporarily returns whether the compiler supports the parameters // of the method. bool InitializeParameters(uint16_t number_of_parameters); template<typename T> - void Binop_32x(const Instruction& instruction, Primitive::Type type); + void Binop_23x(const Instruction& instruction, Primitive::Type type); template<typename T> void Binop_12x(const Instruction& instruction, Primitive::Type type); @@ -90,10 +88,17 @@ class HGraphBuilder : public ValueObject { template<typename T> void Binop_22s(const Instruction& instruction, bool reverse); - template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset, bool is_not); + template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_offset); + template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_offset); void BuildReturn(const Instruction& instruction, Primitive::Type type); + bool BuildFieldAccess(const Instruction& instruction, uint32_t dex_offset, bool is_get); + void BuildArrayAccess(const Instruction& instruction, + uint32_t dex_offset, + bool is_get, + Primitive::Type anticipated_type); + // Builds an invocation node and returns whether the instruction is supported. 
bool BuildInvoke(const Instruction& instruction, uint32_t dex_offset, @@ -122,6 +127,7 @@ class HGraphBuilder : public ValueObject { const DexFile* const dex_file_; DexCompilationUnit* const dex_compilation_unit_; + CompilerDriver* const compiler_driver_; DISALLOW_COPY_AND_ASSIGN(HGraphBuilder); }; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index beafbcc386..2a9a7b37ab 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -18,40 +18,87 @@ #include "code_generator_arm.h" #include "code_generator_x86.h" +#include "code_generator_x86_64.h" +#include "compiled_method.h" #include "dex/verified_method.h" #include "driver/dex_compilation_unit.h" #include "gc_map_builder.h" #include "leb128.h" #include "mapping_table.h" +#include "ssa_liveness_analysis.h" #include "utils/assembler.h" #include "verifier/dex_gc_map.h" #include "vmap_table.h" namespace art { -void CodeGenerator::Compile(CodeAllocator* allocator) { +void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { const GrowableArray<HBasicBlock*>& blocks = GetGraph()->GetBlocks(); DCHECK(blocks.Get(0) == GetGraph()->GetEntryBlock()); DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks.Get(1))); + block_labels_.SetSize(blocks.Size()); + + DCHECK_EQ(frame_size_, kUninitializedFrameSize); + if (!is_leaf) { + MarkNotLeaf(); + } + ComputeFrameSize(GetGraph()->GetNumberOfLocalVRegs() + + GetGraph()->GetNumberOfTemporaries() + + 1 /* filler */, + 0, /* the baseline compiler does not have live registers at slow path */ + GetGraph()->GetMaximumNumberOfOutVRegs() + + 1 /* current method */); GenerateFrameEntry(); + + HGraphVisitor* location_builder = GetLocationBuilder(); + HGraphVisitor* instruction_visitor = GetInstructionVisitor(); for (size_t i = 0, e = blocks.Size(); i < e; ++i) { - CompileBlock(blocks.Get(i)); + HBasicBlock* block = blocks.Get(i); + Bind(GetLabelOf(block)); + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); + current->Accept(location_builder); + InitLocations(current); + current->Accept(instruction_visitor); + } } + GenerateSlowPaths(); + size_t code_size = GetAssembler()->CodeSize(); uint8_t* buffer = allocator->Allocate(code_size); MemoryRegion code(buffer, code_size); GetAssembler()->FinalizeInstructions(code); } -void CodeGenerator::CompileBlock(HBasicBlock* block) { - Bind(GetLabelOf(block)); - HGraphVisitor* location_builder = GetLocationBuilder(); +void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { + // The frame size has already been computed during register allocation. 
+ DCHECK_NE(frame_size_, kUninitializedFrameSize); + const GrowableArray<HBasicBlock*>& blocks = GetGraph()->GetBlocks(); + DCHECK(blocks.Get(0) == GetGraph()->GetEntryBlock()); + DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks.Get(1))); + block_labels_.SetSize(blocks.Size()); + + GenerateFrameEntry(); HGraphVisitor* instruction_visitor = GetInstructionVisitor(); - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* current = it.Current(); - current->Accept(location_builder); - InitLocations(current); - current->Accept(instruction_visitor); + for (size_t i = 0, e = blocks.Size(); i < e; ++i) { + HBasicBlock* block = blocks.Get(i); + Bind(GetLabelOf(block)); + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); + current->Accept(instruction_visitor); + } + } + GenerateSlowPaths(); + + size_t code_size = GetAssembler()->CodeSize(); + uint8_t* buffer = allocator->Allocate(code_size); + MemoryRegion code(buffer, code_size); + GetAssembler()->FinalizeInstructions(code); +} + +void CodeGenerator::GenerateSlowPaths() { + for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) { + slow_paths_.Get(i)->EmitNativeCode(this); } } @@ -63,10 +110,47 @@ size_t CodeGenerator::AllocateFreeRegisterInternal( return regno; } } - LOG(FATAL) << "Unreachable"; return -1; } +void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots, + size_t maximum_number_of_live_registers, + size_t number_of_out_slots) { + first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; + + SetFrameSize(RoundUp( + number_of_spill_slots * kVRegSize + + number_of_out_slots * kVRegSize + + maximum_number_of_live_registers * GetWordSize() + + FrameEntrySpillSize(), + kStackAlignment)); +} + +Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const { + uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs(); + // Use the temporary region (right below the dex registers). + int32_t slot = GetFrameSize() - FrameEntrySpillSize() + - kVRegSize // filler + - (number_of_locals * kVRegSize) + - ((1 + temp->GetIndex()) * kVRegSize); + return Location::StackSlot(slot); +} + +int32_t CodeGenerator::GetStackSlot(HLocal* local) const { + uint16_t reg_number = local->GetRegNumber(); + uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs(); + if (reg_number >= number_of_locals) { + // Local is a parameter of the method. It is stored in the caller's frame. + return GetFrameSize() + kVRegSize // ART method + + (reg_number - number_of_locals) * kVRegSize; + } else { + // Local is a temporary in this method. It is stored in this method's frame. + return GetFrameSize() - FrameEntrySpillSize() + - kVRegSize // filler. + - (number_of_locals * kVRegSize) + + (reg_number * kVRegSize); + } +} void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { LocationSummary* locations = instruction->GetLocations(); @@ -131,13 +215,6 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { locations->SetTempAt(i, loc); } } - - // Make all registers available for the return value. 
- for (size_t i = 0, e = GetNumberOfRegisters(); i < e; ++i) { - blocked_registers_[i] = false; - } - SetupBlockedRegisters(blocked_registers_); - Location result_location = locations->Out(); if (result_location.IsUnallocated()) { switch (result_location.GetPolicy()) { @@ -156,6 +233,12 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { void CodeGenerator::InitLocations(HInstruction* instruction) { if (instruction->GetLocations() == nullptr) { + if (instruction->IsTemporary()) { + HInstruction* previous = instruction->GetPrevious(); + Location temp_location = GetTemporaryLocation(instruction->AsTemporary()); + Move(previous, temp_location, instruction); + previous->GetLocations()->SetOut(temp_location); + } return; } AllocateRegistersLocally(instruction); @@ -191,7 +274,7 @@ CodeGenerator* CodeGenerator::Create(ArenaAllocator* allocator, return new (allocator) x86::CodeGeneratorX86(graph); } case kX86_64: { - return new (allocator) x86::CodeGeneratorX86(graph); + return new (allocator) x86_64::CodeGeneratorX86_64(graph); } default: return nullptr; @@ -223,7 +306,7 @@ void CodeGenerator::BuildNativeGCMap( } } -void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { +void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, SrcMap* src_map) const { uint32_t pc2dex_data_size = 0u; uint32_t pc2dex_entries = pc_infos_.Size(); uint32_t pc2dex_offset = 0u; @@ -231,6 +314,10 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { uint32_t dex2pc_data_size = 0u; uint32_t dex2pc_entries = 0u; + if (src_map != nullptr) { + src_map->reserve(pc2dex_entries); + } + // We currently only have pc2dex entries. for (size_t i = 0; i < pc2dex_entries; i++) { struct PcInfo pc_info = pc_infos_.Get(i); @@ -238,6 +325,9 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset); pc2dex_offset = pc_info.native_pc; pc2dex_dalvik_offset = pc_info.dex_pc; + if (src_map != nullptr) { + src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); + } } uint32_t total_entries = pc2dex_entries + dex2pc_entries; @@ -294,4 +384,158 @@ void CodeGenerator::BuildVMapTable(std::vector<uint8_t>* data) const { *data = vmap_encoder.GetData(); } +void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) { + uint32_t size = stack_map_stream_.ComputeNeededSize(); + data->resize(size); + MemoryRegion region(data->data(), size); + stack_map_stream_.FillIn(region); +} + +void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { + // Collect PC infos for the mapping table. + struct PcInfo pc_info; + pc_info.dex_pc = dex_pc; + pc_info.native_pc = GetAssembler()->CodeSize(); + pc_infos_.Add(pc_info); + + // Populate stack map information. + + if (instruction == nullptr) { + // For stack overflow checks. + stack_map_stream_.AddStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, 0); + return; + } + + LocationSummary* locations = instruction->GetLocations(); + HEnvironment* environment = instruction->GetEnvironment(); + + size_t environment_size = instruction->EnvironmentSize(); + + size_t register_mask = 0; + size_t inlining_depth = 0; + stack_map_stream_.AddStackMapEntry( + dex_pc, pc_info.native_pc, register_mask, + locations->GetStackMask(), environment_size, inlining_depth); + + // Walk over the environment, and record the location of dex registers. 
+ for (size_t i = 0; i < environment_size; ++i) { + HInstruction* current = environment->GetInstructionAt(i); + if (current == nullptr) { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kNone, 0); + continue; + } + + Location location = locations->GetEnvironmentAt(i); + switch (location.GetKind()) { + case Location::kConstant: { + DCHECK(current == location.GetConstant()); + if (current->IsLongConstant()) { + int64_t value = current->AsLongConstant()->GetValue(); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value)); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, High32Bits(value)); + ++i; + DCHECK_LT(i, environment_size); + } else { + DCHECK(current->IsIntConstant()); + int32_t value = current->AsIntConstant()->GetValue(); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value); + } + break; + } + + case Location::kStackSlot: { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack, location.GetStackIndex()); + break; + } + + case Location::kDoubleStackSlot: { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack, location.GetStackIndex()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack, + location.GetHighStackIndex(kVRegSize)); + ++i; + DCHECK_LT(i, environment_size); + break; + } + + case Location::kRegister : { + int id = location.reg().RegId(); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id); + if (current->GetType() == Primitive::kPrimDouble + || current->GetType() == Primitive::kPrimLong) { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id); + ++i; + DCHECK_LT(i, environment_size); + } + break; + } + + default: + LOG(FATAL) << "Unexpected kind " << location.GetKind(); + } + } +} + +size_t CodeGenerator::GetStackOffsetOfSavedRegister(size_t index) { + return first_register_slot_in_slow_path_ + index * GetWordSize(); +} + +void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) { + RegisterSet* register_set = locations->GetLiveRegisters(); + uint32_t count = 0; + for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { + if (register_set->ContainsCoreRegister(i)) { + size_t stack_offset = GetStackOffsetOfSavedRegister(count); + ++count; + SaveCoreRegister(Location::StackSlot(stack_offset), i); + // If the register holds an object, update the stack mask. 
+ if (locations->RegisterContainsObject(i)) { + locations->SetStackBit(stack_offset / kVRegSize); + } + } + } + + for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { + if (register_set->ContainsFloatingPointRegister(i)) { + LOG(FATAL) << "Unimplemented"; + } + } +} + +void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) { + RegisterSet* register_set = locations->GetLiveRegisters(); + uint32_t count = 0; + for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { + if (register_set->ContainsCoreRegister(i)) { + size_t stack_offset = GetStackOffsetOfSavedRegister(count); + ++count; + RestoreCoreRegister(Location::StackSlot(stack_offset), i); + } + } + + for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { + if (register_set->ContainsFloatingPointRegister(i)) { + LOG(FATAL) << "Unimplemented"; + } + } +} + +void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const { + LocationSummary* locations = suspend_check->GetLocations(); + HBasicBlock* block = suspend_check->GetBlock(); + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == suspend_check); + DCHECK(block->IsLoopHeader()); + + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); + LiveInterval* interval = current->GetLiveInterval(); + // We only need to clear bits of loop phis containing objects and allocated in register. + // Loop phis allocated on stack already have the object in the stack. + if (current->GetType() == Primitive::kPrimNot + && interval->HasRegister() + && interval->HasSpillSlot()) { + locations->ClearStackBit(interval->GetSpillSlot() / kVRegSize); + } + } +} + } // namespace art diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index e197ccd517..b58f3b3efc 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -23,18 +23,22 @@ #include "locations.h" #include "memory_region.h" #include "nodes.h" +#include "stack_map_stream.h" #include "utils/assembler.h" namespace art { static size_t constexpr kVRegSize = 4; +static size_t constexpr kUninitializedFrameSize = 0; +class CodeGenerator; class DexCompilationUnit; +class SrcMap; class CodeAllocator { public: - CodeAllocator() { } - virtual ~CodeAllocator() { } + CodeAllocator() {} + virtual ~CodeAllocator() {} virtual uint8_t* Allocate(size_t size) = 0; @@ -47,11 +51,29 @@ struct PcInfo { uintptr_t native_pc; }; +class SlowPathCode : public ArenaObject { + public: + SlowPathCode() : entry_label_(), exit_label_() {} + virtual ~SlowPathCode() {} + + Label* GetEntryLabel() { return &entry_label_; } + Label* GetExitLabel() { return &exit_label_; } + + virtual void EmitNativeCode(CodeGenerator* codegen) = 0; + + private: + Label entry_label_; + Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCode); +}; + class CodeGenerator : public ArenaObject { public: // Compiles the graph to executable instructions. Returns whether the compilation // succeeded. 
- void Compile(CodeAllocator* allocator); + void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false); + void CompileOptimized(CodeAllocator* allocator); static CodeGenerator* Create(ArenaAllocator* allocator, HGraph* graph, InstructionSet instruction_set); @@ -61,6 +83,13 @@ class CodeGenerator : public ArenaObject { Label* GetLabelOf(HBasicBlock* block) const; bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const; + size_t GetStackSlotOfParameter(HParameterValue* parameter) const { + // Note that this follows the current calling convention. + return GetFrameSize() + + kVRegSize // Art method + + parameter->GetIndex() * kVRegSize; + } + virtual void GenerateFrameEntry() = 0; virtual void GenerateFrameExit() = 0; virtual void Bind(Label* label) = 0; @@ -69,6 +98,12 @@ class CodeGenerator : public ArenaObject { virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; virtual size_t GetWordSize() const = 0; + void ComputeFrameSize(size_t number_of_spill_slots, + size_t maximum_number_of_live_registers, + size_t number_of_out_slots); + virtual size_t FrameEntrySpillSize() const = 0; + int32_t GetStackSlot(HLocal* local) const; + Location GetTemporaryLocation(HTemporary* temp) const; uint32_t GetFrameSize() const { return frame_size_; } void SetFrameSize(uint32_t size) { frame_size_ = size; } @@ -80,29 +115,54 @@ class CodeGenerator : public ArenaObject { virtual void SetupBlockedRegisters(bool* blocked_registers) const = 0; virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; + virtual InstructionSet GetInstructionSet() const = 0; + virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) = 0; + virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) = 0; - void RecordPcInfo(uint32_t dex_pc) { - struct PcInfo pc_info; - pc_info.dex_pc = dex_pc; - pc_info.native_pc = GetAssembler()->CodeSize(); - pc_infos_.Add(pc_info); + void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc); + + void AddSlowPath(SlowPathCode* slow_path) { + slow_paths_.Add(slow_path); } - void BuildMappingTable(std::vector<uint8_t>* vector) const; + void GenerateSlowPaths(); + + void BuildMappingTable(std::vector<uint8_t>* vector, SrcMap* src_map) const; void BuildVMapTable(std::vector<uint8_t>* vector) const; void BuildNativeGCMap( std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; + void BuildStackMaps(std::vector<uint8_t>* vector); + void SaveLiveRegisters(LocationSummary* locations); + void RestoreLiveRegisters(LocationSummary* locations); + + bool IsLeafMethod() const { + return is_leaf_; + } + + void MarkNotLeaf() { + is_leaf_ = false; + } + + // Clears the spill slots taken by loop phis in the `LocationSummary` of the + // suspend check. This is called when the code generator generates code + // for the suspend check at the back edge (instead of where the suspend check + // is, which is the loop entry). At this point, the spill slots for the phis + // have not been written to. 
+ void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const; protected: CodeGenerator(HGraph* graph, size_t number_of_registers) - : frame_size_(0), + : frame_size_(kUninitializedFrameSize), + core_spill_mask_(0), + first_register_slot_in_slow_path_(0), graph_(graph), block_labels_(graph->GetArena(), 0), pc_infos_(graph->GetArena(), 32), - blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)) { - block_labels_.SetSize(graph->GetBlocks().Size()); - } - ~CodeGenerator() { } + slow_paths_(graph->GetArena(), 8), + blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)), + is_leaf_(true), + stack_map_stream_(graph->GetArena()) {} + ~CodeGenerator() {} // Register allocation logic. void AllocateRegistersLocally(HInstruction* instruction) const; @@ -120,20 +180,26 @@ class CodeGenerator : public ArenaObject { // Frame size required for this method. uint32_t frame_size_; uint32_t core_spill_mask_; + uint32_t first_register_slot_in_slow_path_; private: void InitLocations(HInstruction* instruction); - void CompileBlock(HBasicBlock* block); + size_t GetStackOffsetOfSavedRegister(size_t index); HGraph* const graph_; // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; GrowableArray<PcInfo> pc_infos_; + GrowableArray<SlowPathCode*> slow_paths_; // Temporary data structure used when doing register allocation. bool* const blocked_registers_; + bool is_leaf_; + + StackMapStream stack_map_stream_; + DISALLOW_COPY_AND_ASSIGN(CodeGenerator); }; @@ -150,10 +216,10 @@ class CallingConvention { return registers_[index]; } - uint8_t GetStackOffsetOf(size_t index, size_t word_size) const { + uint8_t GetStackOffsetOf(size_t index) const { // We still reserve the space for parameters passed by registers. - // Add word_size for the method pointer. - return index * kVRegSize + word_size; + // Add one for the method pointer. 
+ return (index + 1) * kVRegSize; } private: diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index ed3f43c9a5..1876cb9ca4 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -15,16 +15,17 @@ */ #include "code_generator_arm.h" -#include "utils/assembler.h" -#include "utils/arm/assembler_arm.h" -#include "utils/arm/managed_register_arm.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "gc/accounting/card_table.h" #include "mirror/array.h" #include "mirror/art_method.h" +#include "mirror/class.h" #include "thread.h" - -#define __ reinterpret_cast<ArmAssembler*>(GetAssembler())-> +#include "utils/assembler.h" +#include "utils/arm/assembler_arm.h" +#include "utils/arm/managed_register_arm.h" +#include "utils/stack_checks.h" namespace art { @@ -34,9 +35,158 @@ arm::ArmManagedRegister Location::AsArm() const { namespace arm { -static constexpr int kNumberOfPushedRegistersAtEntry = 1; +static constexpr bool kExplicitStackOverflowCheck = false; + +static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2; // LR, R6, R7 static constexpr int kCurrentMethodStackOffset = 0; +static Location ArmCoreLocation(Register reg) { + return Location::RegisterLocation(ArmManagedRegister::FromCoreRegister(reg)); +} + +static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2 }; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); + +class InvokeRuntimeCallingConvention : public CallingConvention<Register> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + +#define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())-> + +class NullCheckSlowPathARM : public SlowPathCode { + public: + explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowNullPointer).Int32Value(); + __ ldr(LR, Address(TR, offset)); + __ blx(LR); + codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + } + + private: + HNullCheck* const instruction_; + DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM); +}; + +class StackOverflowCheckSlowPathARM : public SlowPathCode { + public: + StackOverflowCheckSlowPathARM() {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + __ LoadFromOffset(kLoadWord, PC, TR, + QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowStackOverflow).Int32Value()); + } + + private: + DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM); +}; + +class SuspendCheckSlowPathARM : public SlowPathCode { + public: + explicit SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor) + : instruction_(instruction), successor_(successor) {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + codegen->SaveLiveRegisters(instruction_->GetLocations()); + int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pTestSuspend).Int32Value(); + __ ldr(LR, Address(TR, offset)); + __ blx(LR); + codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + codegen->RestoreLiveRegisters(instruction_->GetLocations()); + if (successor_ == nullptr) { + __ b(GetReturnLabel()); + } else { + __ 
b(codegen->GetLabelOf(successor_)); + } + } + + Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } + + private: + HSuspendCheck* const instruction_; + // If not null, the block to branch to after the suspend check. + HBasicBlock* const successor_; + + // If `successor_` is null, the label to branch to after the suspend check. + Label return_label_; + + DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM); +}; + +class BoundsCheckSlowPathARM : public SlowPathCode { + public: + BoundsCheckSlowPathARM(HBoundsCheck* instruction, + Location index_location, + Location length_location) + : instruction_(instruction), + index_location_(index_location), + length_location_(length_location) {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = reinterpret_cast<CodeGeneratorARM*>(codegen); + __ Bind(GetEntryLabel()); + InvokeRuntimeCallingConvention calling_convention; + arm_codegen->Move32(ArmCoreLocation(calling_convention.GetRegisterAt(0)), index_location_); + arm_codegen->Move32(ArmCoreLocation(calling_convention.GetRegisterAt(1)), length_location_); + int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowArrayBounds).Int32Value(); + __ ldr(LR, Address(TR, offset)); + __ blx(LR); + codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + } + + private: + HBoundsCheck* const instruction_; + const Location index_location_; + const Location length_location_; + + DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM); +}; + +#undef __ +#define __ reinterpret_cast<ArmAssembler*>(GetAssembler())-> + +inline Condition ARMCondition(IfCondition cond) { + switch (cond) { + case kCondEQ: return EQ; + case kCondNE: return NE; + case kCondLT: return LT; + case kCondLE: return LE; + case kCondGT: return GT; + case kCondGE: return GE; + default: + LOG(FATAL) << "Unknown if condition"; + } + return EQ; // Unreachable. +} + +inline Condition ARMOppositeCondition(IfCondition cond) { + switch (cond) { + case kCondEQ: return NE; + case kCondNE: return EQ; + case kCondLT: return GE; + case kCondLE: return GT; + case kCondGT: return LE; + case kCondGE: return LT; + default: + LOG(FATAL) << "Unknown if condition"; + } + return EQ; // Unreachable. 
+} + void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const { stream << ArmManagedRegister::FromCoreRegister(Register(reg)); } @@ -45,10 +195,24 @@ void CodeGeneratorARM::DumpFloatingPointRegister(std::ostream& stream, int reg) stream << ArmManagedRegister::FromDRegister(DRegister(reg)); } +void CodeGeneratorARM::SaveCoreRegister(Location stack_location, uint32_t reg_id) { + __ str(static_cast<Register>(reg_id), Address(SP, stack_location.GetStackIndex())); +} + +void CodeGeneratorARM::RestoreCoreRegister(Location stack_location, uint32_t reg_id) { + __ ldr(static_cast<Register>(reg_id), Address(SP, stack_location.GetStackIndex())); +} + CodeGeneratorARM::CodeGeneratorARM(HGraph* graph) : CodeGenerator(graph, kNumberOfRegIds), location_builder_(graph, this), - instruction_visitor_(graph, this) {} + instruction_visitor_(graph, this), + move_resolver_(graph->GetArena(), this), + assembler_(true) {} + +size_t CodeGeneratorARM::FrameEntrySpillSize() const { + return kNumberOfPushedRegistersAtEntry * kArmWordSize; +} static bool* GetBlockedRegisterPairs(bool* blocked_registers) { return blocked_registers + kNumberOfAllocIds; @@ -58,12 +222,23 @@ ManagedRegister CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type, bool* blocked_registers) const { switch (type) { case Primitive::kPrimLong: { - size_t reg = AllocateFreeRegisterInternal( - GetBlockedRegisterPairs(blocked_registers), kNumberOfRegisterPairs); + bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers); + size_t reg = AllocateFreeRegisterInternal(blocked_register_pairs, kNumberOfRegisterPairs); ArmManagedRegister pair = ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg)); blocked_registers[pair.AsRegisterPairLow()] = true; blocked_registers[pair.AsRegisterPairHigh()] = true; + // Block all other register pairs that share a register with `pair`. + for (int i = 0; i < kNumberOfRegisterPairs; i++) { + ArmManagedRegister current = + ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); + if (current.AsRegisterPairLow() == pair.AsRegisterPairLow() + || current.AsRegisterPairLow() == pair.AsRegisterPairHigh() + || current.AsRegisterPairHigh() == pair.AsRegisterPairLow() + || current.AsRegisterPairHigh() == pair.AsRegisterPairHigh()) { + blocked_register_pairs[i] = true; + } + } return pair; } @@ -73,7 +248,16 @@ ManagedRegister CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type, case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimNot: { - size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCoreRegisters); + int reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCoreRegisters); + // Block all register pairs that contain `reg`. + bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers); + for (int i = 0; i < kNumberOfRegisterPairs; i++) { + ArmManagedRegister current = + ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); + if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) { + blocked_register_pairs[i] = true; + } + } return ArmManagedRegister::FromCoreRegister(static_cast<Register>(reg)); } @@ -106,39 +290,47 @@ void CodeGeneratorARM::SetupBlockedRegisters(bool* blocked_registers) const { // Reserve thread register. blocked_registers[TR] = true; + // Reserve temp register. + blocked_registers[IP] = true; + // TODO: We currently don't use Quick's callee saved registers. 
+ // We always save and restore R6 and R7 to make sure we can use three + // register pairs for long operations. blocked_registers[R5] = true; - blocked_registers[R6] = true; - blocked_registers[R7] = true; blocked_registers[R8] = true; blocked_registers[R10] = true; blocked_registers[R11] = true; - blocked_register_pairs[R6_R7] = true; } size_t CodeGeneratorARM::GetNumberOfRegisters() const { return kNumberOfRegIds; } -static Location ArmCoreLocation(Register reg) { - return Location::RegisterLocation(ArmManagedRegister::FromCoreRegister(reg)); -} - InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen) : HGraphVisitor(graph), assembler_(codegen->GetAssembler()), codegen_(codegen) {} void CodeGeneratorARM::GenerateFrameEntry() { - core_spill_mask_ |= (1 << LR); - __ PushList((1 << LR)); - - SetFrameSize(RoundUp( - (GetGraph()->GetMaximumNumberOfOutVRegs() + GetGraph()->GetNumberOfVRegs()) * kVRegSize - + kVRegSize // filler - + kArmWordSize // Art method - + kNumberOfPushedRegistersAtEntry * kArmWordSize, - kStackAlignment)); + bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); + if (!skip_overflow_check) { + if (kExplicitStackOverflowCheck) { + SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM(); + AddSlowPath(slow_path); + + __ LoadFromOffset(kLoadWord, IP, TR, Thread::StackEndOffset<kArmWordSize>().Int32Value()); + __ cmp(SP, ShifterOperand(IP)); + __ b(slow_path->GetEntryLabel(), CC); + } else { + __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); + __ ldr(IP, Address(IP, 0)); + RecordPcInfo(nullptr, 0); + } + } + + core_spill_mask_ |= (1 << LR | 1 << R6 | 1 << R7); + __ PushList(1 << LR | 1 << R6 | 1 << R7); + // The return PC has already been pushed on the stack. __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize)); __ str(R0, Address(SP, 0)); @@ -146,30 +338,13 @@ void CodeGeneratorARM::GenerateFrameEntry() { void CodeGeneratorARM::GenerateFrameExit() { __ AddConstant(SP, GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize); - __ PopList((1 << PC)); + __ PopList(1 << PC | 1 << R6 | 1 << R7); } void CodeGeneratorARM::Bind(Label* label) { __ Bind(label); } -int32_t CodeGeneratorARM::GetStackSlot(HLocal* local) const { - uint16_t reg_number = local->GetRegNumber(); - uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs(); - uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs(); - if (reg_number >= number_of_vregs - number_of_in_vregs) { - // Local is a parameter of the method. It is stored in the caller's frame. - return GetFrameSize() + kArmWordSize // ART method - + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize; - } else { - // Local is a temporary in this method. It is stored in this method's frame. - return GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kArmWordSize) - - kVRegSize // filler. 
- - (number_of_vregs * kVRegSize) - + (reg_number * kVRegSize); - } -} - Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const { switch (load->GetType()) { case Primitive::kPrimLong: @@ -208,7 +383,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type if (index < calling_convention.GetNumberOfRegisters()) { return ArmCoreLocation(calling_convention.GetRegisterAt(index)); } else { - return Location::StackSlot(calling_convention.GetStackOffsetOf(index, kArmWordSize)); + return Location::StackSlot(calling_convention.GetStackOffsetOf(index)); } } @@ -221,7 +396,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } else if (index + 1 == calling_convention.GetNumberOfRegisters()) { return Location::QuickParameter(index); } else { - return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index, kArmWordSize)); + return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index)); } } @@ -252,8 +427,8 @@ void CodeGeneratorARM::Move32(Location destination, Location source) { if (source.IsRegister()) { __ str(source.AsArm().AsCoreRegister(), Address(SP, destination.GetStackIndex())); } else { - __ ldr(R0, Address(SP, source.GetStackIndex())); - __ str(R0, Address(SP, destination.GetStackIndex())); + __ ldr(IP, Address(SP, source.GetStackIndex())); + __ str(IP, Address(SP, destination.GetStackIndex())); } } } @@ -272,7 +447,7 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { __ Mov(destination.AsArm().AsRegisterPairLow(), calling_convention.GetRegisterAt(argument_index)); __ ldr(destination.AsArm().AsRegisterPairHigh(), - Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize) + GetFrameSize())); + Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize())); } else { DCHECK(source.IsDoubleStackSlot()); if (destination.AsArm().AsRegisterPair() == R1_R2) { @@ -289,12 +464,12 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { if (source.IsRegister()) { __ Mov(calling_convention.GetRegisterAt(argument_index), source.AsArm().AsRegisterPairLow()); __ str(source.AsArm().AsRegisterPairHigh(), - Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize))); + Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1))); } else { DCHECK(source.IsDoubleStackSlot()); __ ldr(calling_convention.GetRegisterAt(argument_index), Address(SP, source.GetStackIndex())); __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize))); - __ str(R0, Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize))); + __ str(R0, Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1))); } } else { DCHECK(destination.IsDoubleStackSlot()); @@ -312,26 +487,32 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { __ str(calling_convention.GetRegisterAt(argument_index), Address(SP, destination.GetStackIndex())); __ ldr(R0, - Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize) + GetFrameSize())); + Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize())); __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize))); } else { DCHECK(source.IsDoubleStackSlot()); - __ ldr(R0, Address(SP, source.GetStackIndex())); - __ str(R0, Address(SP, destination.GetStackIndex())); - __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize))); - __ str(R0, Address(SP, 
destination.GetHighStackIndex(kArmWordSize))); + __ ldr(IP, Address(SP, source.GetStackIndex())); + __ str(IP, Address(SP, destination.GetStackIndex())); + __ ldr(IP, Address(SP, source.GetHighStackIndex(kArmWordSize))); + __ str(IP, Address(SP, destination.GetHighStackIndex(kArmWordSize))); } } } void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstruction* move_for) { + LocationSummary* locations = instruction->GetLocations(); + if (locations != nullptr && locations->Out().Equals(location)) { + return; + } + if (instruction->AsIntConstant() != nullptr) { int32_t value = instruction->AsIntConstant()->GetValue(); if (location.IsRegister()) { __ LoadImmediate(location.AsArm().AsCoreRegister(), value); } else { - __ LoadImmediate(R0, value); - __ str(R0, Address(SP, location.GetStackIndex())); + DCHECK(location.IsStackSlot()); + __ LoadImmediate(IP, value); + __ str(IP, Address(SP, location.GetStackIndex())); } } else if (instruction->AsLongConstant() != nullptr) { int64_t value = instruction->AsLongConstant()->GetValue(); @@ -339,10 +520,11 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr __ LoadImmediate(location.AsArm().AsRegisterPairLow(), Low32Bits(value)); __ LoadImmediate(location.AsArm().AsRegisterPairHigh(), High32Bits(value)); } else { - __ LoadImmediate(R0, Low32Bits(value)); - __ str(R0, Address(SP, location.GetStackIndex())); - __ LoadImmediate(R0, High32Bits(value)); - __ str(R0, Address(SP, location.GetHighStackIndex(kArmWordSize))); + DCHECK(location.IsDoubleStackSlot()); + __ LoadImmediate(IP, Low32Bits(value)); + __ str(IP, Address(SP, location.GetStackIndex())); + __ LoadImmediate(IP, High32Bits(value)); + __ str(IP, Address(SP, location.GetHighStackIndex(kArmWordSize))); } } else if (instruction->AsLoadLocal() != nullptr) { uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal()); @@ -364,9 +546,7 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr LOG(FATAL) << "Unimplemented type " << instruction->GetType(); } } else { - // This can currently only happen when the instruction that requests the move - // is the next to be compiled. 
- DCHECK_EQ(instruction->GetNext(), move_for); + DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary()); switch (instruction->GetType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -374,11 +554,11 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr case Primitive::kPrimShort: case Primitive::kPrimNot: case Primitive::kPrimInt: - Move32(location, instruction->GetLocations()->Out()); + Move32(location, locations->Out()); break; case Primitive::kPrimLong: - Move64(location, instruction->GetLocations()->Out()); + Move64(location, locations->Out()); break; default: @@ -393,9 +573,22 @@ void LocationsBuilderARM::VisitGoto(HGoto* got) { void InstructionCodeGeneratorARM::VisitGoto(HGoto* got) { HBasicBlock* successor = got->GetSuccessor(); - if (GetGraph()->GetExitBlock() == successor) { - codegen_->GenerateFrameExit(); - } else if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { + DCHECK(!successor->IsExitBlock()); + + HBasicBlock* block = got->GetBlock(); + HInstruction* previous = got->GetPrevious(); + + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) { + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; + } + + if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { __ b(codegen_->GetLabelOf(successor)); } } @@ -412,34 +605,137 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit) { } void LocationsBuilderARM::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - locations->SetInAt(0, Location::RequiresRegister()); - if_instr->SetLocations(locations); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + DCHECK(cond->IsCondition()); + HCondition* condition = cond->AsCondition(); + if (condition->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } } void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { - // TODO: Generate the input as a condition, instead of materializing in a register. - __ cmp(if_instr->GetLocations()->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(0)); - __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()), EQ); - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) { - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + HInstruction* cond = if_instr->InputAt(0); + DCHECK(cond->IsCondition()); + HCondition* condition = cond->AsCondition(); + if (condition->NeedsMaterialization()) { + // Condition has been materialized, compare the output to 0 + DCHECK(if_instr->GetLocations()->InAt(0).IsRegister()); + __ cmp(if_instr->GetLocations()->InAt(0).AsArm().AsCoreRegister(), + ShifterOperand(0)); + __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE); + } else { + // Condition has not been materialized, use its inputs as the comparison and its + // condition as the branch condition. 
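+    // The right-hand side is either a register or a constant (see VisitCondition):
+    // a constant that fits an ARM shifter operand is compared directly, otherwise
+    // it is first loaded into the scratch register IP.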
+ LocationSummary* locations = condition->GetLocations(); + if (locations->InAt(1).IsRegister()) { + __ cmp(locations->InAt(0).AsArm().AsCoreRegister(), + ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister())); + } else { + DCHECK(locations->InAt(1).IsConstant()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + ShifterOperand operand; + if (ShifterOperand::CanHoldArm(value, &operand)) { + __ cmp(locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(value)); + } else { + Register temp = IP; + __ LoadImmediate(temp, value); + __ cmp(locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(temp)); + } + } + __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), + ARMCondition(condition->GetCondition())); + } + + if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) { + __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); } } -void LocationsBuilderARM::VisitEqual(HEqual* equal) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal); + +void LocationsBuilderARM::VisitCondition(HCondition* comp) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); - equal->SetLocations(locations); + locations->SetInAt(1, Location::RegisterOrConstant(comp->InputAt(1))); + if (comp->NeedsMaterialization()) { + locations->SetOut(Location::RequiresRegister()); + } } -void InstructionCodeGeneratorARM::VisitEqual(HEqual* equal) { - LocationSummary* locations = equal->GetLocations(); - __ teq(locations->InAt(0).AsArm().AsCoreRegister(), - ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister())); - __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(1), EQ); - __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(0), NE); +void InstructionCodeGeneratorARM::VisitCondition(HCondition* comp) { + if (!comp->NeedsMaterialization()) return; + + LocationSummary* locations = comp->GetLocations(); + if (locations->InAt(1).IsRegister()) { + __ cmp(locations->InAt(0).AsArm().AsCoreRegister(), + ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister())); + } else { + DCHECK(locations->InAt(1).IsConstant()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + ShifterOperand operand; + if (ShifterOperand::CanHoldArm(value, &operand)) { + __ cmp(locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(value)); + } else { + Register temp = IP; + __ LoadImmediate(temp, value); + __ cmp(locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(temp)); + } + } + __ it(ARMCondition(comp->GetCondition()), kItElse); + __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(1), + ARMCondition(comp->GetCondition())); + __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(0), + ARMOppositeCondition(comp->GetCondition())); +} + +void LocationsBuilderARM::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorARM::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderARM::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorARM::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderARM::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorARM::VisitLessThan(HLessThan* 
comp) { + VisitCondition(comp); +} + +void LocationsBuilderARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderARM::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorARM::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); } void LocationsBuilderARM::VisitLocal(HLocal* local) { @@ -459,7 +755,8 @@ void InstructionCodeGeneratorARM::VisitLoadLocal(HLoadLocal* load) { } void LocationsBuilderARM::VisitStoreLocal(HStoreLocal* store) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall); switch (store->InputAt(1)->GetType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -477,22 +774,24 @@ void LocationsBuilderARM::VisitStoreLocal(HStoreLocal* store) { default: LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType(); } - store->SetLocations(locations); } void InstructionCodeGeneratorARM::VisitStoreLocal(HStoreLocal* store) { } void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) { - constant->SetLocations(nullptr); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); } void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) { - // Will be generated at use site. 
} void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) { - constant->SetLocations(nullptr); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); } void InstructionCodeGeneratorARM::VisitLongConstant(HLongConstant* constant) { @@ -508,7 +807,8 @@ void InstructionCodeGeneratorARM::VisitReturnVoid(HReturnVoid* ret) { } void LocationsBuilderARM::VisitReturn(HReturn* ret) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall); switch (ret->InputAt(0)->GetType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -527,8 +827,6 @@ void LocationsBuilderARM::VisitReturn(HReturn* ret) { default: LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType(); } - - ret->SetLocations(locations); } void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { @@ -555,8 +853,50 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { } void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke); - locations->AddTemp(Location::RequiresRegister()); + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorARM::LoadCurrentMethod(Register reg) { + __ ldr(reg, Address(SP, kCurrentMethodStackOffset)); +} + +void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) { + Register temp = invoke->GetLocations()->GetTemp(0).AsArm().AsCoreRegister(); + uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>); + size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).Int32Value() + + invoke->GetIndexInDexCache() * kArmWordSize; + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. 
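+  // The sequence below loads the calling ArtMethod, follows its
+  // dex_cache_resolved_methods_ array to the callee at `index_in_cache`, and
+  // branches to the callee's quick-compiled entry point.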
+ + // temp = method; + LoadCurrentMethod(temp); + // temp = temp->dex_cache_resolved_methods_; + __ ldr(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache] + __ ldr(temp, Address(temp, index_in_cache)); + // LR = temp[offset_of_quick_compiled_code] + __ ldr(LR, Address(temp, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value())); + // LR() + __ blx(LR); + + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + DCHECK(!codegen_->IsLeafMethod()); +} + +void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { + HandleInvoke(invoke); +} + +void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); + locations->AddTemp(ArmCoreLocation(R0)); InvokeDexCallingConventionVisitor calling_convention_visitor; for (size_t i = 0; i < invoke->InputCount(); i++) { @@ -586,48 +926,42 @@ void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) { LOG(FATAL) << "Unimplemented return type " << invoke->GetType(); break; } - - invoke->SetLocations(locations); } -void InstructionCodeGeneratorARM::LoadCurrentMethod(Register reg) { - __ ldr(reg, Address(SP, kCurrentMethodStackOffset)); -} -void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) { +void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { Register temp = invoke->GetLocations()->GetTemp(0).AsArm().AsCoreRegister(); - size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + - invoke->GetIndexInDexCache() * kArmWordSize; - - // TODO: Implement all kinds of calls: - // 1) boot -> boot - // 2) app -> boot - // 3) app -> app - // - // Currently we implement the app -> app logic, which looks up in the resolve cache. 
- - // temp = method; - LoadCurrentMethod(temp); - // temp = temp->dex_cache_resolved_methods_; - __ ldr(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); - // temp = temp[index_in_cache] - __ ldr(temp, Address(temp, index_in_cache)); - // LR = temp[offset_of_quick_compiled_code] - __ ldr(LR, Address(temp, - mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value())); - // LR() + uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() + + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); + LocationSummary* locations = invoke->GetLocations(); + Location receiver = locations->InAt(0); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + // temp = object->GetClass(); + if (receiver.IsStackSlot()) { + __ ldr(temp, Address(SP, receiver.GetStackIndex())); + __ ldr(temp, Address(temp, class_offset)); + } else { + __ ldr(temp, Address(receiver.AsArm().AsCoreRegister(), class_offset)); + } + // temp = temp->GetMethodAt(method_offset); + uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(); + __ ldr(temp, Address(temp, method_offset)); + // LR = temp->GetEntryPoint(); + __ ldr(LR, Address(temp, entry_point)); + // LR(); __ blx(LR); - - codegen_->RecordPcInfo(invoke->GetDexPc()); + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderARM::VisitAdd(HAdd* add) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); switch (add->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); locations->SetOut(Location::RequiresRegister()); break; } @@ -642,16 +976,21 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) { default: LOG(FATAL) << "Unimplemented add type " << add->GetResultType(); } - add->SetLocations(locations); } void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { LocationSummary* locations = add->GetLocations(); switch (add->GetResultType()) { case Primitive::kPrimInt: - __ add(locations->Out().AsArm().AsCoreRegister(), - locations->InAt(0).AsArm().AsCoreRegister(), - ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister())); + if (locations->InAt(1).IsRegister()) { + __ add(locations->Out().AsArm().AsCoreRegister(), + locations->InAt(0).AsArm().AsCoreRegister(), + ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister())); + } else { + __ AddConstant(locations->Out().AsArm().AsCoreRegister(), + locations->InAt(0).AsArm().AsCoreRegister(), + locations->InAt(1).GetConstant()->AsIntConstant()->GetValue()); + } break; case Primitive::kPrimLong: @@ -676,12 +1015,13 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { } void LocationsBuilderARM::VisitSub(HSub* sub) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall); switch (sub->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1))); 
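+      // A constant right-hand side is not materialized in a register; as with
+      // HAdd, the code generator folds it, here emitting an AddConstant of the
+      // negated value.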
locations->SetOut(Location::RequiresRegister()); break; } @@ -696,17 +1036,23 @@ void LocationsBuilderARM::VisitSub(HSub* sub) { default: LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType(); } - sub->SetLocations(locations); } void InstructionCodeGeneratorARM::VisitSub(HSub* sub) { LocationSummary* locations = sub->GetLocations(); switch (sub->GetResultType()) { - case Primitive::kPrimInt: - __ sub(locations->Out().AsArm().AsCoreRegister(), - locations->InAt(0).AsArm().AsCoreRegister(), - ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister())); + case Primitive::kPrimInt: { + if (locations->InAt(1).IsRegister()) { + __ sub(locations->Out().AsArm().AsCoreRegister(), + locations->InAt(0).AsArm().AsCoreRegister(), + ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister())); + } else { + __ AddConstant(locations->Out().AsArm().AsCoreRegister(), + locations->InAt(0).AsArm().AsCoreRegister(), + -locations->InAt(1).GetConstant()->AsIntConstant()->GetValue()); + } break; + } case Primitive::kPrimLong: __ subs(locations->Out().AsArm().AsRegisterPairLow(), @@ -729,27 +1075,13 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) { } } -static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1 }; -static constexpr size_t kRuntimeParameterCoreRegistersLength = - arraysize(kRuntimeParameterCoreRegisters); - -class InvokeRuntimeCallingConvention : public CallingConvention<Register> { - public: - InvokeRuntimeCallingConvention() - : CallingConvention(kRuntimeParameterCoreRegisters, - kRuntimeParameterCoreRegistersLength) {} - - private: - DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); -}; - void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(ArmCoreLocation(calling_convention.GetRegisterAt(0))); locations->AddTemp(ArmCoreLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(ArmCoreLocation(R0)); - instruction->SetLocations(locations); } void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { @@ -761,11 +1093,13 @@ void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { __ ldr(LR, Address(TR, offset)); __ blx(LR); - codegen_->RecordPcInfo(instruction->GetDexPc()); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + DCHECK(!codegen_->IsLeafMethod()); } void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); if (location.IsStackSlot()) { location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); @@ -773,7 +1107,6 @@ void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) { location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); } locations->SetOut(location); - instruction->SetLocations(locations); } void InstructionCodeGeneratorARM::VisitParameterValue(HParameterValue* instruction) { @@ -781,10 +1114,10 @@ void InstructionCodeGeneratorARM::VisitParameterValue(HParameterValue* instructi } void 
LocationsBuilderARM::VisitNot(HNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); - instruction->SetLocations(locations); } void InstructionCodeGeneratorARM::VisitNot(HNot* instruction) { @@ -793,20 +1126,586 @@ void InstructionCodeGeneratorARM::VisitNot(HNot* instruction) { locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(1)); } +void LocationsBuilderARM::VisitCompare(HCompare* compare) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { + Label greater, done; + LocationSummary* locations = compare->GetLocations(); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: { + Register output = locations->Out().AsArm().AsCoreRegister(); + ArmManagedRegister left = locations->InAt(0).AsArm(); + ArmManagedRegister right = locations->InAt(1).AsArm(); + Label less, greater, done; + __ cmp(left.AsRegisterPairHigh(), + ShifterOperand(right.AsRegisterPairHigh())); // Signed compare. + __ b(&less, LT); + __ b(&greater, GT); + // Do LoadImmediate before any `cmp`, as LoadImmediate might affect + // the status flags. + __ LoadImmediate(output, 0); + __ cmp(left.AsRegisterPairLow(), + ShifterOperand(right.AsRegisterPairLow())); // Unsigned compare. + __ b(&done, EQ); + __ b(&less, CC); + + __ Bind(&greater); + __ LoadImmediate(output, 1); + __ b(&done); + + __ Bind(&less); + __ LoadImmediate(output, -1); + + __ Bind(&done); + break; + } + default: + LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + } +} + void LocationsBuilderARM::VisitPhi(HPhi* instruction) { - LOG(FATAL) << "Unimplemented"; + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); } void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unreachable"; +} + +void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Temporary registers for the write barrier. 
+ if (instruction->GetFieldType() == Primitive::kPrimNot) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Register obj = locations->InAt(0).AsArm().AsCoreRegister(); + uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + Primitive::Type field_type = instruction->GetFieldType(); + + switch (field_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: { + Register value = locations->InAt(1).AsArm().AsCoreRegister(); + __ StoreToOffset(kStoreByte, value, obj, offset); + break; + } + + case Primitive::kPrimShort: + case Primitive::kPrimChar: { + Register value = locations->InAt(1).AsArm().AsCoreRegister(); + __ StoreToOffset(kStoreHalfword, value, obj, offset); + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + Register value = locations->InAt(1).AsArm().AsCoreRegister(); + __ StoreToOffset(kStoreWord, value, obj, offset); + if (field_type == Primitive::kPrimNot) { + Register temp = locations->GetTemp(0).AsArm().AsCoreRegister(); + Register card = locations->GetTemp(1).AsArm().AsCoreRegister(); + codegen_->MarkGCCard(temp, card, obj, value); + } + break; + } + + case Primitive::kPrimLong: { + ArmManagedRegister value = locations->InAt(1).AsArm(); + __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow(), obj, offset); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << field_type; + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << field_type; + } +} + +void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Register obj = locations->InAt(0).AsArm().AsCoreRegister(); + uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + + switch (instruction->GetType()) { + case Primitive::kPrimBoolean: { + Register out = locations->Out().AsArm().AsCoreRegister(); + __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); + break; + } + + case Primitive::kPrimByte: { + Register out = locations->Out().AsArm().AsCoreRegister(); + __ LoadFromOffset(kLoadSignedByte, out, obj, offset); + break; + } + + case Primitive::kPrimShort: { + Register out = locations->Out().AsArm().AsCoreRegister(); + __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); + break; + } + + case Primitive::kPrimChar: { + Register out = locations->Out().AsArm().AsCoreRegister(); + __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + Register out = locations->Out().AsArm().AsCoreRegister(); + __ LoadFromOffset(kLoadWord, out, obj, offset); + break; + } + + case Primitive::kPrimLong: { + // TODO: support volatile. 
+ ArmManagedRegister out = locations->Out().AsArm(); + __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow(), obj, offset); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << instruction->GetType(); + } +} + +void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + // TODO: Have a normalization phase that makes this instruction never used. + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { + SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction); + codegen_->AddSlowPath(slow_path); + + LocationSummary* locations = instruction->GetLocations(); + Location obj = locations->InAt(0); + DCHECK(obj.Equals(locations->Out())); + + if (obj.IsRegister()) { + __ cmp(obj.AsArm().AsCoreRegister(), ShifterOperand(0)); + } + __ b(slow_path->GetEntryLabel(), EQ); +} + +void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Register obj = locations->InAt(0).AsArm().AsCoreRegister(); + Location index = locations->InAt(1); + + switch (instruction->GetType()) { + case Primitive::kPrimBoolean: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); + Register out = locations->Out().AsArm().AsCoreRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; + __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsArm().AsCoreRegister())); + __ LoadFromOffset(kLoadUnsignedByte, out, IP, data_offset); + } + break; + } + + case Primitive::kPrimByte: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); + Register out = locations->Out().AsArm().AsCoreRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; + __ LoadFromOffset(kLoadSignedByte, out, obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsArm().AsCoreRegister())); + __ LoadFromOffset(kLoadSignedByte, out, IP, data_offset); + } + break; + } + + case Primitive::kPrimShort: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); + Register out = locations->Out().AsArm().AsCoreRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsArm().AsCoreRegister(), LSL, TIMES_2)); + __ LoadFromOffset(kLoadSignedHalfword, out, IP, data_offset); + } + break; + } + + case Primitive::kPrimChar: { + uint32_t data_offset = 
mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); + Register out = locations->Out().AsArm().AsCoreRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsArm().AsCoreRegister(), LSL, TIMES_2)); + __ LoadFromOffset(kLoadUnsignedHalfword, out, IP, data_offset); + } + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Register out = locations->Out().AsArm().AsCoreRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadFromOffset(kLoadWord, out, obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsArm().AsCoreRegister(), LSL, TIMES_4)); + __ LoadFromOffset(kLoadWord, out, IP, data_offset); + } + break; + } + + case Primitive::kPrimLong: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); + ArmManagedRegister out = locations->Out().AsArm(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow(), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsArm().AsCoreRegister(), LSL, TIMES_8)); + __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow(), IP, data_offset); + } + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << instruction->GetType(); + } +} + +void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { + Primitive::Type value_type = instruction->GetComponentType(); + bool is_object = value_type == Primitive::kPrimNot; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + instruction, is_object ? 
LocationSummary::kCall : LocationSummary::kNoCall); + if (is_object) { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, ArmCoreLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, ArmCoreLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, ArmCoreLocation(calling_convention.GetRegisterAt(2))); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetInAt(2, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Register obj = locations->InAt(0).AsArm().AsCoreRegister(); + Location index = locations->InAt(1); + Primitive::Type value_type = instruction->GetComponentType(); + + switch (value_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); + Register value = locations->InAt(2).AsArm().AsCoreRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; + __ StoreToOffset(kStoreByte, value, obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsArm().AsCoreRegister())); + __ StoreToOffset(kStoreByte, value, IP, data_offset); + } + break; + } + + case Primitive::kPrimShort: + case Primitive::kPrimChar: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); + Register value = locations->InAt(2).AsArm().AsCoreRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + __ StoreToOffset(kStoreHalfword, value, obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsArm().AsCoreRegister(), LSL, TIMES_2)); + __ StoreToOffset(kStoreHalfword, value, IP, data_offset); + } + break; + } + + case Primitive::kPrimInt: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Register value = locations->InAt(2).AsArm().AsCoreRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ StoreToOffset(kStoreWord, value, obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsArm().AsCoreRegister(), LSL, TIMES_4)); + __ StoreToOffset(kStoreWord, value, IP, data_offset); + } + break; + } + + case Primitive::kPrimNot: { + int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAputObject).Int32Value(); + __ ldr(LR, Address(TR, offset)); + __ blx(LR); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + DCHECK(!codegen_->IsLeafMethod()); + break; + } + + case Primitive::kPrimLong: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); + ArmManagedRegister value = locations->InAt(2).AsArm(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow(), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsArm().AsCoreRegister(), LSL, TIMES_8)); + __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow(), IP, data_offset); + } + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " 
<< instruction->GetType(); + } +} + +void LocationsBuilderARM::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = instruction->GetLocations(); + uint32_t offset = mirror::Array::LengthOffset().Uint32Value(); + Register obj = locations->InAt(0).AsArm().AsCoreRegister(); + Register out = locations->Out().AsArm().AsCoreRegister(); + __ LoadFromOffset(kLoadWord, out, obj, offset); +} + +void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // TODO: Have a normalization phase that makes this instruction never used. + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = instruction->GetLocations(); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM( + instruction, locations->InAt(0), locations->InAt(1)); + codegen_->AddSlowPath(slow_path); + + Register index = locations->InAt(0).AsArm().AsCoreRegister(); + Register length = locations->InAt(1).AsArm().AsCoreRegister(); + + __ cmp(index, ShifterOperand(length)); + __ b(slow_path->GetEntryLabel(), CS); +} + +void CodeGeneratorARM::MarkGCCard(Register temp, Register card, Register object, Register value) { + Label is_null; + __ CompareAndBranchIfZero(value, &is_null); + __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kArmWordSize>().Int32Value()); + __ Lsr(temp, object, gc::accounting::CardTable::kCardShift); + __ strb(card, Address(card, temp)); + __ Bind(&is_null); +} + +void LocationsBuilderARM::VisitTemporary(HTemporary* temp) { + temp->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARM::VisitTemporary(HTemporary* temp) { + // Nothing to do, this is driven by the code generator. } void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction) { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unreachable"; } void InstructionCodeGeneratorARM::VisitParallelMove(HParallelMove* instruction) { - LOG(FATAL) << "Unimplemented"; + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + +void LocationsBuilderARM::VisitSuspendCheck(HSuspendCheck* instruction) { + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. + return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. 
+ return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { + SuspendCheckSlowPathARM* slow_path = + new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); + codegen_->AddSlowPath(slow_path); + + __ AddConstant(R4, R4, -1); + __ cmp(R4, ShifterOperand(0)); + if (successor == nullptr) { + __ b(slow_path->GetEntryLabel(), LE); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ b(codegen_->GetLabelOf(successor), GT); + __ b(slow_path->GetEntryLabel()); + } +} + +ArmAssembler* ParallelMoveResolverARM::GetAssembler() const { + return codegen_->GetAssembler(); +} + +void ParallelMoveResolverARM::EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister()) { + if (destination.IsRegister()) { + __ Mov(destination.AsArm().AsCoreRegister(), source.AsArm().AsCoreRegister()); + } else { + DCHECK(destination.IsStackSlot()); + __ StoreToOffset(kStoreWord, source.AsArm().AsCoreRegister(), + SP, destination.GetStackIndex()); + } + } else if (source.IsStackSlot()) { + if (destination.IsRegister()) { + __ LoadFromOffset(kLoadWord, destination.AsArm().AsCoreRegister(), + SP, source.GetStackIndex()); + } else { + DCHECK(destination.IsStackSlot()); + __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + } + } else { + DCHECK(source.IsConstant()); + DCHECK(source.GetConstant()->AsIntConstant() != nullptr); + int32_t value = source.GetConstant()->AsIntConstant()->GetValue(); + if (destination.IsRegister()) { + __ LoadImmediate(destination.AsArm().AsCoreRegister(), value); + } else { + DCHECK(destination.IsStackSlot()); + __ LoadImmediate(IP, value); + __ str(IP, Address(SP, destination.GetStackIndex())); + } + } +} + +void ParallelMoveResolverARM::Exchange(Register reg, int mem) { + __ Mov(IP, reg); + __ LoadFromOffset(kLoadWord, reg, SP, mem); + __ StoreToOffset(kStoreWord, IP, SP, mem); +} + +void ParallelMoveResolverARM::Exchange(int mem1, int mem2) { + ScratchRegisterScope ensure_scratch(this, IP, R0, codegen_->GetNumberOfCoreRegisters()); + int stack_offset = ensure_scratch.IsSpilled() ? 
kArmWordSize : 0; + __ LoadFromOffset(kLoadWord, static_cast<Register>(ensure_scratch.GetRegister()), + SP, mem1 + stack_offset); + __ LoadFromOffset(kLoadWord, IP, SP, mem2 + stack_offset); + __ StoreToOffset(kStoreWord, static_cast<Register>(ensure_scratch.GetRegister()), + SP, mem2 + stack_offset); + __ StoreToOffset(kStoreWord, IP, SP, mem1 + stack_offset); +} + +void ParallelMoveResolverARM::EmitSwap(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister() && destination.IsRegister()) { + DCHECK_NE(source.AsArm().AsCoreRegister(), IP); + DCHECK_NE(destination.AsArm().AsCoreRegister(), IP); + __ Mov(IP, source.AsArm().AsCoreRegister()); + __ Mov(source.AsArm().AsCoreRegister(), destination.AsArm().AsCoreRegister()); + __ Mov(destination.AsArm().AsCoreRegister(), IP); + } else if (source.IsRegister() && destination.IsStackSlot()) { + Exchange(source.AsArm().AsCoreRegister(), destination.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsRegister()) { + Exchange(destination.AsArm().AsCoreRegister(), source.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsStackSlot()) { + Exchange(source.GetStackIndex(), destination.GetStackIndex()); + } else { + LOG(FATAL) << "Unimplemented"; + } +} + +void ParallelMoveResolverARM::SpillScratch(int reg) { + __ Push(static_cast<Register>(reg)); +} + +void ParallelMoveResolverARM::RestoreScratch(int reg) { + __ Pop(static_cast<Register>(reg)); } } // namespace arm diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 423b13ef3f..8c86b7a237 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -19,7 +19,8 @@ #include "code_generator.h" #include "nodes.h" -#include "utils/arm/assembler_arm.h" +#include "parallel_move_resolver.h" +#include "utils/arm/assembler_thumb2.h" namespace art { namespace arm { @@ -59,18 +60,41 @@ class InvokeDexCallingConventionVisitor { DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; +class ParallelMoveResolverARM : public ParallelMoveResolver { + public: + ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen) + : ParallelMoveResolver(allocator), codegen_(codegen) {} + + virtual void EmitMove(size_t index) OVERRIDE; + virtual void EmitSwap(size_t index) OVERRIDE; + virtual void SpillScratch(int reg) OVERRIDE; + virtual void RestoreScratch(int reg) OVERRIDE; + + ArmAssembler* GetAssembler() const; + + private: + void Exchange(Register reg, int mem); + void Exchange(int mem1, int mem2); + + CodeGeneratorARM* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM); +}; + class LocationsBuilderARM : public HGraphVisitor { public: - explicit LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen) + LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen) : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name) \ virtual void Visit##name(H##name* instr); - FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void HandleInvoke(HInvoke* invoke); + private: CodeGeneratorARM* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -85,7 +109,7 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name) \ virtual void Visit##name(H##name* instr); - 
FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -93,6 +117,11 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void LoadCurrentMethod(Register reg); private: + // Generate code for the given suspend check. If not null, `successor` + // is the block to branch to if the suspend check is not needed, and after + // the suspend call. + void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; @@ -102,17 +131,21 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { class CodeGeneratorARM : public CodeGenerator { public: explicit CodeGeneratorARM(HGraph* graph); - virtual ~CodeGeneratorARM() { } + virtual ~CodeGeneratorARM() {} virtual void GenerateFrameEntry() OVERRIDE; virtual void GenerateFrameExit() OVERRIDE; virtual void Bind(Label* label) OVERRIDE; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; + virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; + virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; virtual size_t GetWordSize() const OVERRIDE { return kArmWordSize; } + virtual size_t FrameEntrySpillSize() const OVERRIDE; + virtual HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } @@ -130,7 +163,6 @@ class CodeGeneratorARM : public CodeGenerator { Primitive::Type type, bool* blocked_registers) const OVERRIDE; virtual size_t GetNumberOfRegisters() const OVERRIDE; - int32_t GetStackSlot(HLocal* local) const; virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE; virtual size_t GetNumberOfCoreRegisters() const OVERRIDE { @@ -144,15 +176,27 @@ class CodeGeneratorARM : public CodeGenerator { virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; - private: + ParallelMoveResolverARM* GetMoveResolver() { + return &move_resolver_; + } + + virtual InstructionSet GetInstructionSet() const OVERRIDE { + return InstructionSet::kThumb2; + } + // Helper method to move a 32bits value between two locations. void Move32(Location destination, Location source); // Helper method to move a 64bits value between two locations. void Move64(Location destination, Location source); + // Emit a write barrier. 
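+  // Dirties the card covering `object` in the thread-local card table when
+  // `value`, a reference just stored into `object`, is non-null. `temp` and
+  // `card` are scratch registers.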
+ void MarkGCCard(Register temp, Register card, Register object, Register value); + + private: LocationsBuilderARM location_builder_; InstructionCodeGeneratorARM instruction_visitor_; - ArmAssembler assembler_; + ParallelMoveResolverARM move_resolver_; + Thumb2Assembler assembler_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); }; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 8bfd8d67c4..ea67dfda32 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -15,16 +15,17 @@ */ #include "code_generator_x86.h" -#include "utils/assembler.h" -#include "utils/x86/assembler_x86.h" -#include "utils/x86/managed_register_x86.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "gc/accounting/card_table.h" #include "mirror/array.h" #include "mirror/art_method.h" +#include "mirror/class.h" #include "thread.h" - -#define __ reinterpret_cast<X86Assembler*>(GetAssembler())-> +#include "utils/assembler.h" +#include "utils/stack_checks.h" +#include "utils/x86/assembler_x86.h" +#include "utils/x86/managed_register_x86.h" namespace art { @@ -34,9 +35,134 @@ x86::X86ManagedRegister Location::AsX86() const { namespace x86 { +static constexpr bool kExplicitStackOverflowCheck = false; + static constexpr int kNumberOfPushedRegistersAtEntry = 1; static constexpr int kCurrentMethodStackOffset = 0; +static Location X86CpuLocation(Register reg) { + return Location::RegisterLocation(X86ManagedRegister::FromCpuRegister(reg)); +} + +static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX }; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); + +class InvokeRuntimeCallingConvention : public CallingConvention<Register> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + +#define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())-> + +class NullCheckSlowPathX86 : public SlowPathCode { + public: + explicit NullCheckSlowPathX86(HNullCheck* instruction) : instruction_(instruction) {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowNullPointer))); + codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + } + + private: + HNullCheck* const instruction_; + DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86); +}; + +class StackOverflowCheckSlowPathX86 : public SlowPathCode { + public: + StackOverflowCheckSlowPathX86() {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + __ addl(ESP, + Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize)); + __ fs()->jmp(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowStackOverflow))); + } + + private: + DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86); +}; + +class BoundsCheckSlowPathX86 : public SlowPathCode { + public: + BoundsCheckSlowPathX86(HBoundsCheck* instruction, + Location index_location, + Location length_location) + : instruction_(instruction), index_location_(index_location), length_location_(length_location) {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86* x86_codegen = reinterpret_cast<CodeGeneratorX86*>(codegen); + __ Bind(GetEntryLabel()); + 
InvokeRuntimeCallingConvention calling_convention; + x86_codegen->Move32(X86CpuLocation(calling_convention.GetRegisterAt(0)), index_location_); + x86_codegen->Move32(X86CpuLocation(calling_convention.GetRegisterAt(1)), length_location_); + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowArrayBounds))); + codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + } + + private: + HBoundsCheck* const instruction_; + const Location index_location_; + const Location length_location_; + + DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86); +}; + +class SuspendCheckSlowPathX86 : public SlowPathCode { + public: + explicit SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor) + : instruction_(instruction), successor_(successor) {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + codegen->SaveLiveRegisters(instruction_->GetLocations()); + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend))); + codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + codegen->RestoreLiveRegisters(instruction_->GetLocations()); + if (successor_ == nullptr) { + __ jmp(GetReturnLabel()); + } else { + __ jmp(codegen->GetLabelOf(successor_)); + } + } + + Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } + + private: + HSuspendCheck* const instruction_; + HBasicBlock* const successor_; + Label return_label_; + + DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86); +}; + +#undef __ +#define __ reinterpret_cast<X86Assembler*>(GetAssembler())-> + +inline Condition X86Condition(IfCondition cond) { + switch (cond) { + case kCondEQ: return kEqual; + case kCondNE: return kNotEqual; + case kCondLT: return kLess; + case kCondLE: return kLessEqual; + case kCondGT: return kGreater; + case kCondGE: return kGreaterEqual; + default: + LOG(FATAL) << "Unknown if condition"; + } + return kEqual; +} + void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const { stream << X86ManagedRegister::FromCpuRegister(Register(reg)); } @@ -45,10 +171,23 @@ void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) stream << X86ManagedRegister::FromXmmRegister(XmmRegister(reg)); } +void CodeGeneratorX86::SaveCoreRegister(Location stack_location, uint32_t reg_id) { + __ movl(Address(ESP, stack_location.GetStackIndex()), static_cast<Register>(reg_id)); +} + +void CodeGeneratorX86::RestoreCoreRegister(Location stack_location, uint32_t reg_id) { + __ movl(static_cast<Register>(reg_id), Address(ESP, stack_location.GetStackIndex())); +} + CodeGeneratorX86::CodeGeneratorX86(HGraph* graph) : CodeGenerator(graph, kNumberOfRegIds), location_builder_(graph, this), - instruction_visitor_(graph, this) {} + instruction_visitor_(graph, this), + move_resolver_(graph->GetArena(), this) {} + +size_t CodeGeneratorX86::FrameEntrySpillSize() const { + return kNumberOfPushedRegistersAtEntry * kX86WordSize; +} static bool* GetBlockedRegisterPairs(bool* blocked_registers) { return blocked_registers + kNumberOfAllocIds; @@ -58,12 +197,23 @@ ManagedRegister CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type, bool* blocked_registers) const { switch (type) { case Primitive::kPrimLong: { - size_t reg = AllocateFreeRegisterInternal( - GetBlockedRegisterPairs(blocked_registers), kNumberOfRegisterPairs); + bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers); + size_t reg = AllocateFreeRegisterInternal(blocked_register_pairs, 
kNumberOfRegisterPairs); X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg)); blocked_registers[pair.AsRegisterPairLow()] = true; blocked_registers[pair.AsRegisterPairHigh()] = true; + // Block all other register pairs that share a register with `pair`. + for (int i = 0; i < kNumberOfRegisterPairs; i++) { + X86ManagedRegister current = + X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); + if (current.AsRegisterPairLow() == pair.AsRegisterPairLow() + || current.AsRegisterPairLow() == pair.AsRegisterPairHigh() + || current.AsRegisterPairHigh() == pair.AsRegisterPairLow() + || current.AsRegisterPairHigh() == pair.AsRegisterPairHigh()) { + blocked_register_pairs[i] = true; + } + } return pair; } @@ -73,8 +223,18 @@ ManagedRegister CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type, case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimNot: { - size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCpuRegisters); - return X86ManagedRegister::FromCpuRegister(static_cast<Register>(reg)); + Register reg = static_cast<Register>( + AllocateFreeRegisterInternal(blocked_registers, kNumberOfCpuRegisters)); + // Block all register pairs that contain `reg`. + bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers); + for (int i = 0; i < kNumberOfRegisterPairs; i++) { + X86ManagedRegister current = + X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); + if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) { + blocked_register_pairs[i] = true; + } + } + return X86ManagedRegister::FromCpuRegister(reg); } case Primitive::kPrimFloat: @@ -111,10 +271,6 @@ size_t CodeGeneratorX86::GetNumberOfRegisters() const { return kNumberOfRegIds; } -static Location X86CpuLocation(Register reg) { - return Location::RegisterLocation(X86ManagedRegister::FromCpuRegister(reg)); -} - InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen) : HGraphVisitor(graph), assembler_(codegen->GetAssembler()), @@ -125,15 +281,23 @@ void CodeGeneratorX86::GenerateFrameEntry() { static const int kFakeReturnRegister = 8; core_spill_mask_ |= (1 << kFakeReturnRegister); - SetFrameSize(RoundUp( - (GetGraph()->GetMaximumNumberOfOutVRegs() + GetGraph()->GetNumberOfVRegs()) * kVRegSize - + kVRegSize // filler - + kX86WordSize // Art method - + kNumberOfPushedRegistersAtEntry * kX86WordSize, - kStackAlignment)); + bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); + if (!skip_overflow_check && !kExplicitStackOverflowCheck) { + __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86)))); + RecordPcInfo(nullptr, 0); + } // The return PC has already been pushed on the stack. 
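  // Since GetFrameSize() already counts that pushed word (kNumberOfPushedRegistersAtEntry),
  // only the remainder of the frame is allocated by the subl below.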
__ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize)); + + if (!skip_overflow_check && kExplicitStackOverflowCheck) { + SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86(); + AddSlowPath(slow_path); + + __ fs()->cmpl(ESP, Address::Absolute(Thread::StackEndOffset<kX86WordSize>())); + __ j(kLess, slow_path->GetEntryLabel()); + } + __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); } @@ -149,24 +313,6 @@ void InstructionCodeGeneratorX86::LoadCurrentMethod(Register reg) { __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); } -int32_t CodeGeneratorX86::GetStackSlot(HLocal* local) const { - uint16_t reg_number = local->GetRegNumber(); - uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs(); - uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs(); - if (reg_number >= number_of_vregs - number_of_in_vregs) { - // Local is a parameter of the method. It is stored in the caller's frame. - return GetFrameSize() + kX86WordSize // ART method - + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize; - } else { - // Local is a temporary in this method. It is stored in this method's frame. - return GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kX86WordSize) - - kVRegSize // filler. - - (number_of_vregs * kVRegSize) - + (reg_number * kVRegSize); - } -} - - Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { switch (load->GetType()) { case Primitive::kPrimLong: @@ -193,20 +339,6 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { return Location(); } -static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX }; -static constexpr size_t kRuntimeParameterCoreRegistersLength = - arraysize(kRuntimeParameterCoreRegisters); - -class InvokeRuntimeCallingConvention : public CallingConvention<Register> { - public: - InvokeRuntimeCallingConvention() - : CallingConvention(kRuntimeParameterCoreRegisters, - kRuntimeParameterCoreRegistersLength) {} - - private: - DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); -}; - Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { switch (type) { case Primitive::kPrimBoolean: @@ -219,7 +351,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type if (index < calling_convention.GetNumberOfRegisters()) { return X86CpuLocation(calling_convention.GetRegisterAt(index)); } else { - return Location::StackSlot(calling_convention.GetStackOffsetOf(index, kX86WordSize)); + return Location::StackSlot(calling_convention.GetStackOffsetOf(index)); } } @@ -232,7 +364,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } else if (index + 1 == calling_convention.GetNumberOfRegisters()) { return Location::QuickParameter(index); } else { - return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index, kX86WordSize)); + return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index)); } } @@ -264,8 +396,8 @@ void CodeGeneratorX86::Move32(Location destination, Location source) { __ movl(Address(ESP, destination.GetStackIndex()), source.AsX86().AsCpuRegister()); } else { DCHECK(source.IsStackSlot()); - __ movl(EAX, Address(ESP, source.GetStackIndex())); - __ movl(Address(ESP, destination.GetStackIndex()), EAX); + __ pushl(Address(ESP, source.GetStackIndex())); + __ popl(Address(ESP, destination.GetStackIndex())); } } } @@ -284,7 +416,7 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { __ 
movl(destination.AsX86().AsRegisterPairLow(), calling_convention.GetRegisterAt(argument_index)); __ movl(destination.AsX86().AsRegisterPairHigh(), Address(ESP, - calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize) + GetFrameSize())); + calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize())); } else { DCHECK(source.IsDoubleStackSlot()); __ movl(destination.AsX86().AsRegisterPairLow(), Address(ESP, source.GetStackIndex())); @@ -296,16 +428,17 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { uint32_t argument_index = destination.GetQuickParameterIndex(); if (source.IsRegister()) { __ movl(calling_convention.GetRegisterAt(argument_index), source.AsX86().AsRegisterPairLow()); - __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize)), + __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1)), source.AsX86().AsRegisterPairHigh()); } else { DCHECK(source.IsDoubleStackSlot()); __ movl(calling_convention.GetRegisterAt(argument_index), Address(ESP, source.GetStackIndex())); - __ movl(EAX, Address(ESP, source.GetHighStackIndex(kX86WordSize))); - __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize)), EAX); + __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize))); + __ popl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1))); } } else { + DCHECK(destination.IsDoubleStackSlot()); if (source.IsRegister()) { __ movl(Address(ESP, destination.GetStackIndex()), source.AsX86().AsRegisterPairLow()); __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), @@ -315,15 +448,15 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { uint32_t argument_index = source.GetQuickParameterIndex(); __ movl(Address(ESP, destination.GetStackIndex()), calling_convention.GetRegisterAt(argument_index)); - __ movl(EAX, Address(ESP, - calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize) + GetFrameSize())); - __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), EAX); + __ pushl(Address(ESP, + calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize())); + __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize))); } else { DCHECK(source.IsDoubleStackSlot()); - __ movl(EAX, Address(ESP, source.GetStackIndex())); - __ movl(Address(ESP, destination.GetStackIndex()), EAX); - __ movl(EAX, Address(ESP, source.GetHighStackIndex(kX86WordSize))); - __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), EAX); + __ pushl(Address(ESP, source.GetStackIndex())); + __ popl(Address(ESP, destination.GetStackIndex())); + __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize))); + __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize))); } } } @@ -365,9 +498,7 @@ void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstr LOG(FATAL) << "Unimplemented local type " << instruction->GetType(); } } else { - // This can currently only happen when the instruction that requests the move - // is the next to be compiled. 
- DCHECK_EQ(instruction->GetNext(), move_for); + DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary()); switch (instruction->GetType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -394,9 +525,22 @@ void LocationsBuilderX86::VisitGoto(HGoto* got) { void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) { HBasicBlock* successor = got->GetSuccessor(); - if (GetGraph()->GetExitBlock() == successor) { - codegen_->GenerateFrameExit(); - } else if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { + DCHECK(!successor->IsExitBlock()); + + HBasicBlock* block = got->GetBlock(); + HInstruction* previous = got->GetPrevious(); + + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) { + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; + } + + if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { __ jmp(codegen_->GetLabelOf(successor)); } } @@ -413,22 +557,51 @@ void InstructionCodeGeneratorX86::VisitExit(HExit* exit) { } void LocationsBuilderX86::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - locations->SetInAt(0, Location::Any()); - if_instr->SetLocations(locations); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + DCHECK(cond->IsCondition()); + HCondition* condition = cond->AsCondition(); + if (condition->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } } void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { - // TODO: Generate the input as a condition, instead of materializing in a register. - Location location = if_instr->GetLocations()->InAt(0); - if (location.IsRegister()) { - __ cmpl(location.AsX86().AsCpuRegister(), Immediate(0)); + HInstruction* cond = if_instr->InputAt(0); + DCHECK(cond->IsCondition()); + HCondition* condition = cond->AsCondition(); + if (condition->NeedsMaterialization()) { + // Moves do not affect the eflags register, so if the condition is evaluated + // just before the if, we don't need to evaluate it again. + if (!condition->IsBeforeWhenDisregardMoves(if_instr)) { + // Materialized condition, compare against 0 + Location lhs = if_instr->GetLocations()->InAt(0); + if (lhs.IsRegister()) { + __ cmpl(lhs.AsX86().AsCpuRegister(), Immediate(0)); + } else { + __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); + } + } + __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); } else { - __ cmpl(Address(ESP, location.GetStackIndex()), Immediate(0)); + Location lhs = condition->GetLocations()->InAt(0); + Location rhs = condition->GetLocations()->InAt(1); + // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition). 
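+    // Only the RHS therefore needs the register/constant/stack-slot dispatch below.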
+ if (rhs.IsRegister()) { + __ cmpl(lhs.AsX86().AsCpuRegister(), rhs.AsX86().AsCpuRegister()); + } else if (rhs.IsConstant()) { + HIntConstant* instruction = rhs.GetConstant()->AsIntConstant(); + Immediate imm(instruction->AsIntConstant()->GetValue()); + __ cmpl(lhs.AsX86().AsCpuRegister(), imm); + } else { + __ cmpl(lhs.AsX86().AsCpuRegister(), Address(ESP, rhs.GetStackIndex())); + } + __ j(X86Condition(condition->GetCondition()), + codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); } - __ j(kEqual, codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) { + __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); } } @@ -449,7 +622,8 @@ void InstructionCodeGeneratorX86::VisitLoadLocal(HLoadLocal* load) { } void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* store) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall); switch (store->InputAt(1)->GetType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -473,36 +647,98 @@ void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* store) { void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store) { } -void LocationsBuilderX86::VisitEqual(HEqual* equal) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal); +void LocationsBuilderX86::VisitCondition(HCondition* comp) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); - locations->SetOut(Location::SameAsFirstInput()); - equal->SetLocations(locations); + if (comp->NeedsMaterialization()) { + locations->SetOut(Location::RequiresRegister()); + } } -void InstructionCodeGeneratorX86::VisitEqual(HEqual* equal) { - LocationSummary* locations = equal->GetLocations(); - if (locations->InAt(1).IsRegister()) { - __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(), - locations->InAt(1).AsX86().AsCpuRegister()); - } else { - __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(), - Address(ESP, locations->InAt(1).GetStackIndex())); +void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) { + if (comp->NeedsMaterialization()) { + LocationSummary* locations = comp->GetLocations(); + Register reg = locations->Out().AsX86().AsCpuRegister(); + // Clear register: setcc only sets the low byte. 
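+    // For example, materializing `a < b` into `reg` emits roughly:
+    //   xorl reg, reg    (done first, while the flags are not yet live)
+    //   cmpl a, b
+    //   setl reg8
+    // leaving 0 or 1 in the full 32-bit register.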
+ __ xorl(reg, reg); + if (locations->InAt(1).IsRegister()) { + __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(), + locations->InAt(1).AsX86().AsCpuRegister()); + } else if (locations->InAt(1).IsConstant()) { + HConstant* instruction = locations->InAt(1).GetConstant(); + Immediate imm(instruction->AsIntConstant()->GetValue()); + __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(), imm); + } else { + __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(), + Address(ESP, locations->InAt(1).GetStackIndex())); + } + __ setb(X86Condition(comp->GetCondition()), reg); } - __ setb(kEqual, locations->Out().AsX86().AsCpuRegister()); +} + +void LocationsBuilderX86::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); } void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) { - constant->SetLocations(nullptr); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); } void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) { - // Will be generated at use site. 
} void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) { - constant->SetLocations(nullptr); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); } void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant) { @@ -519,7 +755,8 @@ void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret) { } void LocationsBuilderX86::VisitReturn(HReturn* ret) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall); switch (ret->InputAt(0)->GetType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -538,7 +775,6 @@ void LocationsBuilderX86::VisitReturn(HReturn* ret) { default: LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType(); } - ret->SetLocations(locations); } void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { @@ -566,8 +802,43 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { } void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke); - locations->AddTemp(Location::RequiresRegister()); + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) { + Register temp = invoke->GetLocations()->GetTemp(0).AsX86().AsCpuRegister(); + uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>); + size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).Int32Value() + + invoke->GetIndexInDexCache() * kX86WordSize; + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. 
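+  // `index_in_cache` above is the byte offset of the callee in that resolved-methods
+  // array: the array data offset plus the dex cache index scaled by kX86WordSize.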
+ + // temp = method; + LoadCurrentMethod(temp); + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache] + __ movl(temp, Address(temp, index_in_cache)); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value())); + + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { + HandleInvoke(invoke); +} + +void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); + locations->AddTemp(X86CpuLocation(EAX)); InvokeDexCallingConventionVisitor calling_convention_visitor; for (size_t i = 0; i < invoke->InputCount(); i++) { @@ -601,32 +872,32 @@ void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) { invoke->SetLocations(locations); } -void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) { +void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { Register temp = invoke->GetLocations()->GetTemp(0).AsX86().AsCpuRegister(); - size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + - invoke->GetIndexInDexCache() * kX86WordSize; - - // TODO: Implement all kinds of calls: - // 1) boot -> boot - // 2) app -> boot - // 3) app -> app - // - // Currently we implement the app -> app logic, which looks up in the resolve cache. - - // temp = method; - LoadCurrentMethod(temp); - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); - // temp = temp[index_in_cache] - __ movl(temp, Address(temp, index_in_cache)); - // (temp + offset_of_quick_compiled_code)() + uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() + + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); + LocationSummary* locations = invoke->GetLocations(); + Location receiver = locations->InAt(0); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + // temp = object->GetClass(); + if (receiver.IsStackSlot()) { + __ movl(temp, Address(ESP, receiver.GetStackIndex())); + __ movl(temp, Address(temp, class_offset)); + } else { + __ movl(temp, Address(receiver.AsX86().AsCpuRegister(), class_offset)); + } + // temp = temp->GetMethodAt(method_offset); + __ movl(temp, Address(temp, method_offset)); + // call temp->GetEntryPoint(); __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value())); - codegen_->RecordPcInfo(invoke->GetDexPc()); + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86::VisitAdd(HAdd* add) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); switch (add->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { @@ -646,7 +917,6 @@ void LocationsBuilderX86::VisitAdd(HAdd* add) { default: LOG(FATAL) << "Unimplemented add type " << add->GetResultType(); } - add->SetLocations(locations); } void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { @@ -658,6 +928,10 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { if 
(locations->InAt(1).IsRegister()) { __ addl(locations->InAt(0).AsX86().AsCpuRegister(), locations->InAt(1).AsX86().AsCpuRegister()); + } else if (locations->InAt(1).IsConstant()) { + HConstant* instruction = locations->InAt(1).GetConstant(); + Immediate imm(instruction->AsIntConstant()->GetValue()); + __ addl(locations->InAt(0).AsX86().AsCpuRegister(), imm); } else { __ addl(locations->InAt(0).AsX86().AsCpuRegister(), Address(ESP, locations->InAt(1).GetStackIndex())); @@ -695,7 +969,8 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { } void LocationsBuilderX86::VisitSub(HSub* sub) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall); switch (sub->GetResultType()) { case Primitive::kPrimInt: case Primitive::kPrimLong: { @@ -715,7 +990,6 @@ void LocationsBuilderX86::VisitSub(HSub* sub) { default: LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType(); } - sub->SetLocations(locations); } void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { @@ -727,6 +1001,10 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { if (locations->InAt(1).IsRegister()) { __ subl(locations->InAt(0).AsX86().AsCpuRegister(), locations->InAt(1).AsX86().AsCpuRegister()); + } else if (locations->InAt(1).IsConstant()) { + HConstant* instruction = locations->InAt(1).GetConstant(); + Immediate imm(instruction->AsIntConstant()->GetValue()); + __ subl(locations->InAt(0).AsX86().AsCpuRegister(), imm); } else { __ subl(locations->InAt(0).AsX86().AsCpuRegister(), Address(ESP, locations->InAt(1).GetStackIndex())); @@ -764,12 +1042,12 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { } void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); locations->SetOut(X86CpuLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(X86CpuLocation(calling_convention.GetRegisterAt(0))); locations->AddTemp(X86CpuLocation(calling_convention.GetRegisterAt(1))); - instruction->SetLocations(locations); } void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { @@ -780,11 +1058,13 @@ void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { __ fs()->call( Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocObjectWithAccessCheck))); - codegen_->RecordPcInfo(instruction->GetDexPc()); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + DCHECK(!codegen_->IsLeafMethod()); } void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); if (location.IsStackSlot()) { location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); @@ -792,18 +1072,16 @@ void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) { location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); } locations->SetOut(location); - instruction->SetLocations(locations); } void 
InstructionCodeGeneratorX86::VisitParameterValue(HParameterValue* instruction) { - // Nothing to do, the parameter is already at its location. } void LocationsBuilderX86::VisitNot(HNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); - instruction->SetLocations(locations); } void InstructionCodeGeneratorX86::VisitNot(HNot* instruction) { @@ -813,20 +1091,619 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* instruction) { __ xorl(out.AsX86().AsCpuRegister(), Immediate(1)); } +void LocationsBuilderX86::VisitCompare(HCompare* compare) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { + Label greater, done; + LocationSummary* locations = compare->GetLocations(); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: { + Label less, greater, done; + Register output = locations->Out().AsX86().AsCpuRegister(); + X86ManagedRegister left = locations->InAt(0).AsX86(); + Location right = locations->InAt(1); + if (right.IsRegister()) { + __ cmpl(left.AsRegisterPairHigh(), right.AsX86().AsRegisterPairHigh()); + } else { + DCHECK(right.IsDoubleStackSlot()); + __ cmpl(left.AsRegisterPairHigh(), Address(ESP, right.GetHighStackIndex(kX86WordSize))); + } + __ j(kLess, &less); // Signed compare. + __ j(kGreater, &greater); // Signed compare. + if (right.IsRegister()) { + __ cmpl(left.AsRegisterPairLow(), right.AsX86().AsRegisterPairLow()); + } else { + DCHECK(right.IsDoubleStackSlot()); + __ cmpl(left.AsRegisterPairLow(), Address(ESP, right.GetStackIndex())); + } + __ movl(output, Immediate(0)); + __ j(kEqual, &done); + __ j(kBelow, &less); // Unsigned compare. + + __ Bind(&greater); + __ movl(output, Immediate(1)); + __ jmp(&done); + + __ Bind(&less); + __ movl(output, Immediate(-1)); + + __ Bind(&done); + break; + } + default: + LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + } +} + void LocationsBuilderX86::VisitPhi(HPhi* instruction) { - LOG(FATAL) << "Unimplemented"; + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); } void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unreachable"; +} + +void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + Primitive::Type field_type = instruction->GetFieldType(); + if (field_type == Primitive::kPrimBoolean || field_type == Primitive::kPrimByte) { + // Ensure the value is in a byte register. + locations->SetInAt(1, X86CpuLocation(EAX)); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + // Temporary registers for the write barrier. 
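+    // MarkGCCard below uses `temp` for the shifted object address and `card` for the
+    // card table base loaded from the current Thread.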
+ if (field_type == Primitive::kPrimNot) { + locations->AddTemp(Location::RequiresRegister()); + // Ensure the card is in a byte register. + locations->AddTemp(X86CpuLocation(ECX)); + } +} + +void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Register obj = locations->InAt(0).AsX86().AsCpuRegister(); + uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + Primitive::Type field_type = instruction->GetFieldType(); + + switch (field_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: { + ByteRegister value = locations->InAt(1).AsX86().AsByteRegister(); + __ movb(Address(obj, offset), value); + break; + } + + case Primitive::kPrimShort: + case Primitive::kPrimChar: { + Register value = locations->InAt(1).AsX86().AsCpuRegister(); + __ movw(Address(obj, offset), value); + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + Register value = locations->InAt(1).AsX86().AsCpuRegister(); + __ movl(Address(obj, offset), value); + + if (field_type == Primitive::kPrimNot) { + Register temp = locations->GetTemp(0).AsX86().AsCpuRegister(); + Register card = locations->GetTemp(1).AsX86().AsCpuRegister(); + codegen_->MarkGCCard(temp, card, obj, value); + } + break; + } + + case Primitive::kPrimLong: { + X86ManagedRegister value = locations->InAt(1).AsX86(); + __ movl(Address(obj, offset), value.AsRegisterPairLow()); + __ movl(Address(obj, kX86WordSize + offset), value.AsRegisterPairHigh()); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << field_type; + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << field_type; + } +} + +void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) { + Label is_null; + __ testl(value, value); + __ j(kEqual, &is_null); + __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value())); + __ movl(temp, object); + __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); + __ movb(Address(temp, card, TIMES_1, 0), + X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); + __ Bind(&is_null); +} + +void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Register obj = locations->InAt(0).AsX86().AsCpuRegister(); + uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + + switch (instruction->GetType()) { + case Primitive::kPrimBoolean: { + Register out = locations->Out().AsX86().AsCpuRegister(); + __ movzxb(out, Address(obj, offset)); + break; + } + + case Primitive::kPrimByte: { + Register out = locations->Out().AsX86().AsCpuRegister(); + __ movsxb(out, Address(obj, offset)); + break; + } + + case Primitive::kPrimShort: { + Register out = locations->Out().AsX86().AsCpuRegister(); + __ movsxw(out, Address(obj, offset)); + break; + } + + case Primitive::kPrimChar: { + Register out = locations->Out().AsX86().AsCpuRegister(); + __ movzxw(out, Address(obj, offset)); + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + Register out = 
locations->Out().AsX86().AsCpuRegister(); + __ movl(out, Address(obj, offset)); + break; + } + + case Primitive::kPrimLong: { + // TODO: support volatile. + X86ManagedRegister out = locations->Out().AsX86(); + __ movl(out.AsRegisterPairLow(), Address(obj, offset)); + __ movl(out.AsRegisterPairHigh(), Address(obj, kX86WordSize + offset)); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << instruction->GetType(); + } +} + +void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::Any()); + // TODO: Have a normalization phase that makes this instruction never used. + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { + SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction); + codegen_->AddSlowPath(slow_path); + + LocationSummary* locations = instruction->GetLocations(); + Location obj = locations->InAt(0); + DCHECK(obj.Equals(locations->Out())); + + if (obj.IsRegister()) { + __ cmpl(obj.AsX86().AsCpuRegister(), Immediate(0)); + } else { + DCHECK(locations->InAt(0).IsStackSlot()); + __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0)); + } + __ j(kEqual, slow_path->GetEntryLabel()); +} + +void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Register obj = locations->InAt(0).AsX86().AsCpuRegister(); + Location index = locations->InAt(1); + + switch (instruction->GetType()) { + case Primitive::kPrimBoolean: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); + Register out = locations->Out().AsX86().AsCpuRegister(); + if (index.IsConstant()) { + __ movzxb(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); + } else { + __ movzxb(out, Address(obj, index.AsX86().AsCpuRegister(), TIMES_1, data_offset)); + } + break; + } + + case Primitive::kPrimByte: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); + Register out = locations->Out().AsX86().AsCpuRegister(); + if (index.IsConstant()) { + __ movsxb(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); + } else { + __ movsxb(out, Address(obj, index.AsX86().AsCpuRegister(), TIMES_1, data_offset)); + } + break; + } + + case Primitive::kPrimShort: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); + Register out = locations->Out().AsX86().AsCpuRegister(); + if (index.IsConstant()) { + __ movsxw(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); + } else { + __ movsxw(out, Address(obj, index.AsX86().AsCpuRegister(), TIMES_2, data_offset)); + } + break; + } + + case Primitive::kPrimChar: { + uint32_t data_offset = 
mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); + Register out = locations->Out().AsX86().AsCpuRegister(); + if (index.IsConstant()) { + __ movzxw(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); + } else { + __ movzxw(out, Address(obj, index.AsX86().AsCpuRegister(), TIMES_2, data_offset)); + } + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Register out = locations->Out().AsX86().AsCpuRegister(); + if (index.IsConstant()) { + __ movl(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); + } else { + __ movl(out, Address(obj, index.AsX86().AsCpuRegister(), TIMES_4, data_offset)); + } + break; + } + + case Primitive::kPrimLong: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); + X86ManagedRegister out = locations->Out().AsX86(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ movl(out.AsRegisterPairLow(), Address(obj, offset)); + __ movl(out.AsRegisterPairHigh(), Address(obj, offset + kX86WordSize)); + } else { + __ movl(out.AsRegisterPairLow(), + Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset)); + __ movl(out.AsRegisterPairHigh(), + Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset + kX86WordSize)); + } + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << instruction->GetType(); + } +} + +void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { + Primitive::Type value_type = instruction->GetComponentType(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + instruction, + value_type == Primitive::kPrimNot ? LocationSummary::kCall : LocationSummary::kNoCall); + + if (value_type == Primitive::kPrimNot) { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, X86CpuLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, X86CpuLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, X86CpuLocation(calling_convention.GetRegisterAt(2))); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (value_type == Primitive::kPrimBoolean || value_type == Primitive::kPrimByte) { + // Ensure the value is in a byte register. 
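+      // On x86-32 only EAX, EBX, ECX and EDX expose a low byte register, and the byte
+      // store in the code generator needs one of them.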
+ locations->SetInAt(2, X86CpuLocation(EAX)); + } else { + locations->SetInAt(2, Location::RequiresRegister()); + } + } +} + +void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Register obj = locations->InAt(0).AsX86().AsCpuRegister(); + Location index = locations->InAt(1); + Primitive::Type value_type = instruction->GetComponentType(); + + switch (value_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); + ByteRegister value = locations->InAt(2).AsX86().AsByteRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; + __ movb(Address(obj, offset), value); + } else { + __ movb(Address(obj, index.AsX86().AsCpuRegister(), TIMES_1, data_offset), value); + } + break; + } + + case Primitive::kPrimShort: + case Primitive::kPrimChar: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); + Register value = locations->InAt(2).AsX86().AsCpuRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + __ movw(Address(obj, offset), value); + } else { + __ movw(Address(obj, index.AsX86().AsCpuRegister(), TIMES_2, data_offset), value); + } + break; + } + + case Primitive::kPrimInt: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Register value = locations->InAt(2).AsX86().AsCpuRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ movl(Address(obj, offset), value); + } else { + __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_4, data_offset), value); + } + break; + } + + case Primitive::kPrimNot: { + DCHECK(!codegen_->IsLeafMethod()); + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAputObject))); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + break; + } + + case Primitive::kPrimLong: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); + X86ManagedRegister value = locations->InAt(2).AsX86(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ movl(Address(obj, offset), value.AsRegisterPairLow()); + __ movl(Address(obj, offset + kX86WordSize), value.AsRegisterPairHigh()); + } else { + __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset), + value.AsRegisterPairLow()); + __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset + kX86WordSize), + value.AsRegisterPairHigh()); + } + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << instruction->GetType(); + } +} + +void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + instruction->SetLocations(locations); +} + +void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = instruction->GetLocations(); + uint32_t offset = 
mirror::Array::LengthOffset().Uint32Value(); + Register obj = locations->InAt(0).AsX86().AsCpuRegister(); + Register out = locations->Out().AsX86().AsCpuRegister(); + __ movl(out, Address(obj, offset)); +} + +void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // TODO: Have a normalization phase that makes this instruction never used. + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = instruction->GetLocations(); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86( + instruction, locations->InAt(0), locations->InAt(1)); + codegen_->AddSlowPath(slow_path); + + Register index = locations->InAt(0).AsX86().AsCpuRegister(); + Register length = locations->InAt(1).AsX86().AsCpuRegister(); + + __ cmpl(index, length); + __ j(kAboveEqual, slow_path->GetEntryLabel()); +} + +void LocationsBuilderX86::VisitTemporary(HTemporary* temp) { + temp->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86::VisitTemporary(HTemporary* temp) { + // Nothing to do, this is driven by the code generator. } void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction) { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unreachable"; } void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) { - LOG(FATAL) << "Unimplemented"; + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + +void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) { + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. + return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. + return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { + SuspendCheckSlowPathX86* slow_path = + new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); + codegen_->AddSlowPath(slow_path); + __ fs()->cmpw(Address::Absolute( + Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0)); + if (successor == nullptr) { + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ j(kEqual, codegen_->GetLabelOf(successor)); + __ jmp(slow_path->GetEntryLabel()); + } +} + +X86Assembler* ParallelMoveResolverX86::GetAssembler() const { + return codegen_->GetAssembler(); +} + +void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src) { + ScratchRegisterScope ensure_scratch( + this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); + int stack_offset = ensure_scratch.IsSpilled() ? 
kX86WordSize : 0; + __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, src + stack_offset)); + __ movl(Address(ESP, dst + stack_offset), static_cast<Register>(ensure_scratch.GetRegister())); +} + +void ParallelMoveResolverX86::EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister()) { + if (destination.IsRegister()) { + __ movl(destination.AsX86().AsCpuRegister(), source.AsX86().AsCpuRegister()); + } else { + DCHECK(destination.IsStackSlot()); + __ movl(Address(ESP, destination.GetStackIndex()), source.AsX86().AsCpuRegister()); + } + } else if (source.IsStackSlot()) { + if (destination.IsRegister()) { + __ movl(destination.AsX86().AsCpuRegister(), Address(ESP, source.GetStackIndex())); + } else { + DCHECK(destination.IsStackSlot()); + MoveMemoryToMemory(destination.GetStackIndex(), + source.GetStackIndex()); + } + } else if (source.IsConstant()) { + HIntConstant* instruction = source.GetConstant()->AsIntConstant(); + Immediate imm(instruction->AsIntConstant()->GetValue()); + if (destination.IsRegister()) { + __ movl(destination.AsX86().AsCpuRegister(), imm); + } else { + __ movl(Address(ESP, destination.GetStackIndex()), imm); + } + } else { + LOG(FATAL) << "Unimplemented"; + } +} + +void ParallelMoveResolverX86::Exchange(Register reg, int mem) { + Register suggested_scratch = reg == EAX ? EBX : EAX; + ScratchRegisterScope ensure_scratch( + this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; + __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset)); + __ movl(Address(ESP, mem + stack_offset), reg); + __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister())); +} + +void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { + ScratchRegisterScope ensure_scratch1( + this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); + + Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX; + ScratchRegisterScope ensure_scratch2( + this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0; + stack_offset += ensure_scratch2.IsSpilled() ? 
kX86WordSize : 0; + __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); + __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); + __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); + __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); +} + +void ParallelMoveResolverX86::EmitSwap(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister() && destination.IsRegister()) { + __ xchgl(destination.AsX86().AsCpuRegister(), source.AsX86().AsCpuRegister()); + } else if (source.IsRegister() && destination.IsStackSlot()) { + Exchange(source.AsX86().AsCpuRegister(), destination.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsRegister()) { + Exchange(destination.AsX86().AsCpuRegister(), source.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsStackSlot()) { + Exchange(destination.GetStackIndex(), source.GetStackIndex()); + } else { + LOG(FATAL) << "Unimplemented"; + } +} + +void ParallelMoveResolverX86::SpillScratch(int reg) { + __ pushl(static_cast<Register>(reg)); +} + +void ParallelMoveResolverX86::RestoreScratch(int reg) { + __ popl(static_cast<Register>(reg)); } } // namespace x86 diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 4a706363b2..23145bfb70 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -19,6 +19,7 @@ #include "code_generator.h" #include "nodes.h" +#include "parallel_move_resolver.h" #include "utils/x86/assembler_x86.h" namespace art { @@ -59,6 +60,28 @@ class InvokeDexCallingConventionVisitor { DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; +class ParallelMoveResolverX86 : public ParallelMoveResolver { + public: + ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen) + : ParallelMoveResolver(allocator), codegen_(codegen) {} + + virtual void EmitMove(size_t index) OVERRIDE; + virtual void EmitSwap(size_t index) OVERRIDE; + virtual void SpillScratch(int reg) OVERRIDE; + virtual void RestoreScratch(int reg) OVERRIDE; + + X86Assembler* GetAssembler() const; + + private: + void Exchange(Register reg, int mem); + void Exchange(int mem1, int mem2); + void MoveMemoryToMemory(int dst, int src); + + CodeGeneratorX86* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86); +}; + class LocationsBuilderX86 : public HGraphVisitor { public: LocationsBuilderX86(HGraph* graph, CodeGeneratorX86* codegen) @@ -67,10 +90,12 @@ class LocationsBuilderX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name) \ virtual void Visit##name(H##name* instr); - FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void HandleInvoke(HInvoke* invoke); + private: CodeGeneratorX86* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -85,7 +110,7 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name) \ virtual void Visit##name(H##name* instr); - FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -94,6 +119,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { 
X86Assembler* GetAssembler() const { return assembler_; } private: + // Generate code for the given suspend check. If not null, `successor` + // is the block to branch to if the suspend check is not needed, and after + // the suspend call. + void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -103,17 +133,21 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { class CodeGeneratorX86 : public CodeGenerator { public: explicit CodeGeneratorX86(HGraph* graph); - virtual ~CodeGeneratorX86() { } + virtual ~CodeGeneratorX86() {} virtual void GenerateFrameEntry() OVERRIDE; virtual void GenerateFrameExit() OVERRIDE; virtual void Bind(Label* label) OVERRIDE; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; + virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; + virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; virtual size_t GetWordSize() const OVERRIDE { return kX86WordSize; } + virtual size_t FrameEntrySpillSize() const OVERRIDE; + virtual HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } @@ -131,7 +165,6 @@ class CodeGeneratorX86 : public CodeGenerator { virtual ManagedRegister AllocateFreeRegister( Primitive::Type type, bool* blocked_registers) const OVERRIDE; - int32_t GetStackSlot(HLocal* local) const; virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE; virtual size_t GetNumberOfCoreRegisters() const OVERRIDE { @@ -145,14 +178,26 @@ class CodeGeneratorX86 : public CodeGenerator { virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; - private: + ParallelMoveResolverX86* GetMoveResolver() { + return &move_resolver_; + } + + virtual InstructionSet GetInstructionSet() const OVERRIDE { + return InstructionSet::kX86; + } + // Helper method to move a 32bits value between two locations. void Move32(Location destination, Location source); // Helper method to move a 64bits value between two locations. void Move64(Location destination, Location source); + // Emit a write barrier. + void MarkGCCard(Register temp, Register card, Register object, Register value); + + private: LocationsBuilderX86 location_builder_; InstructionCodeGeneratorX86 instruction_visitor_; + ParallelMoveResolverX86 move_resolver_; X86Assembler assembler_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc new file mode 100644 index 0000000000..78c7d9d81b --- /dev/null +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -0,0 +1,1592 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "code_generator_x86_64.h" + +#include "entrypoints/quick/quick_entrypoints.h" +#include "gc/accounting/card_table.h" +#include "mirror/array.h" +#include "mirror/art_method.h" +#include "mirror/class.h" +#include "mirror/object_reference.h" +#include "thread.h" +#include "utils/assembler.h" +#include "utils/stack_checks.h" +#include "utils/x86_64/assembler_x86_64.h" +#include "utils/x86_64/managed_register_x86_64.h" + +namespace art { + +x86_64::X86_64ManagedRegister Location::AsX86_64() const { + return reg().AsX86_64(); +} + +namespace x86_64 { + +static constexpr bool kExplicitStackOverflowCheck = false; + +// Some x86_64 instructions require a register to be available as temp. +static constexpr Register TMP = R11; + +static constexpr int kNumberOfPushedRegistersAtEntry = 1; +static constexpr int kCurrentMethodStackOffset = 0; + +static Location X86_64CpuLocation(Register reg) { + return Location::RegisterLocation(X86_64ManagedRegister::FromCpuRegister(reg)); +} + +static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX }; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); + +class InvokeRuntimeCallingConvention : public CallingConvention<Register> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + +#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())-> + +class NullCheckSlowPathX86_64 : public SlowPathCode { + public: + explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + __ gs()->call( + Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true)); + codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + } + + private: + HNullCheck* const instruction_; + DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64); +}; + +class StackOverflowCheckSlowPathX86_64 : public SlowPathCode { + public: + StackOverflowCheckSlowPathX86_64() {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + __ addq(CpuRegister(RSP), + Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize)); + __ gs()->jmp( + Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowStackOverflow), true)); + } + + private: + DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86_64); +}; + +class SuspendCheckSlowPathX86_64 : public SlowPathCode { + public: + explicit SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor) + : instruction_(instruction), successor_(successor) {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + codegen->SaveLiveRegisters(instruction_->GetLocations()); + __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pTestSuspend), true)); + codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + codegen->RestoreLiveRegisters(instruction_->GetLocations()); + if (successor_ == nullptr) { + __ jmp(GetReturnLabel()); + } else { + __ jmp(codegen->GetLabelOf(successor_)); + } + } + + Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } + + private: + HSuspendCheck* const instruction_; + HBasicBlock* const successor_; + Label return_label_; + + 
DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64); +}; + +class BoundsCheckSlowPathX86_64 : public SlowPathCode { + public: + BoundsCheckSlowPathX86_64(HBoundsCheck* instruction, + Location index_location, + Location length_location) + : instruction_(instruction), + index_location_(index_location), + length_location_(length_location) {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86_64* x64_codegen = reinterpret_cast<CodeGeneratorX86_64*>(codegen); + __ Bind(GetEntryLabel()); + InvokeRuntimeCallingConvention calling_convention; + x64_codegen->Move(X86_64CpuLocation(calling_convention.GetRegisterAt(0)), index_location_); + x64_codegen->Move(X86_64CpuLocation(calling_convention.GetRegisterAt(1)), length_location_); + __ gs()->call(Address::Absolute( + QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowArrayBounds), true)); + codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + } + + private: + HBoundsCheck* const instruction_; + const Location index_location_; + const Location length_location_; + + DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64); +}; + +#undef __ +#define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())-> + +inline Condition X86_64Condition(IfCondition cond) { + switch (cond) { + case kCondEQ: return kEqual; + case kCondNE: return kNotEqual; + case kCondLT: return kLess; + case kCondLE: return kLessEqual; + case kCondGT: return kGreater; + case kCondGE: return kGreaterEqual; + default: + LOG(FATAL) << "Unknown if condition"; + } + return kEqual; +} + +void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const { + stream << X86_64ManagedRegister::FromCpuRegister(Register(reg)); +} + +void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { + stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg)); +} + +void CodeGeneratorX86_64::SaveCoreRegister(Location stack_location, uint32_t reg_id) { + __ movq(Address(CpuRegister(RSP), stack_location.GetStackIndex()), CpuRegister(reg_id)); +} + +void CodeGeneratorX86_64::RestoreCoreRegister(Location stack_location, uint32_t reg_id) { + __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_location.GetStackIndex())); +} + +CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph) + : CodeGenerator(graph, kNumberOfRegIds), + location_builder_(graph, this), + instruction_visitor_(graph, this), + move_resolver_(graph->GetArena(), this) {} + +size_t CodeGeneratorX86_64::FrameEntrySpillSize() const { + return kNumberOfPushedRegistersAtEntry * kX86_64WordSize; +} + +InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, + CodeGeneratorX86_64* codegen) + : HGraphVisitor(graph), + assembler_(codegen->GetAssembler()), + codegen_(codegen) {} + +ManagedRegister CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type, + bool* blocked_registers) const { + switch (type) { + case Primitive::kPrimLong: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCpuRegisters); + return X86_64ManagedRegister::FromCpuRegister(static_cast<Register>(reg)); + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << type; + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << type; + } + + return ManagedRegister::NoRegister(); +} + +void 
CodeGeneratorX86_64::SetupBlockedRegisters(bool* blocked_registers) const { + // Stack register is always reserved. + blocked_registers[RSP] = true; + + // Block the register used as TMP. + blocked_registers[TMP] = true; + + // TODO: We currently don't use Quick's callee saved registers. + blocked_registers[RBX] = true; + blocked_registers[RBP] = true; + blocked_registers[R12] = true; + blocked_registers[R13] = true; + blocked_registers[R14] = true; + blocked_registers[R15] = true; +} + +void CodeGeneratorX86_64::GenerateFrameEntry() { + // Create a fake register to mimic Quick. + static const int kFakeReturnRegister = 16; + core_spill_mask_ |= (1 << kFakeReturnRegister); + + bool skip_overflow_check = IsLeafMethod() + && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); + + if (!skip_overflow_check && !kExplicitStackOverflowCheck) { + __ testq(CpuRegister(RAX), Address( + CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64)))); + RecordPcInfo(nullptr, 0); + } + + // The return PC has already been pushed on the stack. + __ subq(CpuRegister(RSP), + Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize)); + + if (!skip_overflow_check && kExplicitStackOverflowCheck) { + SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64(); + AddSlowPath(slow_path); + + __ gs()->cmpq(CpuRegister(RSP), + Address::Absolute(Thread::StackEndOffset<kX86_64WordSize>(), true)); + __ j(kLess, slow_path->GetEntryLabel()); + } + + __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI)); +} + +void CodeGeneratorX86_64::GenerateFrameExit() { + __ addq(CpuRegister(RSP), + Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize)); +} + +void CodeGeneratorX86_64::Bind(Label* label) { + __ Bind(label); +} + +void InstructionCodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) { + __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); +} + +Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const { + switch (load->GetType()) { + case Primitive::kPrimLong: + return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); + break; + + case Primitive::kPrimInt: + case Primitive::kPrimNot: + return Location::StackSlot(GetStackSlot(load->GetLocal())); + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented type " << load->GetType(); + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type " << load->GetType(); + } + + LOG(FATAL) << "Unreachable"; + return Location(); +} + +void CodeGeneratorX86_64::Move(Location destination, Location source) { + if (source.Equals(destination)) { + return; + } + if (destination.IsRegister()) { + if (source.IsRegister()) { + __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister()); + } else if (source.IsStackSlot()) { + __ movl(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex())); + } else { + DCHECK(source.IsDoubleStackSlot()); + __ movq(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex())); + } + } else if (destination.IsStackSlot()) { + if (source.IsRegister()) { + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister()); + } else { + DCHECK(source.IsStackSlot()); + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), 
source.GetStackIndex())); + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + } + } else { + DCHECK(destination.IsDoubleStackSlot()); + if (source.IsRegister()) { + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister()); + } else { + DCHECK(source.IsDoubleStackSlot()); + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + } + } +} + +void CodeGeneratorX86_64::Move(HInstruction* instruction, + Location location, + HInstruction* move_for) { + if (instruction->AsIntConstant() != nullptr) { + Immediate imm(instruction->AsIntConstant()->GetValue()); + if (location.IsRegister()) { + __ movl(location.AsX86_64().AsCpuRegister(), imm); + } else { + __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm); + } + } else if (instruction->AsLongConstant() != nullptr) { + int64_t value = instruction->AsLongConstant()->GetValue(); + if (location.IsRegister()) { + __ movq(location.AsX86_64().AsCpuRegister(), Immediate(value)); + } else { + __ movq(CpuRegister(TMP), Immediate(value)); + __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP)); + } + } else if (instruction->AsLoadLocal() != nullptr) { + switch (instruction->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + Move(location, Location::StackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal()))); + break; + + case Primitive::kPrimLong: + Move(location, Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal()))); + break; + + default: + LOG(FATAL) << "Unimplemented local type " << instruction->GetType(); + } + } else { + DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary()); + switch (instruction->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + Move(location, instruction->GetLocations()->Out()); + break; + + default: + LOG(FATAL) << "Unimplemented type " << instruction->GetType(); + } + } +} + +void LocationsBuilderX86_64::VisitGoto(HGoto* got) { + got->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { + HBasicBlock* successor = got->GetSuccessor(); + DCHECK(!successor->IsExitBlock()); + + HBasicBlock* block = got->GetBlock(); + HInstruction* previous = got->GetPrevious(); + + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) { + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; + } + + if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { + __ jmp(codegen_->GetLabelOf(successor)); + } +} + +void LocationsBuilderX86_64::VisitExit(HExit* exit) { + exit->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) { + if (kIsDebugBuild) { + __ Comment("Unreachable"); + __ int3(); + } +} + +void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { + LocationSummary* locations = 
+ new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + DCHECK(cond->IsCondition()); + HCondition* condition = cond->AsCondition(); + if (condition->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { + HInstruction* cond = if_instr->InputAt(0); + DCHECK(cond->IsCondition()); + HCondition* condition = cond->AsCondition(); + if (condition->NeedsMaterialization()) { + // Moves do not affect the eflags register, so if the condition is evaluated + // just before the if, we don't need to evaluate it again. + if (!condition->IsBeforeWhenDisregardMoves(if_instr)) { + // Materialized condition, compare against 0. + Location lhs = if_instr->GetLocations()->InAt(0); + if (lhs.IsRegister()) { + __ cmpl(lhs.AsX86_64().AsCpuRegister(), Immediate(0)); + } else { + __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); + } + } + __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + } else { + Location lhs = condition->GetLocations()->InAt(0); + Location rhs = condition->GetLocations()->InAt(1); + if (rhs.IsRegister()) { + __ cmpl(lhs.AsX86_64().AsCpuRegister(), rhs.AsX86_64().AsCpuRegister()); + } else if (rhs.IsConstant()) { + __ cmpl(lhs.AsX86_64().AsCpuRegister(), + Immediate(rhs.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ cmpl(lhs.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), rhs.GetStackIndex())); + } + __ j(X86_64Condition(condition->GetCondition()), + codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + } + if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) { + __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + } +} + +void LocationsBuilderX86_64::VisitLocal(HLocal* local) { + local->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitLocal(HLocal* local) { + DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock()); +} + +void LocationsBuilderX86_64::VisitLoadLocal(HLoadLocal* local) { + local->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitLoadLocal(HLoadLocal* load) { + // Nothing to do, this is driven by the code generator. 
+} + +void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall); + switch (store->InputAt(1)->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal()))); + break; + + case Primitive::kPrimLong: + locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal()))); + break; + + default: + LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType(); + } +} + +void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store) { +} + +void LocationsBuilderX86_64::VisitCondition(HCondition* comp) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + if (comp->NeedsMaterialization()) { + locations->SetOut(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) { + if (comp->NeedsMaterialization()) { + LocationSummary* locations = comp->GetLocations(); + CpuRegister reg = locations->Out().AsX86_64().AsCpuRegister(); + // Clear register: setcc only sets the low byte. + __ xorq(reg, reg); + if (locations->InAt(1).IsRegister()) { + __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + } else if (locations->InAt(1).IsConstant()) { + __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(), + Immediate(locations->InAt(1).GetConstant()->AsIntConstant()->GetValue())); + } else { + __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(), + Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex())); + } + __ setcc(X86_64Condition(comp->GetCondition()), reg); + } +} + +void LocationsBuilderX86_64::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); +} + +void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { + VisitCondition(comp); +} + +void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + 
locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { + Label greater, done; + LocationSummary* locations = compare->GetLocations(); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: + __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + break; + default: + LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + } + + __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(0)); + __ j(kEqual, &done); + __ j(kGreater, &greater); + + __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(-1)); + __ jmp(&done); + + __ Bind(&greater); + __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(1)); + + __ Bind(&done); +} + +void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) { +} + +void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) { +} + +void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { + ret->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) { + codegen_->GenerateFrameExit(); + __ ret(); +} + +void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall); + switch (ret->InputAt(0)->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + locations->SetInAt(0, X86_64CpuLocation(RAX)); + break; + + default: + LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType(); + } +} + +void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { + if (kIsDebugBuild) { + switch (ret->InputAt(0)->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), RAX); + break; + + default: + LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType(); + } + } + codegen_->GenerateFrameExit(); + __ ret(); +} + +Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + uint32_t index = gp_index_++; + stack_index_++; + if (index < calling_convention.GetNumberOfRegisters()) { + return X86_64CpuLocation(calling_convention.GetRegisterAt(index)); + } else { + return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); + } + } + + case Primitive::kPrimLong: { + uint32_t index = gp_index_; + stack_index_ += 2; + if (index < 
calling_convention.GetNumberOfRegisters()) { + gp_index_ += 1; + return X86_64CpuLocation(calling_convention.GetRegisterAt(index)); + } else { + gp_index_ += 2; + return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); + } + } + + case Primitive::kPrimDouble: + case Primitive::kPrimFloat: + LOG(FATAL) << "Unimplemented parameter type " << type; + break; + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected parameter type " << type; + break; + } + return Location(); +} + +void LocationsBuilderX86_64::VisitInvokeStatic(HInvokeStatic* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorX86_64::VisitInvokeStatic(HInvokeStatic* invoke) { + CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsX86_64().AsCpuRegister(); + uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>); + size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).SizeValue() + + invoke->GetIndexInDexCache() * heap_reference_size; + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. + + // temp = method; + LoadCurrentMethod(temp); + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); + // temp = temp[index_in_cache] + __ movl(temp, Address(temp, index_in_cache)); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue())); + + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + HandleInvoke(invoke); +} + +void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); + locations->AddTemp(X86_64CpuLocation(RDI)); + + InvokeDexCallingConventionVisitor calling_convention_visitor; + for (size_t i = 0; i < invoke->InputCount(); i++) { + HInstruction* input = invoke->InputAt(i); + locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType())); + } + + switch (invoke->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + locations->SetOut(X86_64CpuLocation(RAX)); + break; + + case Primitive::kPrimVoid: + break; + + case Primitive::kPrimDouble: + case Primitive::kPrimFloat: + LOG(FATAL) << "Unimplemented return type " << invoke->GetType(); + break; + } +} + +void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsX86_64().AsCpuRegister(); + size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() + + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); + LocationSummary* locations = invoke->GetLocations(); + Location receiver = locations->InAt(0); + size_t class_offset = mirror::Object::ClassOffset().SizeValue(); + // temp = object->GetClass(); + if (receiver.IsStackSlot()) { + __ movq(temp, Address(CpuRegister(RSP), receiver.GetStackIndex())); + __ movq(temp, Address(temp, class_offset)); + } else { + __ movq(temp, Address(receiver.AsX86_64().AsCpuRegister(), class_offset)); + } + // temp = 
temp->GetMethodAt(method_offset); + __ movl(temp, Address(temp, method_offset)); + // call temp->GetEntryPoint(); + __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue())); + + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void LocationsBuilderX86_64::VisitAdd(HAdd* add) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); + switch (add->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::SameAsFirstInput()); + break; + } + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected add type " << add->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented add type " << add->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { + LocationSummary* locations = add->GetLocations(); + DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), + locations->Out().AsX86_64().AsCpuRegister().AsRegister()); + switch (add->GetResultType()) { + case Primitive::kPrimInt: { + if (locations->InAt(1).IsRegister()) { + __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + } else if (locations->InAt(1).IsConstant()) { + HConstant* instruction = locations->InAt(1).GetConstant(); + Immediate imm(instruction->AsIntConstant()->GetValue()); + __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(), imm); + } else { + __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(), + Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex())); + } + break; + } + case Primitive::kPrimLong: { + __ addq(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected add type " << add->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented add type " << add->GetResultType(); + } +} + +void LocationsBuilderX86_64::VisitSub(HSub* sub) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall); + switch (sub->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::SameAsFirstInput()); + break; + } + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { + LocationSummary* locations = sub->GetLocations(); + DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), + 
locations->Out().AsX86_64().AsCpuRegister().AsRegister()); + switch (sub->GetResultType()) { + case Primitive::kPrimInt: { + if (locations->InAt(1).IsRegister()) { + __ subl(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + } else if (locations->InAt(1).IsConstant()) { + HConstant* instruction = locations->InAt(1).GetConstant(); + Immediate imm(instruction->AsIntConstant()->GetValue()); + __ subl(locations->InAt(0).AsX86_64().AsCpuRegister(), imm); + } else { + __ subl(locations->InAt(0).AsX86_64().AsCpuRegister(), + Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex())); + } + break; + } + case Primitive::kPrimLong: { + __ subq(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType(); + } +} + +void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); + locations->SetOut(X86_64CpuLocation(RAX)); +} + +void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { + InvokeRuntimeCallingConvention calling_convention; + LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); + __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex())); + + __ gs()->call(Address::Absolute( + QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocObjectWithAccessCheck), true)); + + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); + if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } + locations->SetOut(location); +} + +void InstructionCodeGeneratorX86_64::VisitParameterValue(HParameterValue* instruction) { + // Nothing to do, the parameter is already at its location. 
+} + +void LocationsBuilderX86_64::VisitNot(HNot* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorX86_64::VisitNot(HNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), + locations->Out().AsX86_64().AsCpuRegister().AsRegister()); + __ xorq(locations->Out().AsX86_64().AsCpuRegister(), Immediate(1)); +} + +void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); +} + +void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction) { + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Temporary registers for the write barrier. + if (instruction->GetFieldType() == Primitive::kPrimNot) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + CpuRegister obj = locations->InAt(0).AsX86_64().AsCpuRegister(); + CpuRegister value = locations->InAt(1).AsX86_64().AsCpuRegister(); + size_t offset = instruction->GetFieldOffset().SizeValue(); + Primitive::Type field_type = instruction->GetFieldType(); + + switch (field_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: { + __ movb(Address(obj, offset), value); + break; + } + + case Primitive::kPrimShort: + case Primitive::kPrimChar: { + __ movw(Address(obj, offset), value); + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + __ movl(Address(obj, offset), value); + if (field_type == Primitive::kPrimNot) { + CpuRegister temp = locations->GetTemp(0).AsX86_64().AsCpuRegister(); + CpuRegister card = locations->GetTemp(1).AsX86_64().AsCpuRegister(); + codegen_->MarkGCCard(temp, card, obj, value); + } + break; + } + + case Primitive::kPrimLong: { + __ movq(Address(obj, offset), value); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << field_type; + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << field_type; + } +} + +void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + CpuRegister obj = locations->InAt(0).AsX86_64().AsCpuRegister(); + CpuRegister out = locations->Out().AsX86_64().AsCpuRegister(); + size_t offset = 
instruction->GetFieldOffset().SizeValue(); + + switch (instruction->GetType()) { + case Primitive::kPrimBoolean: { + __ movzxb(out, Address(obj, offset)); + break; + } + + case Primitive::kPrimByte: { + __ movsxb(out, Address(obj, offset)); + break; + } + + case Primitive::kPrimShort: { + __ movsxw(out, Address(obj, offset)); + break; + } + + case Primitive::kPrimChar: { + __ movzxw(out, Address(obj, offset)); + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + __ movl(out, Address(obj, offset)); + break; + } + + case Primitive::kPrimLong: { + __ movq(out, Address(obj, offset)); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << instruction->GetType(); + } +} + +void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::Any()); + // TODO: Have a normalization phase that makes this instruction never used. + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { + SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction); + codegen_->AddSlowPath(slow_path); + + LocationSummary* locations = instruction->GetLocations(); + Location obj = locations->InAt(0); + DCHECK(obj.Equals(locations->Out())); + + if (obj.IsRegister()) { + __ cmpl(obj.AsX86_64().AsCpuRegister(), Immediate(0)); + } else { + DCHECK(locations->InAt(0).IsStackSlot()); + __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0)); + } + __ j(kEqual, slow_path->GetEntryLabel()); +} + +void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + CpuRegister obj = locations->InAt(0).AsX86_64().AsCpuRegister(); + Location index = locations->InAt(1); + + switch (instruction->GetType()) { + case Primitive::kPrimBoolean: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); + CpuRegister out = locations->Out().AsX86_64().AsCpuRegister(); + if (index.IsConstant()) { + __ movzxb(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); + } else { + __ movzxb(out, Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_1, data_offset)); + } + break; + } + + case Primitive::kPrimByte: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); + CpuRegister out = locations->Out().AsX86_64().AsCpuRegister(); + if (index.IsConstant()) { + __ movsxb(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); + } else { + __ movsxb(out, Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_1, data_offset)); + } + break; + } + + case Primitive::kPrimShort: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); + CpuRegister out = 
locations->Out().AsX86_64().AsCpuRegister(); + if (index.IsConstant()) { + __ movsxw(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); + } else { + __ movsxw(out, Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_2, data_offset)); + } + break; + } + + case Primitive::kPrimChar: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); + CpuRegister out = locations->Out().AsX86_64().AsCpuRegister(); + if (index.IsConstant()) { + __ movzxw(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); + } else { + __ movzxw(out, Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_2, data_offset)); + } + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + CpuRegister out = locations->Out().AsX86_64().AsCpuRegister(); + if (index.IsConstant()) { + __ movl(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); + } else { + __ movl(out, Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_4, data_offset)); + } + break; + } + + case Primitive::kPrimLong: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); + CpuRegister out = locations->Out().AsX86_64().AsCpuRegister(); + if (index.IsConstant()) { + __ movq(out, Address(obj, + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset)); + } else { + __ movq(out, Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_8, data_offset)); + } + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << instruction->GetType(); + } +} + +void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { + Primitive::Type value_type = instruction->GetComponentType(); + bool is_object = value_type == Primitive::kPrimNot; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + instruction, is_object ? 
LocationSummary::kCall : LocationSummary::kNoCall); + if (is_object) { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, X86_64CpuLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, X86_64CpuLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, X86_64CpuLocation(calling_convention.GetRegisterAt(2))); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetInAt(2, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + CpuRegister obj = locations->InAt(0).AsX86_64().AsCpuRegister(); + Location index = locations->InAt(1); + Primitive::Type value_type = instruction->GetComponentType(); + + switch (value_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); + CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; + __ movb(Address(obj, offset), value); + } else { + __ movb(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_1, data_offset), value); + } + break; + } + + case Primitive::kPrimShort: + case Primitive::kPrimChar: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); + CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + __ movw(Address(obj, offset), value); + } else { + __ movw(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_2, data_offset), value); + } + break; + } + + case Primitive::kPrimInt: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ movl(Address(obj, offset), value); + } else { + __ movl(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_4, data_offset), value); + } + break; + } + + case Primitive::kPrimNot: { + __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAputObject), true)); + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + break; + } + + case Primitive::kPrimLong: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); + CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister(); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ movq(Address(obj, offset), value); + } else { + __ movq(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_8, data_offset), value); + } + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << instruction->GetType(); + } +} + +void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, 
Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = instruction->GetLocations(); + uint32_t offset = mirror::Array::LengthOffset().Uint32Value(); + CpuRegister obj = locations->InAt(0).AsX86_64().AsCpuRegister(); + CpuRegister out = locations->Out().AsX86_64().AsCpuRegister(); + __ movl(out, Address(obj, offset)); +} + +void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // TODO: Have a normalization phase that makes this instruction never used. + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = instruction->GetLocations(); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64( + instruction, locations->InAt(0), locations->InAt(1)); + codegen_->AddSlowPath(slow_path); + + CpuRegister index = locations->InAt(0).AsX86_64().AsCpuRegister(); + CpuRegister length = locations->InAt(1).AsX86_64().AsCpuRegister(); + + __ cmpl(index, length); + __ j(kAboveEqual, slow_path->GetEntryLabel()); +} + +void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, + CpuRegister card, + CpuRegister object, + CpuRegister value) { + Label is_null; + __ testl(value, value); + __ j(kEqual, &is_null); + __ gs()->movq(card, Address::Absolute( + Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true)); + __ movq(temp, object); + __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift)); + __ movb(Address(temp, card, TIMES_1, 0), card); + __ Bind(&is_null); +} + +void LocationsBuilderX86_64::VisitTemporary(HTemporary* temp) { + temp->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitTemporary(HTemporary* temp) { + // Nothing to do, this is driven by the code generator. +} + +void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction) { + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) { + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + +void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. + return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. 
+ return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { + SuspendCheckSlowPathX86_64* slow_path = + new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); + codegen_->AddSlowPath(slow_path); + __ gs()->cmpw(Address::Absolute( + Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0)); + if (successor == nullptr) { + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ j(kEqual, codegen_->GetLabelOf(successor)); + __ jmp(slow_path->GetEntryLabel()); + } +} + +X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const { + return codegen_->GetAssembler(); +} + +void ParallelMoveResolverX86_64::EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister()) { + if (destination.IsRegister()) { + __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister()); + } else if (destination.IsStackSlot()) { + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), + source.AsX86_64().AsCpuRegister()); + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), + source.AsX86_64().AsCpuRegister()); + } + } else if (source.IsStackSlot()) { + if (destination.IsRegister()) { + __ movl(destination.AsX86_64().AsX86_64().AsCpuRegister(), + Address(CpuRegister(RSP), source.GetStackIndex())); + } else { + DCHECK(destination.IsStackSlot()); + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + } + } else if (source.IsDoubleStackSlot()) { + if (destination.IsRegister()) { + __ movq(destination.AsX86_64().AsX86_64().AsCpuRegister(), + Address(CpuRegister(RSP), source.GetStackIndex())); + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + } + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + if (constant->IsIntConstant()) { + Immediate imm(constant->AsIntConstant()->GetValue()); + if (destination.IsRegister()) { + __ movl(destination.AsX86_64().AsCpuRegister(), imm); + } else { + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); + } + } else if (constant->IsLongConstant()) { + int64_t value = constant->AsLongConstant()->GetValue(); + if (destination.IsRegister()) { + __ movq(destination.AsX86_64().AsCpuRegister(), Immediate(value)); + } else { + __ movq(CpuRegister(TMP), Immediate(value)); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + } + } else { + LOG(FATAL) << "Unimplemented constant type"; + } + } else { + LOG(FATAL) << "Unimplemented"; + } +} + +void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) { + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); + __ movl(Address(CpuRegister(RSP), mem), reg); + __ movl(reg, CpuRegister(TMP)); +} + +void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) { + ScratchRegisterScope ensure_scratch( + this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch.IsSpilled() ? 
kX86_64WordSize : 0; + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); + __ movl(CpuRegister(ensure_scratch.GetRegister()), + Address(CpuRegister(RSP), mem2 + stack_offset)); + __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); + __ movl(Address(CpuRegister(RSP), mem1 + stack_offset), + CpuRegister(ensure_scratch.GetRegister())); +} + +void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); + __ movq(Address(CpuRegister(RSP), mem), reg); + __ movq(reg, CpuRegister(TMP)); +} + +void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { + ScratchRegisterScope ensure_scratch( + this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); + __ movq(CpuRegister(ensure_scratch.GetRegister()), + Address(CpuRegister(RSP), mem2 + stack_offset)); + __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); + __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), + CpuRegister(ensure_scratch.GetRegister())); +} + +void ParallelMoveResolverX86_64::EmitSwap(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister() && destination.IsRegister()) { + __ xchgq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister()); + } else if (source.IsRegister() && destination.IsStackSlot()) { + Exchange32(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsRegister()) { + Exchange32(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsStackSlot()) { + Exchange32(destination.GetStackIndex(), source.GetStackIndex()); + } else if (source.IsRegister() && destination.IsDoubleStackSlot()) { + Exchange64(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex()); + } else if (source.IsDoubleStackSlot() && destination.IsRegister()) { + Exchange64(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex()); + } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { + Exchange64(destination.GetStackIndex(), source.GetStackIndex()); + } else { + LOG(FATAL) << "Unimplemented"; + } +} + + +void ParallelMoveResolverX86_64::SpillScratch(int reg) { + __ pushq(CpuRegister(reg)); +} + + +void ParallelMoveResolverX86_64::RestoreScratch(int reg) { + __ popq(CpuRegister(reg)); +} + +} // namespace x86_64 +} // namespace art diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h new file mode 100644 index 0000000000..a299cf6476 --- /dev/null +++ b/compiler/optimizing/code_generator_x86_64.h @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ + +#include "code_generator.h" +#include "nodes.h" +#include "parallel_move_resolver.h" +#include "utils/x86_64/assembler_x86_64.h" + +namespace art { +namespace x86_64 { + +static constexpr size_t kX86_64WordSize = 8; + +static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 }; + +static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); + +class InvokeDexCallingConvention : public CallingConvention<Register> { + public: + InvokeDexCallingConvention() + : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); +}; + +class InvokeDexCallingConventionVisitor { + public: + InvokeDexCallingConventionVisitor() : gp_index_(0), stack_index_(0) {} + + Location GetNextLocation(Primitive::Type type); + + private: + InvokeDexCallingConvention calling_convention; + uint32_t gp_index_; + uint32_t stack_index_; + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); +}; + +class CodeGeneratorX86_64; + +class ParallelMoveResolverX86_64 : public ParallelMoveResolver { + public: + ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen) + : ParallelMoveResolver(allocator), codegen_(codegen) {} + + virtual void EmitMove(size_t index) OVERRIDE; + virtual void EmitSwap(size_t index) OVERRIDE; + virtual void SpillScratch(int reg) OVERRIDE; + virtual void RestoreScratch(int reg) OVERRIDE; + + X86_64Assembler* GetAssembler() const; + + private: + void Exchange32(CpuRegister reg, int mem); + void Exchange32(int mem1, int mem2); + void Exchange64(CpuRegister reg, int mem); + void Exchange64(int mem1, int mem2); + + CodeGeneratorX86_64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64); +}; + +class LocationsBuilderX86_64 : public HGraphVisitor { + public: + LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen) + : HGraphVisitor(graph), codegen_(codegen) {} + +#define DECLARE_VISIT_INSTRUCTION(name) \ + virtual void Visit##name(H##name* instr); + + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + void HandleInvoke(HInvoke* invoke); + + private: + CodeGeneratorX86_64* const codegen_; + InvokeDexCallingConventionVisitor parameter_visitor_; + + DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64); +}; + +class InstructionCodeGeneratorX86_64 : public HGraphVisitor { + public: + InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen); + +#define DECLARE_VISIT_INSTRUCTION(name) \ + virtual void Visit##name(H##name* instr); + + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + void LoadCurrentMethod(CpuRegister reg); + + X86_64Assembler* GetAssembler() const { return assembler_; } + + private: + // Generate code for the given suspend check. If not null, `successor` + // is the block to branch to if the suspend check is not needed, and after + // the suspend call. 
+ void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); + + X86_64Assembler* const assembler_; + CodeGeneratorX86_64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64); +}; + +class CodeGeneratorX86_64 : public CodeGenerator { + public: + explicit CodeGeneratorX86_64(HGraph* graph); + virtual ~CodeGeneratorX86_64() {} + + virtual void GenerateFrameEntry() OVERRIDE; + virtual void GenerateFrameExit() OVERRIDE; + virtual void Bind(Label* label) OVERRIDE; + virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; + virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; + virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE; + + virtual size_t GetWordSize() const OVERRIDE { + return kX86_64WordSize; + } + + virtual size_t FrameEntrySpillSize() const OVERRIDE; + + virtual HGraphVisitor* GetLocationBuilder() OVERRIDE { + return &location_builder_; + } + + virtual HGraphVisitor* GetInstructionVisitor() OVERRIDE { + return &instruction_visitor_; + } + + virtual X86_64Assembler* GetAssembler() OVERRIDE { + return &assembler_; + } + + ParallelMoveResolverX86_64* GetMoveResolver() { + return &move_resolver_; + } + + virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE; + + virtual size_t GetNumberOfRegisters() const OVERRIDE { + return kNumberOfRegIds; + } + + virtual size_t GetNumberOfCoreRegisters() const OVERRIDE { + return kNumberOfCpuRegisters; + } + + virtual size_t GetNumberOfFloatingPointRegisters() const OVERRIDE { + return kNumberOfFloatRegisters; + } + + virtual void SetupBlockedRegisters(bool* blocked_registers) const OVERRIDE; + virtual ManagedRegister AllocateFreeRegister( + Primitive::Type type, bool* blocked_registers) const OVERRIDE; + virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; + virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + + virtual InstructionSet GetInstructionSet() const OVERRIDE { + return InstructionSet::kX86_64; + } + + // Emit a write barrier. + void MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object, CpuRegister value); + + // Helper method to move a value between two locations. 
+ void Move(Location destination, Location source); + + private: + LocationsBuilderX86_64 location_builder_; + InstructionCodeGeneratorX86_64 instruction_visitor_; + ParallelMoveResolverX86_64 move_resolver_; + X86_64Assembler assembler_; + + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); +}; + +} // namespace x86_64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 7684bb189d..7161eed9f9 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -15,7 +15,9 @@ */ #include "builder.h" -#include "code_generator.h" +#include "code_generator_arm.h" +#include "code_generator_x86.h" +#include "code_generator_x86_64.h" #include "common_compiler_test.h" #include "dex_file.h" #include "dex_instruction.h" @@ -47,33 +49,53 @@ class InternalCodeAllocator : public CodeAllocator { DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); }; +static void Run(const InternalCodeAllocator& allocator, + const CodeGenerator& codegen, + bool has_result, + int32_t expected) { + typedef int32_t (*fptr)(); + CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); + fptr f = reinterpret_cast<fptr>(allocator.GetMemory()); + if (codegen.GetInstructionSet() == kThumb2) { + // For thumb we need the bottom bit set. + f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1); + } + int32_t result = f(); + if (has_result) { + CHECK_EQ(result, expected); + } +} + static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { ArenaPool pool; ArenaAllocator arena(&pool); HGraphBuilder builder(&arena); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); HGraph* graph = builder.BuildGraph(*item); + // Remove suspend checks, they cannot be executed in this context. + RemoveSuspendChecks(graph); ASSERT_NE(graph, nullptr); InternalCodeAllocator allocator; - CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, kX86); - codegen->Compile(&allocator); - typedef int32_t (*fptr)(); -#if defined(__i386__) - CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); - int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())(); - if (has_result) { - CHECK_EQ(result, expected); + + x86::CodeGeneratorX86 codegenX86(graph); + // We avoid doing a stack overflow check that requires the runtime being setup, + // by making sure the compiler knows the methods we are running are leaf methods. 
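
// --- A minimal standalone sketch (not from the patch) of the Thumb entry-point
// adjustment performed in Run() above: branching into generated Thumb2 code on
// ARM requires the low bit of the target address to be set, so the raw buffer
// address is bumped by one before being used as a function pointer. The buffer
// below is only a placeholder and is never executed.
#include <cstdint>
#include <iostream>

int main() {
  alignas(4) static const unsigned char code_buffer[16] = {0};
  uintptr_t entry = reinterpret_cast<uintptr_t>(code_buffer);

  const bool is_thumb2 = true;  // Assumption: the code generator emitted Thumb2 code.
  if (is_thumb2) {
    // Equivalent to setting bit 0 on an aligned buffer; BX/BLX then switches to Thumb state.
    entry += 1;
  }

  std::cout << std::hex << "buffer at 0x" << reinterpret_cast<uintptr_t>(code_buffer)
            << ", entry point 0x" << entry << std::endl;
  return 0;
}
// --- end of sketch ---
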
+ codegenX86.CompileBaseline(&allocator, true); + if (kRuntimeISA == kX86) { + Run(allocator, codegenX86, has_result, expected); } -#endif - codegen = CodeGenerator::Create(&arena, graph, kArm); - codegen->Compile(&allocator); -#if defined(__arm__) - CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); - int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())(); - if (has_result) { - CHECK_EQ(result, expected); + + arm::CodeGeneratorARM codegenARM(graph); + codegenARM.CompileBaseline(&allocator, true); + if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { + Run(allocator, codegenARM, has_result, expected); + } + + x86_64::CodeGeneratorX86_64 codegenX86_64(graph); + codegenX86_64.CompileBaseline(&allocator, true); + if (kRuntimeISA == kX86_64) { + Run(allocator, codegenX86_64, has_result, expected); } -#endif } TEST(CodegenTest, ReturnVoid) { diff --git a/compiler/optimizing/constant_propagation.cc b/compiler/optimizing/constant_propagation.cc new file mode 100644 index 0000000000..d675164fa4 --- /dev/null +++ b/compiler/optimizing/constant_propagation.cc @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "constant_propagation.h" + +namespace art { + +void ConstantPropagation::Run() { + // Process basic blocks in reverse post-order in the dominator tree, + // so that an instruction turned into a constant, used as input of + // another instruction, may possibly be used to turn that second + // instruction into a constant as well. + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + // Traverse this block's instructions in (forward) order and + // replace the ones that can be statically evaluated by a + // compile-time counterpart. + for (HInstructionIterator it(block->GetInstructions()); + !it.Done(); it.Advance()) { + HInstruction* inst = it.Current(); + // Constant folding: replace `c <- a op b' with a compile-time + // evaluation of `a op b' if `a' and `b' are constant. + if (inst->IsBinaryOperation()) { + HConstant* constant = + inst->AsBinaryOperation()->TryStaticEvaluation(graph_->GetArena()); + if (constant != nullptr) { + inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant); + } + } + } + } +} + +} // namespace art diff --git a/compiler/optimizing/constant_propagation.h b/compiler/optimizing/constant_propagation.h new file mode 100644 index 0000000000..0729881888 --- /dev/null +++ b/compiler/optimizing/constant_propagation.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CONSTANT_PROPAGATION_H_ +#define ART_COMPILER_OPTIMIZING_CONSTANT_PROPAGATION_H_ + +#include "nodes.h" + +namespace art { + +/** + * Optimization pass performing a simple constant propagation on the + * SSA form. + */ +class ConstantPropagation : public ValueObject { + public: + explicit ConstantPropagation(HGraph* graph) + : graph_(graph) {} + + void Run(); + + private: + HGraph* const graph_; + + DISALLOW_COPY_AND_ASSIGN(ConstantPropagation); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CONSTANT_PROPAGATION_H_ diff --git a/compiler/optimizing/constant_propagation_test.cc b/compiler/optimizing/constant_propagation_test.cc new file mode 100644 index 0000000000..5c8c709439 --- /dev/null +++ b/compiler/optimizing/constant_propagation_test.cc @@ -0,0 +1,487 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "constant_propagation.h" +#include "dead_code_elimination.h" +#include "pretty_printer.h" +#include "graph_checker.h" +#include "optimizing_unit_test.h" + +#include "gtest/gtest.h" + +namespace art { + +static void TestCode(const uint16_t* data, + const std::string& expected_before, + const std::string& expected_after_cp, + const std::string& expected_after_dce) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateCFG(&allocator, data); + ASSERT_NE(graph, nullptr); + + graph->BuildDominatorTree(); + graph->TransformToSSA(); + + StringPrettyPrinter printer_before(graph); + printer_before.VisitInsertionOrder(); + std::string actual_before = printer_before.str(); + ASSERT_EQ(expected_before, actual_before); + + ConstantPropagation(graph).Run(); + + StringPrettyPrinter printer_after_cp(graph); + printer_after_cp.VisitInsertionOrder(); + std::string actual_after_cp = printer_after_cp.str(); + ASSERT_EQ(expected_after_cp, actual_after_cp); + + DeadCodeElimination(graph).Run(); + + StringPrettyPrinter printer_after_dce(graph); + printer_after_dce.VisitInsertionOrder(); + std::string actual_after_dce = printer_after_dce.str(); + ASSERT_EQ(expected_after_dce, actual_after_dce); + + SSAChecker ssa_checker(&allocator, graph); + ssa_checker.VisitInsertionOrder(); + ASSERT_TRUE(ssa_checker.IsValid()); +} + + +/** + * Tiny three-register program exercising int constant folding on addition. + * + * 16-bit + * offset + * ------ + * v0 <- 1 0. const/4 v0, #+1 + * v1 <- 2 1. const/4 v1, #+2 + * v2 <- v0 + v1 2. add-int v2, v0, v1 + * return v2 4. 
return v2 + */ +TEST(ConstantPropagation, IntConstantFoldingOnAddition1) { + const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 << 8 | 1 << 12, + Instruction::CONST_4 | 1 << 8 | 2 << 12, + Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, + Instruction::RETURN | 2 << 8); + + std::string expected_before = + "BasicBlock 0, succ: 1\n" + " 3: IntConstant [9]\n" + " 5: IntConstant [9]\n" + " 14: SuspendCheck\n" + " 15: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 2\n" + " 9: Add(3, 5) [12]\n" + " 12: Return(9)\n" + "BasicBlock 2, pred: 1\n" + " 13: Exit\n"; + + // Expected difference after constant propagation. + diff_t expected_cp_diff = { + { " 3: IntConstant [9]\n", " 3: IntConstant\n" }, + { " 5: IntConstant [9]\n", " 5: IntConstant\n" }, + { " 9: Add(3, 5) [12]\n", " 16: IntConstant [12]\n" }, + { " 12: Return(9)\n", " 12: Return(16)\n" } + }; + std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + + // Expected difference after dead code elimination. + diff_t expected_dce_diff = { + { " 3: IntConstant\n", removed }, + { " 5: IntConstant\n", removed } + }; + std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + + TestCode(data, expected_before, expected_after_cp, expected_after_dce); +} + +/** + * Small three-register program exercising int constant folding on addition. + * + * 16-bit + * offset + * ------ + * v0 <- 1 0. const/4 v0, #+1 + * v1 <- 2 1. const/4 v1, #+2 + * v0 <- v0 + v1 2. add-int/2addr v0, v1 + * v1 <- 3 3. const/4 v1, #+3 + * v2 <- 4 4. const/4 v2, #+4 + * v1 <- v1 + v2 5. add-int/2addr v1, v2 + * v2 <- v0 + v1 6. add-int v2, v0, v1 + * return v2 8. return v2 + */ +TEST(ConstantPropagation, IntConstantFoldingOnAddition2) { + const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 << 8 | 1 << 12, + Instruction::CONST_4 | 1 << 8 | 2 << 12, + Instruction::ADD_INT_2ADDR | 0 << 8 | 1 << 12, + Instruction::CONST_4 | 1 << 8 | 3 << 12, + Instruction::CONST_4 | 2 << 8 | 4 << 12, + Instruction::ADD_INT_2ADDR | 1 << 8 | 2 << 12, + Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, + Instruction::RETURN | 2 << 8); + + std::string expected_before = + "BasicBlock 0, succ: 1\n" + " 3: IntConstant [9]\n" + " 5: IntConstant [9]\n" + " 11: IntConstant [17]\n" + " 13: IntConstant [17]\n" + " 26: SuspendCheck\n" + " 27: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 2\n" + " 9: Add(3, 5) [21]\n" + " 17: Add(11, 13) [21]\n" + " 21: Add(9, 17) [24]\n" + " 24: Return(21)\n" + "BasicBlock 2, pred: 1\n" + " 25: Exit\n"; + + // Expected difference after constant propagation. + diff_t expected_cp_diff = { + { " 3: IntConstant [9]\n", " 3: IntConstant\n" }, + { " 5: IntConstant [9]\n", " 5: IntConstant\n" }, + { " 11: IntConstant [17]\n", " 11: IntConstant\n" }, + { " 13: IntConstant [17]\n", " 13: IntConstant\n" }, + { " 9: Add(3, 5) [21]\n", " 28: IntConstant\n" }, + { " 17: Add(11, 13) [21]\n", " 29: IntConstant\n" }, + { " 21: Add(9, 17) [24]\n", " 30: IntConstant [24]\n" }, + { " 24: Return(21)\n", " 24: Return(30)\n" } + }; + std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + + // Expected difference after dead code elimination. 
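
// --- A minimal standalone sketch (not from the patch) of the folding step that
// ConstantPropagation::Run() performs and that the tests in this file exercise:
// instructions are visited in order, and a binary operation whose two inputs
// are already constants is replaced by its compile-time value, which in turn
// lets later operations fold. The tiny flat IR below (kind plus operand
// indices) is an assumption made purely for illustration.
#include <cstddef>
#include <iostream>
#include <vector>

enum class Kind { kConstant, kAdd, kSub };

struct Inst {
  Kind kind;
  int value;  // Meaningful once the instruction is (or becomes) a constant.
  int lhs;    // Index of the left input, for kAdd/kSub.
  int rhs;    // Index of the right input, for kAdd/kSub.
};

// Fold every binary operation whose inputs are constants, in program order.
void FoldConstants(std::vector<Inst>* insts) {
  for (Inst& inst : *insts) {
    if (inst.kind == Kind::kConstant) continue;
    const Inst& a = (*insts)[inst.lhs];
    const Inst& b = (*insts)[inst.rhs];
    if (a.kind == Kind::kConstant && b.kind == Kind::kConstant) {
      inst.value = (inst.kind == Kind::kAdd) ? a.value + b.value : a.value - b.value;
      inst.kind = Kind::kConstant;  // The operation is now a compile-time constant.
    }
  }
}

int main() {
  // v0 <- 1; v1 <- 2; v2 <- v0 + v1; v3 <- v2 - v1
  std::vector<Inst> insts = {
      {Kind::kConstant, 1, 0, 0},
      {Kind::kConstant, 2, 0, 0},
      {Kind::kAdd, 0, 0, 1},
      {Kind::kSub, 0, 2, 1},
  };
  FoldConstants(&insts);
  for (std::size_t i = 0; i < insts.size(); ++i) {
    std::cout << "inst " << i << ": constant " << insts[i].value << std::endl;
  }
  return 0;
}
// --- end of sketch ---
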
+ diff_t expected_dce_diff = { + { " 3: IntConstant\n", removed }, + { " 5: IntConstant\n", removed }, + { " 11: IntConstant\n", removed }, + { " 13: IntConstant\n", removed }, + { " 28: IntConstant\n", removed }, + { " 29: IntConstant\n", removed } + }; + std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + + TestCode(data, expected_before, expected_after_cp, expected_after_dce); +} + +/** + * Tiny three-register program exercising int constant folding on subtraction. + * + * 16-bit + * offset + * ------ + * v0 <- 3 0. const/4 v0, #+3 + * v1 <- 2 1. const/4 v1, #+2 + * v2 <- v0 - v1 2. sub-int v2, v0, v1 + * return v2 4. return v2 + */ +TEST(ConstantPropagation, IntConstantFoldingOnSubtraction) { + const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 << 8 | 3 << 12, + Instruction::CONST_4 | 1 << 8 | 2 << 12, + Instruction::SUB_INT | 2 << 8, 0 | 1 << 8, + Instruction::RETURN | 2 << 8); + + std::string expected_before = + "BasicBlock 0, succ: 1\n" + " 3: IntConstant [9]\n" + " 5: IntConstant [9]\n" + " 14: SuspendCheck\n" + " 15: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 2\n" + " 9: Sub(3, 5) [12]\n" + " 12: Return(9)\n" + "BasicBlock 2, pred: 1\n" + " 13: Exit\n"; + + // Expected difference after constant propagation. + diff_t expected_cp_diff = { + { " 3: IntConstant [9]\n", " 3: IntConstant\n" }, + { " 5: IntConstant [9]\n", " 5: IntConstant\n" }, + { " 9: Sub(3, 5) [12]\n", " 16: IntConstant [12]\n" }, + { " 12: Return(9)\n", " 12: Return(16)\n" } + }; + std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + + // Expected difference after dead code elimination. + diff_t expected_dce_diff = { + { " 3: IntConstant\n", removed }, + { " 5: IntConstant\n", removed } + }; + std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + + TestCode(data, expected_before, expected_after_cp, expected_after_dce); +} + +#define SIX_REGISTERS_CODE_ITEM(...) \ + { 6, 0, 0, 0, 0, 0, NUM_INSTRUCTIONS(__VA_ARGS__), 0, __VA_ARGS__ } + +/** + * Tiny three-register-pair program exercising long constant folding + * on addition. + * + * 16-bit + * offset + * ------ + * (v0, v1) <- 1 0. const-wide/16 v0, #+1 + * (v2, v3) <- 2 2. const-wide/16 v2, #+2 + * (v4, v5) <- + * (v0, v1) + (v1, v2) 4. add-long v4, v0, v2 + * return (v4, v5) 6. return-wide v4 + */ +TEST(ConstantPropagation, LongConstantFoldingOnAddition) { + const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( + Instruction::CONST_WIDE_16 | 0 << 8, 1, + Instruction::CONST_WIDE_16 | 2 << 8, 2, + Instruction::ADD_LONG | 4 << 8, 0 | 2 << 8, + Instruction::RETURN_WIDE | 4 << 8); + + std::string expected_before = + "BasicBlock 0, succ: 1\n" + " 6: LongConstant [12]\n" + " 8: LongConstant [12]\n" + " 17: SuspendCheck\n" + " 18: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 2\n" + " 12: Add(6, 8) [15]\n" + " 15: Return(12)\n" + "BasicBlock 2, pred: 1\n" + " 16: Exit\n"; + + // Expected difference after constant propagation. + diff_t expected_cp_diff = { + { " 6: LongConstant [12]\n", " 6: LongConstant\n" }, + { " 8: LongConstant [12]\n", " 8: LongConstant\n" }, + { " 12: Add(6, 8) [15]\n", " 19: LongConstant [15]\n" }, + { " 15: Return(12)\n", " 15: Return(19)\n" } + }; + std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + + // Expected difference after dead code elimination. 
+ diff_t expected_dce_diff = { + { " 6: LongConstant\n", removed }, + { " 8: LongConstant\n", removed } + }; + std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + + TestCode(data, expected_before, expected_after_cp, expected_after_dce); +} + +/** + * Tiny three-register-pair program exercising long constant folding + * on subtraction. + * + * 16-bit + * offset + * ------ + * (v0, v1) <- 3 0. const-wide/16 v0, #+3 + * (v2, v3) <- 2 2. const-wide/16 v2, #+2 + * (v4, v5) <- + * (v0, v1) - (v1, v2) 4. sub-long v4, v0, v2 + * return (v4, v5) 6. return-wide v4 + */ +TEST(ConstantPropagation, LongConstantFoldingOnSubtraction) { + const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( + Instruction::CONST_WIDE_16 | 0 << 8, 3, + Instruction::CONST_WIDE_16 | 2 << 8, 2, + Instruction::SUB_LONG | 4 << 8, 0 | 2 << 8, + Instruction::RETURN_WIDE | 4 << 8); + + std::string expected_before = + "BasicBlock 0, succ: 1\n" + " 6: LongConstant [12]\n" + " 8: LongConstant [12]\n" + " 17: SuspendCheck\n" + " 18: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 2\n" + " 12: Sub(6, 8) [15]\n" + " 15: Return(12)\n" + "BasicBlock 2, pred: 1\n" + " 16: Exit\n"; + + // Expected difference after constant propagation. + diff_t expected_cp_diff = { + { " 6: LongConstant [12]\n", " 6: LongConstant\n" }, + { " 8: LongConstant [12]\n", " 8: LongConstant\n" }, + { " 12: Sub(6, 8) [15]\n", " 19: LongConstant [15]\n" }, + { " 15: Return(12)\n", " 15: Return(19)\n" } + }; + std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + + // Expected difference after dead code elimination. + diff_t expected_dce_diff = { + { " 6: LongConstant\n", removed }, + { " 8: LongConstant\n", removed } + }; + std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + + TestCode(data, expected_before, expected_after_cp, expected_after_dce); +} + +/** + * Three-register program with jumps leading to the creation of many + * blocks. + * + * The intent of this test is to ensure that all constant expressions + * are actually evaluated at compile-time, thanks to the reverse + * (forward) post-order traversal of the the dominator tree. + * + * 16-bit + * offset + * ------ + * v0 <- 0 0. const/4 v0, #+0 + * v1 <- 1 1. const/4 v1, #+1 + * v2 <- v0 + v1 2. add-int v2, v0, v1 + * goto L2 4. goto +4 + * L1: v1 <- v0 + 3 5. add-int/lit16 v1, v0, #+3 + * goto L3 7. goto +4 + * L2: v0 <- v2 + 2 8. add-int/lit16 v0, v2, #+2 + * goto L1 10. goto +(-5) + * L3: v2 <- v1 + 4 11. add-int/lit16 v2, v1, #+4 + * return v2 13. 
return v2 + */ +TEST(ConstantPropagation, IntConstantFoldingAndJumps) { + const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 << 8 | 0 << 12, + Instruction::CONST_4 | 1 << 8 | 1 << 12, + Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, + Instruction::GOTO | 4 << 8, + Instruction::ADD_INT_LIT16 | 1 << 8 | 0 << 12, 3, + Instruction::GOTO | 4 << 8, + Instruction::ADD_INT_LIT16 | 0 << 8 | 2 << 12, 2, + static_cast<uint16_t>(Instruction::GOTO | -5 << 8), + Instruction::ADD_INT_LIT16 | 2 << 8 | 1 << 12, 4, + Instruction::RETURN | 2 << 8); + + std::string expected_before = + "BasicBlock 0, succ: 1\n" + " 3: IntConstant [9]\n" + " 5: IntConstant [9]\n" + " 13: IntConstant [14]\n" + " 18: IntConstant [19]\n" + " 24: IntConstant [25]\n" + " 30: SuspendCheck\n" + " 31: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 3\n" + " 9: Add(3, 5) [19]\n" + " 11: Goto 3\n" + "BasicBlock 2, pred: 3, succ: 4\n" + " 14: Add(19, 13) [25]\n" + " 16: Goto 4\n" + "BasicBlock 3, pred: 1, succ: 2\n" + " 19: Add(9, 18) [14]\n" + " 21: SuspendCheck\n" + " 22: Goto 2\n" + "BasicBlock 4, pred: 2, succ: 5\n" + " 25: Add(14, 24) [28]\n" + " 28: Return(25)\n" + "BasicBlock 5, pred: 4\n" + " 29: Exit\n"; + + // Expected difference after constant propagation. + diff_t expected_cp_diff = { + { " 3: IntConstant [9]\n", " 3: IntConstant\n" }, + { " 5: IntConstant [9]\n", " 5: IntConstant []\n" }, + { " 13: IntConstant [14]\n", " 13: IntConstant\n" }, + { " 18: IntConstant [19]\n", " 18: IntConstant\n" }, + { " 24: IntConstant [25]\n", " 24: IntConstant\n" }, + { " 9: Add(3, 5) [19]\n", " 32: IntConstant []\n" }, + { " 14: Add(19, 13) [25]\n", " 34: IntConstant\n" }, + { " 19: Add(9, 18) [14]\n", " 33: IntConstant []\n" }, + { " 25: Add(14, 24) [28]\n", " 35: IntConstant [28]\n" }, + { " 28: Return(25)\n", " 28: Return(35)\n"} + }; + std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + + // Expected difference after dead code elimination. + diff_t expected_dce_diff = { + { " 3: IntConstant\n", removed }, + { " 13: IntConstant\n", removed }, + { " 18: IntConstant\n", removed }, + { " 24: IntConstant\n", removed }, + { " 34: IntConstant\n", removed }, + }; + std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + + TestCode(data, expected_before, expected_after_cp, expected_after_dce); +} + + +/** + * Three-register program with a constant (static) condition. + * + * 16-bit + * offset + * ------ + * v1 <- 1 0. const/4 v1, #+1 + * v0 <- 0 1. const/4 v0, #+0 + * if v1 >= 0 goto L1 2. if-gez v1, +3 + * v0 <- v1 4. move v0, v1 + * L1: v2 <- v0 + v1 5. add-int v2, v0, v1 + * return-void 7. 
return + */ +TEST(ConstantPropagation, ConstantCondition) { + const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 1 << 8 | 1 << 12, + Instruction::CONST_4 | 0 << 8 | 0 << 12, + Instruction::IF_GEZ | 1 << 8, 3, + Instruction::MOVE | 0 << 8 | 1 << 12, + Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, + Instruction::RETURN_VOID); + + std::string expected_before = + "BasicBlock 0, succ: 1\n" + " 3: IntConstant [15, 22, 8]\n" + " 5: IntConstant [22, 8]\n" + " 19: SuspendCheck\n" + " 20: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 5, 2\n" + " 8: GreaterThanOrEqual(3, 5) [9]\n" + " 9: If(8)\n" + "BasicBlock 2, pred: 1, succ: 3\n" + " 12: Goto 3\n" + "BasicBlock 3, pred: 2, 5, succ: 4\n" + " 22: Phi(3, 5) [15]\n" + " 15: Add(22, 3)\n" + " 17: ReturnVoid\n" + "BasicBlock 4, pred: 3\n" + " 18: Exit\n" + "BasicBlock 5, pred: 1, succ: 3\n" + " 21: Goto 3\n"; + + // Expected difference after constant propagation. + diff_t expected_cp_diff = { + { " 3: IntConstant [15, 22, 8]\n", " 3: IntConstant [15, 22]\n" }, + { " 5: IntConstant [22, 8]\n", " 5: IntConstant [22]\n" }, + { " 8: GreaterThanOrEqual(3, 5) [9]\n", " 23: IntConstant [9]\n" }, + { " 9: If(8)\n", " 9: If(23)\n" } + }; + std::string expected_after_cp = Patch(expected_before, expected_cp_diff); + + // Expected difference after dead code elimination. + diff_t expected_dce_diff = { + { " 3: IntConstant [15, 22]\n", " 3: IntConstant [22]\n" }, + { " 22: Phi(3, 5) [15]\n", " 22: Phi(3, 5)\n" }, + { " 15: Add(22, 3)\n", removed } + }; + std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff); + + TestCode(data, expected_before, expected_after_cp, expected_after_dce); +} + +} // namespace art diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc new file mode 100644 index 0000000000..fe2adc77d0 --- /dev/null +++ b/compiler/optimizing/dead_code_elimination.cc @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dead_code_elimination.h" + +#include "base/bit_vector-inl.h" + +namespace art { + +void DeadCodeElimination::Run() { + // Process basic blocks in post-order in the dominator tree, so that + // a dead instruction depending on another dead instruction is + // removed. + for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) { + HBasicBlock* block = b.Current(); + // Traverse this block's instructions in backward order and remove + // the unused ones. + HBackwardInstructionIterator i(block->GetInstructions()); + // Skip the first iteration, as the last instruction of a block is + // a branching instruction. 
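
// --- A minimal standalone sketch (not from the patch) of the backward sweep
// that DeadCodeElimination::Run() performs: instructions are visited from last
// to first, anything without side effects and without uses is dropped, and
// dropping it releases its inputs, so a whole chain of dead instructions
// disappears in a single pass. The flat instruction list and explicit use
// counts are illustrative assumptions.
#include <iostream>
#include <string>
#include <vector>

struct Inst {
  std::string name;
  bool has_side_effects;
  std::vector<int> inputs;  // Indices of the instructions this one uses.
  int use_count;
  bool removed;
};

void EliminateDeadCode(std::vector<Inst>* insts) {
  // Walk backwards so every user is considered before its inputs.
  for (int i = static_cast<int>(insts->size()) - 1; i >= 0; --i) {
    Inst& inst = (*insts)[i];
    if (!inst.has_side_effects && inst.use_count == 0) {
      inst.removed = true;
      for (int input : inst.inputs) {
        --(*insts)[input].use_count;  // This input just lost one user.
      }
    }
  }
}

int main() {
  // c0 <- 1; c1 <- 2; add <- c0 + c1 (never used); return-void (side effect).
  std::vector<Inst> insts = {
      {"c0", false, {}, 1, false},
      {"c1", false, {}, 1, false},
      {"add", false, {0, 1}, 0, false},
      {"return-void", true, {}, 0, false},
  };
  EliminateDeadCode(&insts);
  for (const Inst& inst : insts) {
    std::cout << inst.name << (inst.removed ? ": removed" : ": kept") << std::endl;
  }
  return 0;
}
// --- end of sketch ---
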
+ DCHECK(i.Current()->IsControlFlow()); + for (i.Advance(); !i.Done(); i.Advance()) { + HInstruction* inst = i.Current(); + DCHECK(!inst->IsControlFlow()); + if (!inst->HasSideEffects() && !inst->HasUses() && !inst->IsSuspendCheck()) { + block->RemoveInstruction(inst); + } + } + } +} + +} // namespace art diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h new file mode 100644 index 0000000000..48739be494 --- /dev/null +++ b/compiler/optimizing/dead_code_elimination.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_ +#define ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_ + +#include "nodes.h" + +namespace art { + +/** + * Optimization pass performing dead code elimination (removal of + * unused variables/instructions) on the SSA form. + */ +class DeadCodeElimination : public ValueObject { + public: + explicit DeadCodeElimination(HGraph* graph) + : graph_(graph) {} + + void Run(); + + private: + HGraph* const graph_; + + DISALLOW_COPY_AND_ASSIGN(DeadCodeElimination); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_ diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc new file mode 100644 index 0000000000..245bcb21d5 --- /dev/null +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "dead_code_elimination.h" +#include "pretty_printer.h" +#include "graph_checker.h" +#include "optimizing_unit_test.h" + +#include "gtest/gtest.h" + +namespace art { + +static void TestCode(const uint16_t* data, + const std::string& expected_before, + const std::string& expected_after) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateCFG(&allocator, data); + ASSERT_NE(graph, nullptr); + + graph->BuildDominatorTree(); + graph->TransformToSSA(); + + StringPrettyPrinter printer_before(graph); + printer_before.VisitInsertionOrder(); + std::string actual_before = printer_before.str(); + ASSERT_EQ(actual_before, expected_before); + + DeadCodeElimination(graph).Run(); + + StringPrettyPrinter printer_after(graph); + printer_after.VisitInsertionOrder(); + std::string actual_after = printer_after.str(); + ASSERT_EQ(actual_after, expected_after); + + SSAChecker ssa_checker(&allocator, graph); + ssa_checker.VisitInsertionOrder(); + ASSERT_TRUE(ssa_checker.IsValid()); +} + + +/** + * Small three-register program. + * + * 16-bit + * offset + * ------ + * v1 <- 1 0. const/4 v1, #+1 + * v0 <- 0 1. const/4 v0, #+0 + * if v1 >= 0 goto L1 2. if-gez v1, +3 + * v0 <- v1 4. move v0, v1 + * L1: v2 <- v0 + v1 5. add-int v2, v0, v1 + * return-void 7. return + */ +TEST(DeadCodeElimination, AdditionAndConditionalJump) { + const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 1 << 8 | 1 << 12, + Instruction::CONST_4 | 0 << 8 | 0 << 12, + Instruction::IF_GEZ | 1 << 8, 3, + Instruction::MOVE | 0 << 8 | 1 << 12, + Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, + Instruction::RETURN_VOID); + + std::string expected_before = + "BasicBlock 0, succ: 1\n" + " 3: IntConstant [15, 22, 8]\n" + " 5: IntConstant [22, 8]\n" + " 19: SuspendCheck\n" + " 20: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 5, 2\n" + " 8: GreaterThanOrEqual(3, 5) [9]\n" + " 9: If(8)\n" + "BasicBlock 2, pred: 1, succ: 3\n" + " 12: Goto 3\n" + "BasicBlock 3, pred: 2, 5, succ: 4\n" + " 22: Phi(3, 5) [15]\n" + " 15: Add(22, 3)\n" + " 17: ReturnVoid\n" + "BasicBlock 4, pred: 3\n" + " 18: Exit\n" + "BasicBlock 5, pred: 1, succ: 3\n" + " 21: Goto 3\n"; + + diff_t expected_diff = { + { " 3: IntConstant [15, 22, 8]\n", " 3: IntConstant [22, 8]\n" }, + { " 22: Phi(3, 5) [15]\n", " 22: Phi(3, 5)\n" }, + { " 15: Add(22, 3)\n", removed } + }; + std::string expected_after = Patch(expected_before, expected_diff); + + TestCode(data, expected_before, expected_after); +} + +/** + * Three-register program with jumps leading to the creation of many + * blocks. + * + * The intent of this test is to ensure that all dead instructions are + * actually pruned at compile-time, thanks to the (backward) + * post-order traversal of the the dominator tree. + * + * 16-bit + * offset + * ------ + * v0 <- 0 0. const/4 v0, #+0 + * v1 <- 1 1. const/4 v1, #+1 + * v2 <- v0 + v1 2. add-int v2, v0, v1 + * goto L2 4. goto +4 + * L1: v1 <- v0 + 3 5. add-int/lit16 v1, v0, #+3 + * goto L3 7. goto +4 + * L2: v0 <- v2 + 2 8. add-int/lit16 v0, v2, #+2 + * goto L1 10. goto +(-5) + * L3: v2 <- v1 + 4 11. add-int/lit16 v2, v1, #+4 + * return 13. 
return-void + */ +TEST(DeadCodeElimination, AdditionsAndInconditionalJumps) { + const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 << 8 | 0 << 12, + Instruction::CONST_4 | 1 << 8 | 1 << 12, + Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, + Instruction::GOTO | 4 << 8, + Instruction::ADD_INT_LIT16 | 1 << 8 | 0 << 12, 3, + Instruction::GOTO | 4 << 8, + Instruction::ADD_INT_LIT16 | 0 << 8 | 2 << 12, 2, + static_cast<uint16_t>(Instruction::GOTO | -5 << 8), + Instruction::ADD_INT_LIT16 | 2 << 8 | 1 << 12, 4, + Instruction::RETURN_VOID); + + std::string expected_before = + "BasicBlock 0, succ: 1\n" + " 3: IntConstant [9]\n" + " 5: IntConstant [9]\n" + " 13: IntConstant [14]\n" + " 18: IntConstant [19]\n" + " 24: IntConstant [25]\n" + " 29: SuspendCheck\n" + " 30: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 3\n" + " 9: Add(3, 5) [19]\n" + " 11: Goto 3\n" + "BasicBlock 2, pred: 3, succ: 4\n" + " 14: Add(19, 13) [25]\n" + " 16: Goto 4\n" + "BasicBlock 3, pred: 1, succ: 2\n" + " 19: Add(9, 18) [14]\n" + " 21: SuspendCheck\n" + " 22: Goto 2\n" + "BasicBlock 4, pred: 2, succ: 5\n" + " 25: Add(14, 24)\n" + " 27: ReturnVoid\n" + "BasicBlock 5, pred: 4\n" + " 28: Exit\n"; + + // Expected difference after constant propagation. + diff_t expected_diff = { + { " 13: IntConstant [14]\n", removed }, + { " 24: IntConstant [25]\n", removed }, + { " 14: Add(19, 13) [25]\n", removed }, + // The SuspendCheck instruction following this Add instruction + // inserts the latter in an environment, thus making it "used" and + // therefore non removable. It ensues that some other Add and + // IntConstant instructions cannot be removed, as they are direct + // or indirect inputs of the initial Add instruction. + { " 19: Add(9, 18) [14]\n", " 19: Add(9, 18) []\n" }, + { " 25: Add(14, 24)\n", removed }, + }; + std::string expected_after = Patch(expected_before, expected_diff); + + TestCode(data, expected_before, expected_after); +} + +} // namespace art diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index fab9f7a5ec..c36b1436d3 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -27,9 +27,8 @@ namespace art { -static HGraph* TestCode(const uint16_t* data, ArenaPool* pool) { - ArenaAllocator allocator(pool); - HGraphBuilder builder(&allocator); +static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) { + HGraphBuilder builder(allocator); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); HGraph* graph = builder.BuildGraph(*item); graph->BuildDominatorTree(); @@ -44,7 +43,8 @@ TEST(FindLoopsTest, CFG1) { Instruction::RETURN_VOID); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) { ASSERT_EQ(graph->GetBlocks().Get(i)->GetLoopInformation(), nullptr); } @@ -56,7 +56,8 @@ TEST(FindLoopsTest, CFG2) { Instruction::RETURN); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) { ASSERT_EQ(graph->GetBlocks().Get(i)->GetLoopInformation(), nullptr); } @@ -71,7 +72,8 @@ TEST(FindLoopsTest, CFG3) { Instruction::RETURN); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); for (size_t i = 0, e = 
graph->GetBlocks().Size(); i < e; ++i) { ASSERT_EQ(graph->GetBlocks().Get(i)->GetLoopInformation(), nullptr); } @@ -87,7 +89,8 @@ TEST(FindLoopsTest, CFG4) { Instruction::RETURN | 0 << 8); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) { ASSERT_EQ(graph->GetBlocks().Get(i)->GetLoopInformation(), nullptr); } @@ -101,7 +104,8 @@ TEST(FindLoopsTest, CFG5) { Instruction::RETURN | 0 << 8); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) { ASSERT_EQ(graph->GetBlocks().Get(i)->GetLoopInformation(), nullptr); } @@ -146,7 +150,8 @@ TEST(FindLoopsTest, Loop1) { Instruction::RETURN_VOID); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header @@ -173,7 +178,8 @@ TEST(FindLoopsTest, Loop2) { Instruction::RETURN | 0 << 8); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // goto block @@ -197,7 +203,8 @@ TEST(FindLoopsTest, Loop3) { Instruction::RETURN | 0 << 8); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // goto block @@ -222,7 +229,8 @@ TEST(FindLoopsTest, Loop4) { Instruction::RETURN | 0 << 8); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header @@ -248,7 +256,8 @@ TEST(FindLoopsTest, Loop5) { Instruction::RETURN | 0 << 8); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header @@ -271,9 +280,9 @@ TEST(FindLoopsTest, InnerLoop) { Instruction::GOTO | 0xFB00, Instruction::RETURN | 0 << 8); - ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header of outer loop @@ -302,9 +311,9 @@ TEST(FindLoopsTest, TwoLoops) { Instruction::GOTO | 0xFE00, // second loop Instruction::RETURN | 0 << 8); - ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header of first loop @@ -333,7 +342,8 @@ TEST(FindLoopsTest, NonNaturalLoop) { Instruction::RETURN | 0 << 8); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); ASSERT_TRUE(graph->GetBlocks().Get(3)->IsLoopHeader()); HLoopInformation* info = graph->GetBlocks().Get(3)->GetLoopInformation(); 
ASSERT_FALSE(info->GetHeader()->Dominates(info->GetBackEdges().Get(0))); @@ -347,7 +357,8 @@ TEST(FindLoopsTest, DoWhileLoop) { Instruction::RETURN | 0 << 8); ArenaPool arena; - HGraph* graph = TestCode(data, &arena); + ArenaAllocator allocator(&arena); + HGraph* graph = TestCode(data, &allocator); TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header of first loop diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc new file mode 100644 index 0000000000..589b44a167 --- /dev/null +++ b/compiler/optimizing/graph_checker.cc @@ -0,0 +1,328 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph_checker.h" + +#include <string> +#include <map> +#include <sstream> + +#include "base/bit_vector-inl.h" + +namespace art { + +void GraphChecker::VisitBasicBlock(HBasicBlock* block) { + current_block_ = block; + + // Check consistency with respect to predecessors of `block`. + const GrowableArray<HBasicBlock*>& predecessors = block->GetPredecessors(); + std::map<HBasicBlock*, size_t> predecessors_count; + for (size_t i = 0, e = predecessors.Size(); i < e; ++i) { + HBasicBlock* p = predecessors.Get(i); + ++predecessors_count[p]; + } + for (auto& pc : predecessors_count) { + HBasicBlock* p = pc.first; + size_t p_count_in_block_predecessors = pc.second; + const GrowableArray<HBasicBlock*>& p_successors = p->GetSuccessors(); + size_t block_count_in_p_successors = 0; + for (size_t j = 0, f = p_successors.Size(); j < f; ++j) { + if (p_successors.Get(j) == block) { + ++block_count_in_p_successors; + } + } + if (p_count_in_block_predecessors != block_count_in_p_successors) { + std::stringstream error; + error << "Block " << block->GetBlockId() + << " lists " << p_count_in_block_predecessors + << " occurrences of block " << p->GetBlockId() + << " in its predecessors, whereas block " << p->GetBlockId() + << " lists " << block_count_in_p_successors + << " occurrences of block " << block->GetBlockId() + << " in its successors."; + errors_.Insert(error.str()); + } + } + + // Check consistency with respect to successors of `block`. 
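
// --- A minimal standalone sketch (not from the patch) of the consistency rule
// GraphChecker enforces in VisitBasicBlock: for every edge, the number of times
// a block P occurs in B's predecessor list must match the number of times B
// occurs in P's successor list. The index-based block representation is an
// assumption for illustration; the real checker performs this check in both
// directions.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

struct Block {
  std::vector<int> predecessors;
  std::vector<int> successors;
};

bool CheckPredecessorsMatchSuccessors(const std::vector<Block>& blocks) {
  bool ok = true;
  for (std::size_t b = 0; b < blocks.size(); ++b) {
    for (int p : blocks[b].predecessors) {
      auto in_preds = std::count(blocks[b].predecessors.begin(),
                                 blocks[b].predecessors.end(), p);
      auto in_succs = std::count(blocks[p].successors.begin(),
                                 blocks[p].successors.end(), static_cast<int>(b));
      if (in_preds != in_succs) {
        std::cout << "Block " << b << " lists block " << p << " " << in_preds
                  << " time(s) as a predecessor, but block " << p << " lists block "
                  << b << " " << in_succs << " time(s) as a successor." << std::endl;
        ok = false;
      }
    }
  }
  return ok;
}

int main() {
  std::vector<Block> blocks(2);
  blocks[1].predecessors = {0};  // Block 1 claims an edge from block 0...
  // ...but block 0 does not list block 1 as a successor: the CFG is inconsistent.
  std::cout << (CheckPredecessorsMatchSuccessors(blocks) ? "consistent" : "inconsistent")
            << std::endl;
  return 0;
}
// --- end of sketch ---
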
+ const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors(); + std::map<HBasicBlock*, size_t> successors_count; + for (size_t i = 0, e = successors.Size(); i < e; ++i) { + HBasicBlock* s = successors.Get(i); + ++successors_count[s]; + } + for (auto& sc : successors_count) { + HBasicBlock* s = sc.first; + size_t s_count_in_block_successors = sc.second; + const GrowableArray<HBasicBlock*>& s_predecessors = s->GetPredecessors(); + size_t block_count_in_s_predecessors = 0; + for (size_t j = 0, f = s_predecessors.Size(); j < f; ++j) { + if (s_predecessors.Get(j) == block) { + ++block_count_in_s_predecessors; + } + } + if (s_count_in_block_successors != block_count_in_s_predecessors) { + std::stringstream error; + error << "Block " << block->GetBlockId() + << " lists " << s_count_in_block_successors + << " occurrences of block " << s->GetBlockId() + << " in its successors, whereas block " << s->GetBlockId() + << " lists " << block_count_in_s_predecessors + << " occurrences of block " << block->GetBlockId() + << " in its predecessors."; + errors_.Insert(error.str()); + } + } + + // Ensure `block` ends with a branch instruction. + HInstruction* last_inst = block->GetLastInstruction(); + if (last_inst == nullptr || !last_inst->IsControlFlow()) { + std::stringstream error; + error << "Block " << block->GetBlockId() + << " does not end with a branch instruction."; + errors_.Insert(error.str()); + } + + // Visit this block's list of phis. + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + // Ensure this block's list of phis contains only phis. + if (!it.Current()->IsPhi()) { + std::stringstream error; + error << "Block " << current_block_->GetBlockId() + << " has a non-phi in its phi list."; + errors_.Insert(error.str()); + } + it.Current()->Accept(this); + } + + // Visit this block's list of instructions. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); + it.Advance()) { + // Ensure this block's list of instructions does not contains phis. + if (it.Current()->IsPhi()) { + std::stringstream error; + error << "Block " << current_block_->GetBlockId() + << " has a phi in its non-phi list."; + errors_.Insert(error.str()); + } + it.Current()->Accept(this); + } +} + +void GraphChecker::VisitInstruction(HInstruction* instruction) { + // Ensure `instruction` is associated with `current_block_`. + if (instruction->GetBlock() != current_block_) { + std::stringstream error; + if (instruction->IsPhi()) { + error << "Phi "; + } else { + error << "Instruction "; + } + error << instruction->GetId() << " in block " + << current_block_->GetBlockId(); + if (instruction->GetBlock() != nullptr) { + error << " associated with block " + << instruction->GetBlock()->GetBlockId() << "."; + } else { + error << " not associated with any block."; + } + errors_.Insert(error.str()); + } + + // Ensure the inputs of `instruction` are defined in a block of the graph. + for (HInputIterator input_it(instruction); !input_it.Done(); + input_it.Advance()) { + HInstruction* input = input_it.Current(); + const HInstructionList& list = input->IsPhi() + ? input->GetBlock()->GetPhis() + : input->GetBlock()->GetInstructions(); + if (!list.Contains(input)) { + std::stringstream error; + error << "Input " << input->GetId() + << " of instruction " << instruction->GetId() + << " is not defined in a basic block of the control-flow graph."; + errors_.Insert(error.str()); + } + } + + // Ensure the uses of `instruction` are defined in a block of the graph. 
+ for (HUseIterator<HInstruction> use_it(instruction->GetUses()); + !use_it.Done(); use_it.Advance()) { + HInstruction* use = use_it.Current()->GetUser(); + const HInstructionList& list = use->IsPhi() + ? use->GetBlock()->GetPhis() + : use->GetBlock()->GetInstructions(); + if (!list.Contains(use)) { + std::stringstream error; + error << "User " << use->GetId() + << " of instruction " << instruction->GetId() + << " is not defined in a basic block of the control-flow graph."; + errors_.Insert(error.str()); + } + } +} + +void SSAChecker::VisitBasicBlock(HBasicBlock* block) { + super_type::VisitBasicBlock(block); + + // Ensure there is no critical edge (i.e., an edge connecting a + // block with multiple successors to a block with multiple + // predecessors). + if (block->GetSuccessors().Size() > 1) { + for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { + HBasicBlock* successor = block->GetSuccessors().Get(j); + if (successor->GetPredecessors().Size() > 1) { + std::stringstream error; + error << "Critical edge between blocks " << block->GetBlockId() + << " and " << successor->GetBlockId() << "."; + errors_.Insert(error.str()); + } + } + } + + if (block->IsLoopHeader()) { + CheckLoop(block); + } +} + +void SSAChecker::CheckLoop(HBasicBlock* loop_header) { + int id = loop_header->GetBlockId(); + + // Ensure the pre-header block is first in the list of + // predecessors of a loop header. + if (!loop_header->IsLoopPreHeaderFirstPredecessor()) { + std::stringstream error; + error << "Loop pre-header is not the first predecessor of the loop header " + << id << "."; + errors_.Insert(error.str()); + } + + // Ensure the loop header has only two predecessors and that only the + // second one is a back edge. + if (loop_header->GetPredecessors().Size() < 2) { + std::stringstream error; + error << "Loop header " << id << " has less than two predecessors."; + errors_.Insert(error.str()); + } else if (loop_header->GetPredecessors().Size() > 2) { + std::stringstream error; + error << "Loop header " << id << " has more than two predecessors."; + errors_.Insert(error.str()); + } else { + HLoopInformation* loop_information = loop_header->GetLoopInformation(); + HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0); + if (loop_information->IsBackEdge(first_predecessor)) { + std::stringstream error; + error << "First predecessor of loop header " << id << " is a back edge."; + errors_.Insert(error.str()); + } + HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1); + if (!loop_information->IsBackEdge(second_predecessor)) { + std::stringstream error; + error << "Second predecessor of loop header " << id + << " is not a back edge."; + errors_.Insert(error.str()); + } + } + + // Ensure there is only one back edge per loop. + size_t num_back_edges = + loop_header->GetLoopInformation()->GetBackEdges().Size(); + if (num_back_edges != 1) { + std::stringstream error; + error << "Loop defined by header " << id << " has " + << num_back_edges << " back edge(s)."; + errors_.Insert(error.str()); + } + + // Ensure all blocks in the loop are dominated by the loop header. 
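
// --- A minimal standalone sketch (not from the patch) of the critical-edge rule
// SSAChecker verifies above: an edge is critical when its source block has more
// than one successor and its destination block has more than one predecessor,
// and the checker reports any such remaining edge as an error. The index-based
// block representation is again an illustrative assumption.
#include <cstddef>
#include <iostream>
#include <vector>

struct Block {
  std::vector<int> predecessors;
  std::vector<int> successors;
};

bool HasCriticalEdge(const std::vector<Block>& blocks) {
  for (std::size_t b = 0; b < blocks.size(); ++b) {
    if (blocks[b].successors.size() <= 1) continue;
    for (int succ : blocks[b].successors) {
      if (blocks[succ].predecessors.size() > 1) {
        std::cout << "Critical edge between blocks " << b << " and " << succ << std::endl;
        return true;
      }
    }
  }
  return false;
}

int main() {
  // Block 0 branches to 1 and 2; block 1 falls through to 2.
  std::vector<Block> blocks(3);
  blocks[0].successors = {1, 2};
  blocks[1].predecessors = {0};
  blocks[1].successors = {2};
  blocks[2].predecessors = {0, 1};
  // The edge 0 -> 2 is critical: 0 has two successors and 2 has two predecessors.
  std::cout << (HasCriticalEdge(blocks) ? "invalid graph" : "valid graph") << std::endl;
  return 0;
}
// --- end of sketch ---
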
+ const ArenaBitVector& loop_blocks = + loop_header->GetLoopInformation()->GetBlocks(); + for (uint32_t i : loop_blocks.Indexes()) { + HBasicBlock* loop_block = GetGraph()->GetBlocks().Get(i); + if (!loop_header->Dominates(loop_block)) { + std::stringstream error; + error << "Loop block " << loop_block->GetBlockId() + << " not dominated by loop header " << id; + errors_.Insert(error.str()); + } + } +} + +void SSAChecker::VisitInstruction(HInstruction* instruction) { + super_type::VisitInstruction(instruction); + + // Ensure an instruction dominates all its uses (or in the present + // case, that all uses of an instruction (used as input) are + // dominated by its definition). + for (HInputIterator input_it(instruction); !input_it.Done(); + input_it.Advance()) { + HInstruction* input = input_it.Current(); + if (!input->Dominates(instruction)) { + std::stringstream error; + error << "Instruction " << input->GetId() + << " in block " << input->GetBlock()->GetBlockId() + << " does not dominate use " << instruction->GetId() + << " in block " << current_block_->GetBlockId() << "."; + errors_.Insert(error.str()); + } + } +} + +void SSAChecker::VisitPhi(HPhi* phi) { + VisitInstruction(phi); + + // Ensure the first input of a phi is not itself. + if (phi->InputAt(0) == phi) { + std::stringstream error; + error << "Loop phi " << phi->GetId() + << " in block " << phi->GetBlock()->GetBlockId() + << " is its own first input."; + errors_.Insert(error.str()); + } + + // Ensure the number of phi inputs is the same as the number of + // its predecessors. + const GrowableArray<HBasicBlock*>& predecessors = + phi->GetBlock()->GetPredecessors(); + if (phi->InputCount() != predecessors.Size()) { + std::stringstream error; + error << "Phi " << phi->GetId() + << " in block " << phi->GetBlock()->GetBlockId() + << " has " << phi->InputCount() << " inputs, but block " + << phi->GetBlock()->GetBlockId() << " has " + << predecessors.Size() << " predecessors."; + errors_.Insert(error.str()); + } else { + // Ensure phi input at index I either comes from the Ith + // predecessor or from a block that dominates this predecessor. + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + HBasicBlock* predecessor = predecessors.Get(i); + if (!(input->GetBlock() == predecessor + || input->GetBlock()->Dominates(predecessor))) { + std::stringstream error; + error << "Input " << input->GetId() << " at index " << i + << " of phi " << phi->GetId() + << " from block " << phi->GetBlock()->GetBlockId() + << " is not defined in predecessor number " << i + << " nor in a block dominating it."; + errors_.Insert(error.str()); + } + } + } +} + +} // namespace art diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h new file mode 100644 index 0000000000..34a770b5f3 --- /dev/null +++ b/compiler/optimizing/graph_checker.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_GRAPH_CHECKER_H_ +#define ART_COMPILER_OPTIMIZING_GRAPH_CHECKER_H_ + +#include "nodes.h" + +namespace art { + +// A control-flow graph visitor performing various checks. +class GraphChecker : public HGraphVisitor { + public: + GraphChecker(ArenaAllocator* allocator, HGraph* graph) + : HGraphVisitor(graph), + allocator_(allocator), + errors_(allocator, 0) {} + + // Check `block`. + virtual void VisitBasicBlock(HBasicBlock* block) OVERRIDE; + + // Check `instruction`. + virtual void VisitInstruction(HInstruction* instruction) OVERRIDE; + + // Was the last visit of the graph valid? + bool IsValid() const { + return errors_.IsEmpty(); + } + + // Get the list of detected errors. + const GrowableArray<std::string>& GetErrors() const { + return errors_; + } + + protected: + ArenaAllocator* const allocator_; + // The block currently visited. + HBasicBlock* current_block_ = nullptr; + // Errors encountered while checking the graph. + GrowableArray<std::string> errors_; + + private: + DISALLOW_COPY_AND_ASSIGN(GraphChecker); +}; + + +// An SSA graph visitor performing various checks. +class SSAChecker : public GraphChecker { + public: + typedef GraphChecker super_type; + + SSAChecker(ArenaAllocator* allocator, HGraph* graph) + : GraphChecker(allocator, graph) {} + + // Perform SSA form checks on `block`. + virtual void VisitBasicBlock(HBasicBlock* block) OVERRIDE; + // Loop-related checks from block `loop_header`. + void CheckLoop(HBasicBlock* loop_header); + + // Perform SSA form checks on instructions. + virtual void VisitInstruction(HInstruction* instruction) OVERRIDE; + virtual void VisitPhi(HPhi* phi) OVERRIDE; + + private: + DISALLOW_COPY_AND_ASSIGN(SSAChecker); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_GRAPH_CHECKER_H_ diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc new file mode 100644 index 0000000000..ea0692088d --- /dev/null +++ b/compiler/optimizing/graph_checker_test.cc @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph_checker.h" +#include "optimizing_unit_test.h" + +#include "gtest/gtest.h" + +namespace art { + +/** + * Create a simple control-flow graph composed of two blocks: + * + * BasicBlock 0, succ: 1 + * 0: Goto 1 + * BasicBlock 1, pred: 0 + * 1: Exit + */ +HGraph* CreateSimpleCFG(ArenaAllocator* allocator) { + HGraph* graph = new (allocator) HGraph(allocator); + HBasicBlock* entry_block = new (allocator) HBasicBlock(graph); + entry_block->AddInstruction(new (allocator) HGoto()); + graph->AddBlock(entry_block); + graph->SetEntryBlock(entry_block); + HBasicBlock* exit_block = new (allocator) HBasicBlock(graph); + exit_block->AddInstruction(new (allocator) HExit()); + graph->AddBlock(exit_block); + graph->SetExitBlock(exit_block); + entry_block->AddSuccessor(exit_block); + return graph; +} + + +static void TestCode(const uint16_t* data) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateCFG(&allocator, data); + ASSERT_NE(graph, nullptr); + + GraphChecker graph_checker(&allocator, graph); + graph_checker.VisitInsertionOrder(); + ASSERT_TRUE(graph_checker.IsValid()); +} + +static void TestCodeSSA(const uint16_t* data) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateCFG(&allocator, data); + ASSERT_NE(graph, nullptr); + + graph->BuildDominatorTree(); + graph->TransformToSSA(); + + SSAChecker ssa_checker(&allocator, graph); + ssa_checker.VisitInsertionOrder(); + ASSERT_TRUE(ssa_checker.IsValid()); +} + + +TEST(GraphChecker, ReturnVoid) { + const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + Instruction::RETURN_VOID); + + TestCode(data); +} + +TEST(GraphChecker, CFG1) { + const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + Instruction::GOTO | 0x100, + Instruction::RETURN_VOID); + + TestCode(data); +} + +TEST(GraphChecker, CFG2) { + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 3, + Instruction::GOTO | 0x100, + Instruction::RETURN_VOID); + + TestCode(data); +} + +TEST(GraphChecker, CFG3) { + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 3, + Instruction::GOTO | 0x100, + Instruction::GOTO | 0xFF00); + + TestCode(data); +} + +// Test case with an invalid graph containing inconsistent +// predecessor/successor arcs in CFG. +TEST(GraphChecker, InconsistentPredecessorsAndSuccessors) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = CreateSimpleCFG(&allocator); + GraphChecker graph_checker(&allocator, graph); + graph_checker.VisitInsertionOrder(); + ASSERT_TRUE(graph_checker.IsValid()); + + // Remove the entry block from the exit block's predecessors, to create an + // inconsistent successor/predecessor relation. + graph->GetExitBlock()->RemovePredecessor(graph->GetEntryBlock()); + graph_checker.VisitInsertionOrder(); + ASSERT_FALSE(graph_checker.IsValid()); +} + +// Test case with an invalid graph containing a non-branch last +// instruction in a block. +TEST(GraphChecker, BlockEndingWithNonBranchInstruction) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = CreateSimpleCFG(&allocator); + GraphChecker graph_checker(&allocator, graph); + graph_checker.VisitInsertionOrder(); + ASSERT_TRUE(graph_checker.IsValid()); + + // Remove the sole instruction of the exit block (composed of a + // single Exit instruction) to make it invalid (i.e. not ending by a + // branch instruction). 
+ HBasicBlock* exit_block = graph->GetExitBlock(); + HInstruction* last_inst = exit_block->GetLastInstruction(); + exit_block->RemoveInstruction(last_inst); + + graph_checker.VisitInsertionOrder(); + ASSERT_FALSE(graph_checker.IsValid()); +} + +TEST(SSAChecker, SSAPhi) { + // This code creates one Phi function during the conversion to SSA form. + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 3, + Instruction::CONST_4 | 4 << 12 | 0, + Instruction::RETURN | 0 << 8); + + TestCodeSSA(data); +} + +} // namespace art diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc new file mode 100644 index 0000000000..c59f8366fa --- /dev/null +++ b/compiler/optimizing/graph_test.cc @@ -0,0 +1,324 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "base/stringprintf.h" +#include "builder.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "pretty_printer.h" +#include "utils/arena_allocator.h" + +#include "gtest/gtest.h" + +namespace art { + +static HBasicBlock* createIfBlock(HGraph* graph, ArenaAllocator* allocator) { + HBasicBlock* if_block = new (allocator) HBasicBlock(graph); + graph->AddBlock(if_block); + HInstruction* instr = new (allocator) HIntConstant(4); + if_block->AddInstruction(instr); + HInstruction* equal = new (allocator) HEqual(instr, instr); + if_block->AddInstruction(equal); + instr = new (allocator) HIf(equal); + if_block->AddInstruction(instr); + return if_block; +} + +static HBasicBlock* createGotoBlock(HGraph* graph, ArenaAllocator* allocator) { + HBasicBlock* block = new (allocator) HBasicBlock(graph); + graph->AddBlock(block); + HInstruction* got = new (allocator) HGoto(); + block->AddInstruction(got); + return block; +} + +static HBasicBlock* createReturnBlock(HGraph* graph, ArenaAllocator* allocator) { + HBasicBlock* block = new (allocator) HBasicBlock(graph); + graph->AddBlock(block); + HInstruction* return_instr = new (allocator) HReturnVoid(); + block->AddInstruction(return_instr); + return block; +} + +static HBasicBlock* createExitBlock(HGraph* graph, ArenaAllocator* allocator) { + HBasicBlock* block = new (allocator) HBasicBlock(graph); + graph->AddBlock(block); + HInstruction* exit_instr = new (allocator) HExit(); + block->AddInstruction(exit_instr); + return block; +} + + +// Test that the successors of an if block stay consistent after a SimplifyCFG. +// This test sets the false block to be the return block. 
+TEST(GraphTest, IfSuccessorSimpleJoinBlock1) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* if_block = createIfBlock(graph, &allocator); + HBasicBlock* if_true = createGotoBlock(graph, &allocator); + HBasicBlock* return_block = createReturnBlock(graph, &allocator); + HBasicBlock* exit_block = createExitBlock(graph, &allocator); + + entry_block->AddSuccessor(if_block); + if_block->AddSuccessor(if_true); + if_true->AddSuccessor(return_block); + if_block->AddSuccessor(return_block); + return_block->AddSuccessor(exit_block); + + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), if_true); + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block); + + graph->SimplifyCFG(); + + // Ensure we still have the same if true block. + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), if_true); + + // Ensure the critical edge has been removed. + HBasicBlock* false_block = if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(); + ASSERT_NE(false_block, return_block); + + // Ensure the new block branches to the join block. + ASSERT_EQ(false_block->GetSuccessors().Get(0), return_block); +} + +// Test that the successors of an if block stay consistent after a SimplifyCFG. +// This test sets the true block to be the return block. +TEST(GraphTest, IfSuccessorSimpleJoinBlock2) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* if_block = createIfBlock(graph, &allocator); + HBasicBlock* if_false = createGotoBlock(graph, &allocator); + HBasicBlock* return_block = createReturnBlock(graph, &allocator); + HBasicBlock* exit_block = createExitBlock(graph, &allocator); + + entry_block->AddSuccessor(if_block); + if_block->AddSuccessor(return_block); + if_false->AddSuccessor(return_block); + if_block->AddSuccessor(if_false); + return_block->AddSuccessor(exit_block); + + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block); + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), if_false); + + graph->SimplifyCFG(); + + // Ensure we still have the same if true block. + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), if_false); + + // Ensure the critical edge has been removed. + HBasicBlock* true_block = if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(); + ASSERT_NE(true_block, return_block); + + // Ensure the new block branches to the join block. + ASSERT_EQ(true_block->GetSuccessors().Get(0), return_block); +} + +// Test that the successors of an if block stay consistent after a SimplifyCFG. +// This test sets the true block to be the loop header. 
+TEST(GraphTest, IfSuccessorMultipleBackEdges1) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* if_block = createIfBlock(graph, &allocator); + HBasicBlock* return_block = createReturnBlock(graph, &allocator); + HBasicBlock* exit_block = createExitBlock(graph, &allocator); + + graph->SetEntryBlock(entry_block); + entry_block->AddSuccessor(if_block); + if_block->AddSuccessor(if_block); + if_block->AddSuccessor(return_block); + return_block->AddSuccessor(exit_block); + + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), if_block); + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block); + + graph->BuildDominatorTree(); + + // Ensure we still have the same if false block. + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block); + + // Ensure there is only one back edge. + ASSERT_EQ(if_block->GetPredecessors().Size(), 2u); + ASSERT_EQ(if_block->GetPredecessors().Get(0), entry_block); + ASSERT_NE(if_block->GetPredecessors().Get(1), if_block); + + // Ensure the new block is the back edge. + ASSERT_EQ(if_block->GetPredecessors().Get(1), + if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor()); +} + +// Test that the successors of an if block stay consistent after a SimplifyCFG. +// This test sets the false block to be the loop header. +TEST(GraphTest, IfSuccessorMultipleBackEdges2) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* if_block = createIfBlock(graph, &allocator); + HBasicBlock* return_block = createReturnBlock(graph, &allocator); + HBasicBlock* exit_block = createExitBlock(graph, &allocator); + + graph->SetEntryBlock(entry_block); + entry_block->AddSuccessor(if_block); + if_block->AddSuccessor(return_block); + if_block->AddSuccessor(if_block); + return_block->AddSuccessor(exit_block); + + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block); + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), if_block); + + graph->BuildDominatorTree(); + + // Ensure we still have the same if true block. + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block); + + // Ensure there is only one back edge. + ASSERT_EQ(if_block->GetPredecessors().Size(), 2u); + ASSERT_EQ(if_block->GetPredecessors().Get(0), entry_block); + ASSERT_NE(if_block->GetPredecessors().Get(1), if_block); + + // Ensure the new block is the back edge. + ASSERT_EQ(if_block->GetPredecessors().Get(1), + if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor()); +} + +// Test that the successors of an if block stay consistent after a SimplifyCFG. +// This test sets the true block to be a loop header with multiple pre headers. 
+TEST(GraphTest, IfSuccessorMultiplePreHeaders1) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* first_if_block = createIfBlock(graph, &allocator); + HBasicBlock* if_block = createIfBlock(graph, &allocator); + HBasicBlock* loop_block = createGotoBlock(graph, &allocator); + HBasicBlock* return_block = createReturnBlock(graph, &allocator); + + graph->SetEntryBlock(entry_block); + entry_block->AddSuccessor(first_if_block); + first_if_block->AddSuccessor(if_block); + first_if_block->AddSuccessor(loop_block); + loop_block->AddSuccessor(loop_block); + if_block->AddSuccessor(loop_block); + if_block->AddSuccessor(return_block); + + + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), loop_block); + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block); + + graph->BuildDominatorTree(); + + HIf* if_instr = if_block->GetLastInstruction()->AsIf(); + // Ensure we still have the same if false block. + ASSERT_EQ(if_instr->IfFalseSuccessor(), return_block); + + // Ensure there is only one pre header.. + ASSERT_EQ(loop_block->GetPredecessors().Size(), 2u); + + // Ensure the new block is the successor of the true block. + ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors().Size(), 1u); + ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors().Get(0), + loop_block->GetLoopInformation()->GetPreHeader()); +} + +// Test that the successors of an if block stay consistent after a SimplifyCFG. +// This test sets the false block to be a loop header with multiple pre headers. +TEST(GraphTest, IfSuccessorMultiplePreHeaders2) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* first_if_block = createIfBlock(graph, &allocator); + HBasicBlock* if_block = createIfBlock(graph, &allocator); + HBasicBlock* loop_block = createGotoBlock(graph, &allocator); + HBasicBlock* return_block = createReturnBlock(graph, &allocator); + + graph->SetEntryBlock(entry_block); + entry_block->AddSuccessor(first_if_block); + first_if_block->AddSuccessor(if_block); + first_if_block->AddSuccessor(loop_block); + loop_block->AddSuccessor(loop_block); + if_block->AddSuccessor(return_block); + if_block->AddSuccessor(loop_block); + + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block); + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), loop_block); + + graph->BuildDominatorTree(); + + HIf* if_instr = if_block->GetLastInstruction()->AsIf(); + // Ensure we still have the same if true block. + ASSERT_EQ(if_instr->IfTrueSuccessor(), return_block); + + // Ensure there is only one pre header.. + ASSERT_EQ(loop_block->GetPredecessors().Size(), 2u); + + // Ensure the new block is the successor of the false block. + ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors().Size(), 1u); + ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors().Get(0), + loop_block->GetLoopInformation()->GetPreHeader()); +} + +TEST(GraphTest, InsertInstructionBefore) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* block = createGotoBlock(graph, &allocator); + HInstruction* got = block->GetLastInstruction(); + ASSERT_TRUE(got->IsControlFlow()); + + // Test at the beginning of the block. 
+ HInstruction* first_instruction = new (&allocator) HIntConstant(4); + block->InsertInstructionBefore(first_instruction, got); + + ASSERT_NE(first_instruction->GetId(), -1); + ASSERT_EQ(first_instruction->GetBlock(), block); + ASSERT_EQ(block->GetFirstInstruction(), first_instruction); + ASSERT_EQ(block->GetLastInstruction(), got); + ASSERT_EQ(first_instruction->GetNext(), got); + ASSERT_EQ(first_instruction->GetPrevious(), nullptr); + ASSERT_EQ(got->GetNext(), nullptr); + ASSERT_EQ(got->GetPrevious(), first_instruction); + + // Test in the middle of the block. + HInstruction* second_instruction = new (&allocator) HIntConstant(4); + block->InsertInstructionBefore(second_instruction, got); + + ASSERT_NE(second_instruction->GetId(), -1); + ASSERT_EQ(second_instruction->GetBlock(), block); + ASSERT_EQ(block->GetFirstInstruction(), first_instruction); + ASSERT_EQ(block->GetLastInstruction(), got); + ASSERT_EQ(first_instruction->GetNext(), second_instruction); + ASSERT_EQ(first_instruction->GetPrevious(), nullptr); + ASSERT_EQ(second_instruction->GetNext(), got); + ASSERT_EQ(second_instruction->GetPrevious(), first_instruction); + ASSERT_EQ(got->GetNext(), nullptr); + ASSERT_EQ(got->GetPrevious(), second_instruction); +} + +} // namespace art diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 5c5042e20f..0fb4737db2 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -28,8 +28,15 @@ namespace art { */ class HGraphVisualizerPrinter : public HGraphVisitor { public: - HGraphVisualizerPrinter(HGraph* graph, std::ostream& output, const CodeGenerator& codegen) - : HGraphVisitor(graph), output_(output), codegen_(codegen), indent_(0) {} + HGraphVisualizerPrinter(HGraph* graph, + std::ostream& output, + const char* pass_name, + const CodeGenerator& codegen) + : HGraphVisitor(graph), + output_(output), + pass_name_(pass_name), + codegen_(codegen), + indent_(0) {} void StartTag(const char* name) { AddIndent(); @@ -74,6 +81,25 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } } + char GetTypeId(Primitive::Type type) { + // Note that Primitive::Descriptor would not work for us + // because it does not handle reference types (that is kPrimNot). 
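// The returned character is used below to prefix instruction ids in the printed
// output, e.g. an int-typed instruction with id 3 prints as `i3`.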
+ switch (type) { + case Primitive::kPrimBoolean: return 'z'; + case Primitive::kPrimByte: return 'b'; + case Primitive::kPrimChar: return 'c'; + case Primitive::kPrimShort: return 's'; + case Primitive::kPrimInt: return 'i'; + case Primitive::kPrimLong: return 'j'; + case Primitive::kPrimFloat: return 'f'; + case Primitive::kPrimDouble: return 'd'; + case Primitive::kPrimNot: return 'l'; + case Primitive::kPrimVoid: return 'v'; + } + LOG(FATAL) << "Unreachable"; + return 'v'; + } + void PrintPredecessors(HBasicBlock* block) { AddIndent(); output_ << "predecessors"; @@ -94,34 +120,77 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_<< std::endl; } + void DumpLocation(Location location, Primitive::Type type) { + if (location.IsRegister()) { + if (type == Primitive::kPrimDouble || type == Primitive::kPrimFloat) { + codegen_.DumpFloatingPointRegister(output_, location.reg().RegId()); + } else { + codegen_.DumpCoreRegister(output_, location.reg().RegId()); + } + } else if (location.IsConstant()) { + output_ << "constant"; + HConstant* constant = location.GetConstant(); + if (constant->IsIntConstant()) { + output_ << " " << constant->AsIntConstant()->GetValue(); + } else if (constant->IsLongConstant()) { + output_ << " " << constant->AsLongConstant()->GetValue(); + } + } else if (location.IsInvalid()) { + output_ << "invalid"; + } else if (location.IsStackSlot()) { + output_ << location.GetStackIndex() << "(sp)"; + } else { + DCHECK(location.IsDoubleStackSlot()); + output_ << "2x" << location.GetStackIndex() << "(sp)"; + } + } + + void VisitParallelMove(HParallelMove* instruction) { + output_ << instruction->DebugName(); + output_ << " ("; + for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) { + MoveOperands* move = instruction->MoveOperandsAt(i); + DumpLocation(move->GetSource(), Primitive::kPrimInt); + output_ << " -> "; + DumpLocation(move->GetDestination(), Primitive::kPrimInt); + if (i + 1 != e) { + output_ << ", "; + } + } + output_ << ")"; + } void VisitInstruction(HInstruction* instruction) { output_ << instruction->DebugName(); if (instruction->InputCount() > 0) { output_ << " [ "; for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) { - output_ << "v" << inputs.Current()->GetId() << " "; + output_ << GetTypeId(inputs.Current()->GetType()) << inputs.Current()->GetId() << " "; } output_ << "]"; } - if (instruction->GetLifetimePosition() != kNoLifetime) { + if (pass_name_ == kLivenessPassName && instruction->GetLifetimePosition() != kNoLifetime) { output_ << " (liveness: " << instruction->GetLifetimePosition(); if (instruction->HasLiveInterval()) { output_ << " "; const LiveInterval& interval = *instruction->GetLiveInterval(); interval.Dump(output_); - if (interval.HasRegister()) { - int reg = interval.GetRegister(); + } + output_ << ")"; + } else if (pass_name_ == kRegisterAllocatorPassName) { + LocationSummary* locations = instruction->GetLocations(); + if (locations != nullptr) { + output_ << " ( "; + for (size_t i = 0; i < instruction->InputCount(); ++i) { + DumpLocation(locations->InAt(i), instruction->InputAt(i)->GetType()); output_ << " "; - if (instruction->GetType() == Primitive::kPrimFloat - || instruction->GetType() == Primitive::kPrimDouble) { - codegen_.DumpFloatingPointRegister(output_, reg); - } else { - codegen_.DumpCoreRegister(output_, reg); - } + } + output_ << ")"; + if (locations->Out().IsValid()) { + output_ << " -> "; + DumpLocation(locations->Out(), instruction->GetType()); } } - output_ << ")"; } } @@ -131,15 +200,16 
@@ class HGraphVisualizerPrinter : public HGraphVisitor { HInstruction* instruction = it.Current(); AddIndent(); int bci = 0; - output_ << bci << " " << instruction->NumberOfUses() << " v" << instruction->GetId() << " "; + output_ << bci << " " << instruction->NumberOfUses() + << " " << GetTypeId(instruction->GetType()) << instruction->GetId() << " "; instruction->Accept(this); output_ << kEndInstructionMarker << std::endl; } } - void Run(const char* pass_name) { + void Run() { StartTag("cfg"); - PrintProperty("name", pass_name); + PrintProperty("name", pass_name_); VisitInsertionOrder(); EndTag("cfg"); } @@ -170,7 +240,8 @@ class HGraphVisualizerPrinter : public HGraphVisitor { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { AddIndent(); HInstruction* instruction = it.Current(); - output_ << instruction->GetId() << " v" << instruction->GetId() << "[ "; + output_ << instruction->GetId() << " " << GetTypeId(instruction->GetType()) + << instruction->GetId() << "[ "; for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) { output_ << inputs.Current()->GetId() << " "; } @@ -188,6 +259,7 @@ class HGraphVisualizerPrinter : public HGraphVisitor { private: std::ostream& output_; + const char* pass_name_; const CodeGenerator& codegen_; size_t indent_; @@ -209,7 +281,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, } is_enabled_ = true; - HGraphVisualizerPrinter printer(graph, *output_, codegen_); + HGraphVisualizerPrinter printer(graph, *output_, "", codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", pretty_name.c_str()); printer.PrintProperty("method", pretty_name.c_str()); @@ -227,7 +299,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, } is_enabled_ = true; - HGraphVisualizerPrinter printer(graph, *output_, codegen_); + HGraphVisualizerPrinter printer(graph, *output_, "", codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", name); printer.PrintProperty("method", name); @@ -239,8 +311,8 @@ void HGraphVisualizer::DumpGraph(const char* pass_name) { if (!is_enabled_) { return; } - HGraphVisualizerPrinter printer(graph_, *output_, codegen_); - printer.Run(pass_name); + HGraphVisualizerPrinter printer(graph_, *output_, pass_name, codegen_); + printer.Run(); } } // namespace art diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index 2638cf504d..6e2c6fd11f 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -25,6 +25,11 @@ class CodeGenerator; class DexCompilationUnit; class HGraph; +// TODO: Create an analysis/optimization abstraction. +static const char* kLivenessPassName = "liveness"; +static const char* kRegisterAllocatorPassName = "register"; +static const char* kGVNPassName = "gvn"; + /** * If enabled, emits compilation information suitable for the c1visualizer tool * and IRHydra. diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc new file mode 100644 index 0000000000..027b3d4ff3 --- /dev/null +++ b/compiler/optimizing/gvn.cc @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "gvn.h" + +namespace art { + +void GlobalValueNumberer::Run() { + ComputeSideEffects(); + + sets_.Put(graph_->GetEntryBlock()->GetBlockId(), new (allocator_) ValueSet(allocator_)); + + // Do reverse post order to ensure the non back-edge predecessors of a block are + // visited before the block itself. + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + VisitBasicBlock(it.Current()); + } +} + +void GlobalValueNumberer::UpdateLoopEffects(HLoopInformation* info, SideEffects effects) { + int id = info->GetHeader()->GetBlockId(); + loop_effects_.Put(id, loop_effects_.Get(id).Union(effects)); +} + +void GlobalValueNumberer::ComputeSideEffects() { + if (kIsDebugBuild) { + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + SideEffects effects = GetBlockEffects(block); + DCHECK(!effects.HasSideEffects() && !effects.HasDependencies()); + if (block->IsLoopHeader()) { + effects = GetLoopEffects(block); + DCHECK(!effects.HasSideEffects() && !effects.HasDependencies()); + } + } + } + + // Do a post order visit to ensure we visit a loop header after its loop body. + for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + + SideEffects effects = SideEffects::None(); + // Update `effects` with the side effects of all instructions in this block. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + effects = effects.Union(instruction->GetSideEffects()); + if (effects.HasAllSideEffects()) { + break; + } + } + + block_effects_.Put(block->GetBlockId(), effects); + + if (block->IsLoopHeader()) { + // The side effects of the loop header are part of the loop. + UpdateLoopEffects(block->GetLoopInformation(), effects); + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + if (pre_header->IsInLoop()) { + // Update the side effects of the outer loop with the side effects of the inner loop. + // Note that this works because we know all the blocks of the inner loop are visited + // before the loop header of the outer loop. + UpdateLoopEffects(pre_header->GetLoopInformation(), GetLoopEffects(block)); + } + } else if (block->IsInLoop()) { + // Update the side effects of the loop with the side effects of this block. 
+ UpdateLoopEffects(block->GetLoopInformation(), effects);
+ }
+ }
+}
+
+SideEffects GlobalValueNumberer::GetLoopEffects(HBasicBlock* block) const {
+ DCHECK(block->IsLoopHeader());
+ return loop_effects_.Get(block->GetBlockId());
+}
+
+SideEffects GlobalValueNumberer::GetBlockEffects(HBasicBlock* block) const {
+ return block_effects_.Get(block->GetBlockId());
+}
+
+static bool IsLoopExit(HBasicBlock* block, HBasicBlock* successor) {
+ HLoopInformation* block_info = block->GetLoopInformation();
+ HLoopInformation* other_info = successor->GetLoopInformation();
+ return block_info != other_info && (other_info == nullptr || block_info->IsIn(*other_info));
+}
+
+void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
+ if (kIsDebugBuild) {
+ // Check that all non back-edge predecessors have been visited.
+ for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
+ HBasicBlock* predecessor = block->GetPredecessors().Get(i);
+ DCHECK(visited_.Get(predecessor->GetBlockId())
+ || (block->GetLoopInformation() != nullptr
+ && (block->GetLoopInformation()->GetBackEdges().Get(0) == predecessor)));
+ }
+ visited_.Put(block->GetBlockId(), true);
+ }
+
+ ValueSet* set = sets_.Get(block->GetBlockId());
+
+ if (block->IsLoopHeader()) {
+ set->Kill(GetLoopEffects(block));
+ }
+
+ HInstruction* current = block->GetFirstInstruction();
+ while (current != nullptr) {
+ set->Kill(current->GetSideEffects());
+ // Save the next instruction in case `current` is removed from the graph.
+ HInstruction* next = current->GetNext();
+ if (current->CanBeMoved()) {
+ HInstruction* existing = set->Lookup(current);
+ if (existing != nullptr) {
+ current->ReplaceWith(existing);
+ current->GetBlock()->RemoveInstruction(current);
+ } else {
+ set->Add(current);
+ }
+ }
+ current = next;
+ }
+
+ if (block == graph_->GetEntryBlock()) {
+ // The entry block should only accumulate constant instructions, and
+ // the builder puts constants only in the entry block.
+ // Therefore, there is no need to propagate the value set to the next block.
+ DCHECK_EQ(block->GetDominatedBlocks().Size(), 1u);
+ HBasicBlock* dominated = block->GetDominatedBlocks().Get(0);
+ sets_.Put(dominated->GetBlockId(), new (allocator_) ValueSet(allocator_));
+ return;
+ }
+
+ // Copy the value set to dominated blocks. We can re-use
+ // the current set for the last dominated block because we are done visiting
+ // this block.
+ for (size_t i = 0, e = block->GetDominatedBlocks().Size(); i < e; ++i) {
+ HBasicBlock* dominated = block->GetDominatedBlocks().Get(i);
+ sets_.Put(dominated->GetBlockId(), i == e - 1 ? set : set->Copy());
+ }
+
+ // Kill instructions in the value set of each successor. If the successor
+ // is a loop exit, then we use the side effects of the loop. If not, we use
+ // the side effects of this block.
+ for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
+ HBasicBlock* successor = block->GetSuccessors().Get(i);
+ if (successor->IsLoopHeader()
+ && successor->GetLoopInformation()->GetBackEdges().Get(0) == block) {
+ // In case of a back edge, we have already visited the loop header.
+ // We should not update its value set, because the last dominated block
+ // of the loop header uses the same value set.
+ DCHECK(visited_.Get(successor->GetBlockId()));
+ continue;
+ }
+ DCHECK(!visited_.Get(successor->GetBlockId()));
+ ValueSet* successor_set = sets_.Get(successor->GetBlockId());
+ // The dominator sets the set, and we are guaranteed to have visited it already.
+ DCHECK(successor_set != nullptr); + + // If this block dominates this successor there is nothing to do. + // Also if the set is empty, there is nothing to kill. + if (successor->GetDominator() != block && !successor_set->IsEmpty()) { + if (block->IsInLoop() && IsLoopExit(block, successor)) { + // All instructions killed in the loop must be killed for a loop exit. + SideEffects effects = GetLoopEffects(block->GetLoopInformation()->GetHeader()); + sets_.Get(successor->GetBlockId())->Kill(effects); + } else { + // Following block (that might be in the same loop). + // Just kill instructions based on this block's side effects. + sets_.Get(successor->GetBlockId())->Kill(GetBlockEffects(block)); + } + } + } +} + +} // namespace art diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h new file mode 100644 index 0000000000..41b3ceb509 --- /dev/null +++ b/compiler/optimizing/gvn.h @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_GVN_H_ +#define ART_COMPILER_OPTIMIZING_GVN_H_ + +#include <gtest/gtest.h> +#include "nodes.h" + +namespace art { + +/** + * A node in the collision list of a ValueSet. Encodes the instruction, + * the hash code, and the next node in the collision list. + */ +class ValueSetNode : public ArenaObject { + public: + ValueSetNode(HInstruction* instruction, size_t hash_code, ValueSetNode* next) + : instruction_(instruction), hash_code_(hash_code), next_(next) {} + + size_t GetHashCode() const { return hash_code_; } + HInstruction* GetInstruction() const { return instruction_; } + ValueSetNode* GetNext() const { return next_; } + void SetNext(ValueSetNode* node) { next_ = node; } + + private: + HInstruction* const instruction_; + const size_t hash_code_; + ValueSetNode* next_; + + DISALLOW_COPY_AND_ASSIGN(ValueSetNode); +}; + +/** + * A ValueSet holds instructions that can replace other instructions. It is updated + * through the `Add` method, and the `Kill` method. The `Kill` method removes + * instructions that are affected by the given side effect. + * + * The `Lookup` method returns an equivalent instruction to the given instruction + * if there is one in the set. In GVN, we would say those instructions have the + * same "number". + */ +class ValueSet : public ArenaObject { + public: + explicit ValueSet(ArenaAllocator* allocator) + : allocator_(allocator), number_of_entries_(0), collisions_(nullptr) { + for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { + table_[i] = nullptr; + } + } + + // Adds an instruction in the set. 
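For orientation, a minimal usage sketch of this set, in the spirit of how VisitBasicBlock above drives it (editorial illustration, not part of this change; `allocator` is an ArenaAllocator, `load1`, `load2` and `store` are hypothetical instructions, and `load2->Equals(load1)` is assumed to hold). The `Add`, `Lookup` and `Kill` members it uses are defined next.

  ValueSet* set = new (allocator) ValueSet(allocator);
  set->Add(load1);
  HInstruction* existing = set->Lookup(load2);  // Returns load1.
  set->Kill(store->GetSideEffects());           // Drops entries that depend on those effects.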
+ void Add(HInstruction* instruction) { + DCHECK(Lookup(instruction) == nullptr); + size_t hash_code = instruction->ComputeHashCode(); + size_t index = hash_code % kDefaultNumberOfEntries; + if (table_[index] == nullptr) { + table_[index] = instruction; + } else { + collisions_ = new (allocator_) ValueSetNode(instruction, hash_code, collisions_); + } + ++number_of_entries_; + } + + // If in the set, returns an equivalent instruction to the given instruction. Returns + // null otherwise. + HInstruction* Lookup(HInstruction* instruction) const { + size_t hash_code = instruction->ComputeHashCode(); + size_t index = hash_code % kDefaultNumberOfEntries; + HInstruction* existing = table_[index]; + if (existing != nullptr && existing->Equals(instruction)) { + return existing; + } + + for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { + if (node->GetHashCode() == hash_code) { + existing = node->GetInstruction(); + if (existing->Equals(instruction)) { + return existing; + } + } + } + return nullptr; + } + + // Removes all instructions in the set that are affected by the given side effects. + void Kill(SideEffects side_effects) { + for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { + HInstruction* instruction = table_[i]; + if (instruction != nullptr && instruction->GetSideEffects().DependsOn(side_effects)) { + table_[i] = nullptr; + --number_of_entries_; + } + } + + ValueSetNode* current = collisions_; + ValueSetNode* previous = nullptr; + while (current != nullptr) { + HInstruction* instruction = current->GetInstruction(); + if (instruction->GetSideEffects().DependsOn(side_effects)) { + if (previous == nullptr) { + collisions_ = current->GetNext(); + } else { + previous->SetNext(current->GetNext()); + } + --number_of_entries_; + } else { + previous = current; + } + current = current->GetNext(); + } + } + + // Returns a copy of this set. + ValueSet* Copy() const { + ValueSet* copy = new (allocator_) ValueSet(allocator_); + + for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { + copy->table_[i] = table_[i]; + } + + // Note that the order will be inverted in the copy. This is fine, as the order is not + // relevant for a ValueSet. + for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { + copy->collisions_ = new (allocator_) ValueSetNode( + node->GetInstruction(), node->GetHashCode(), copy->collisions_); + } + + copy->number_of_entries_ = number_of_entries_; + return copy; + } + + bool IsEmpty() const { return number_of_entries_ == 0; } + size_t GetNumberOfEntries() const { return number_of_entries_; } + + private: + static constexpr size_t kDefaultNumberOfEntries = 8; + + ArenaAllocator* const allocator_; + + // The number of entries in the set. + size_t number_of_entries_; + + // The internal implementation of the set. It uses a combination of a hash code based + // fixed-size list, and a linked list to handle hash code collisions. + // TODO: Tune the fixed size list original size, and support growing it. + ValueSetNode* collisions_; + HInstruction* table_[kDefaultNumberOfEntries]; + + DISALLOW_COPY_AND_ASSIGN(ValueSet); +}; + +/** + * Optimization phase that removes redundant instruction. 
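 * A minimal driver sketch (editorial illustration, not part of this change),
 * assuming an ArenaAllocator `allocator` and a `graph` already built into SSA
 * form (with natural loops identified when it has any), as in the tests below:
 *
 *   GlobalValueNumberer(&allocator, graph).Run();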
+ */
+class GlobalValueNumberer : public ValueObject {
+ public:
+ GlobalValueNumberer(ArenaAllocator* allocator, HGraph* graph)
+ : allocator_(allocator),
+ graph_(graph),
+ block_effects_(allocator, graph->GetBlocks().Size()),
+ loop_effects_(allocator, graph->GetBlocks().Size()),
+ sets_(allocator, graph->GetBlocks().Size()),
+ visited_(allocator, graph->GetBlocks().Size()) {
+ size_t number_of_blocks = graph->GetBlocks().Size();
+ block_effects_.SetSize(number_of_blocks);
+ loop_effects_.SetSize(number_of_blocks);
+ sets_.SetSize(number_of_blocks);
+ visited_.SetSize(number_of_blocks);
+
+ for (size_t i = 0; i < number_of_blocks; ++i) {
+ block_effects_.Put(i, SideEffects::None());
+ loop_effects_.Put(i, SideEffects::None());
+ }
+ }
+
+ void Run();
+
+ private:
+ // Per-block GVN. Will also update the ValueSet of the dominated and
+ // successor blocks.
+ void VisitBasicBlock(HBasicBlock* block);
+
+ // Compute side effects of individual blocks and loops. The GVN algorithm
+ // will use these side effects to update the ValueSet of individual blocks.
+ void ComputeSideEffects();
+
+ void UpdateLoopEffects(HLoopInformation* info, SideEffects effects);
+ SideEffects GetLoopEffects(HBasicBlock* block) const;
+ SideEffects GetBlockEffects(HBasicBlock* block) const;
+
+ ArenaAllocator* const allocator_;
+ HGraph* const graph_;
+
+ // Side effects of individual blocks, that is the union of the side effects
+ // of the instructions in the block.
+ GrowableArray<SideEffects> block_effects_;
+
+ // Side effects of loops, that is the union of the side effects of the
+ // blocks contained in that loop.
+ GrowableArray<SideEffects> loop_effects_;
+
+ // ValueSet for each block. Initially null; a block's set is allocated and
+ // populated by its dominator, and updated by all blocks in the path from
+ // the dominator to the block.
+ GrowableArray<ValueSet*> sets_;
+
+ // Marks visited blocks. Only used for debugging.
+ GrowableArray<bool> visited_;
+
+ FRIEND_TEST(GVNTest, LoopSideEffects);
+ DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_GVN_H_
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
new file mode 100644
index 0000000000..ad6e3382bc
--- /dev/null
+++ b/compiler/optimizing/gvn_test.cc
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "builder.h" +#include "gvn.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "utils/arena_allocator.h" + +#include "gtest/gtest.h" + +namespace art { + +TEST(GVNTest, LocalFieldElimination) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot); + entry->AddInstruction(parameter); + + HBasicBlock* block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block); + entry->AddSuccessor(block); + + block->AddInstruction( + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + block->AddInstruction( + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + HInstruction* to_remove = block->GetLastInstruction(); + block->AddInstruction( + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(43))); + HInstruction* different_offset = block->GetLastInstruction(); + // Kill the value. + block->AddInstruction(new (&allocator) HInstanceFieldSet( + parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + block->AddInstruction( + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42))); + HInstruction* use_after_kill = block->GetLastInstruction(); + block->AddInstruction(new (&allocator) HExit()); + + ASSERT_EQ(to_remove->GetBlock(), block); + ASSERT_EQ(different_offset->GetBlock(), block); + ASSERT_EQ(use_after_kill->GetBlock(), block); + + graph->BuildDominatorTree(); + graph->TransformToSSA(); + GlobalValueNumberer(&allocator, graph).Run(); + + ASSERT_TRUE(to_remove->GetBlock() == nullptr); + ASSERT_EQ(different_offset->GetBlock(), block); + ASSERT_EQ(use_after_kill->GetBlock(), block); +} + +TEST(GVNTest, GlobalFieldElimination) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot); + entry->AddInstruction(parameter); + + HBasicBlock* block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(block); + entry->AddSuccessor(block); + block->AddInstruction( + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + + block->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); + HBasicBlock* then = new (&allocator) HBasicBlock(graph); + HBasicBlock* else_ = new (&allocator) HBasicBlock(graph); + HBasicBlock* join = new (&allocator) HBasicBlock(graph); + graph->AddBlock(then); + graph->AddBlock(else_); + graph->AddBlock(join); + + block->AddSuccessor(then); + block->AddSuccessor(else_); + then->AddSuccessor(join); + else_->AddSuccessor(join); + + then->AddInstruction( + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + then->AddInstruction(new (&allocator) HGoto()); + else_->AddInstruction( + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + else_->AddInstruction(new (&allocator) HGoto()); + join->AddInstruction( + new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42))); + join->AddInstruction(new (&allocator) HExit()); + + graph->BuildDominatorTree(); + 
graph->TransformToSSA();
+ GlobalValueNumberer(&allocator, graph).Run();
+
+ // Check that all field get instructions have been GVN'ed.
+ ASSERT_TRUE(then->GetFirstInstruction()->IsGoto());
+ ASSERT_TRUE(else_->GetFirstInstruction()->IsGoto());
+ ASSERT_TRUE(join->GetFirstInstruction()->IsExit());
+}
+
+TEST(GVNTest, LoopFieldElimination) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ HGraph* graph = new (&allocator) HGraph(&allocator);
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+
+ HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+ entry->AddInstruction(parameter);
+
+ HBasicBlock* block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block);
+ entry->AddSuccessor(block);
+ block->AddInstruction(
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ block->AddInstruction(new (&allocator) HGoto());
+
+ HBasicBlock* loop_header = new (&allocator) HBasicBlock(graph);
+ HBasicBlock* loop_body = new (&allocator) HBasicBlock(graph);
+ HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
+
+ graph->AddBlock(loop_header);
+ graph->AddBlock(loop_body);
+ graph->AddBlock(exit);
+ block->AddSuccessor(loop_header);
+ loop_header->AddSuccessor(loop_body);
+ loop_header->AddSuccessor(exit);
+ loop_body->AddSuccessor(loop_header);
+
+ loop_header->AddInstruction(
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ HInstruction* field_get_in_loop_header = loop_header->GetLastInstruction();
+ loop_header->AddInstruction(new (&allocator) HIf(block->GetLastInstruction()));
+
+ // Kill inside the loop body to prevent the field gets inside the loop header
+ // and the body from being GVN'ed.
+ loop_body->AddInstruction(new (&allocator) HInstanceFieldSet(
+ parameter, parameter, Primitive::kPrimNot, MemberOffset(42)));
+ HInstruction* field_set = loop_body->GetLastInstruction();
+ loop_body->AddInstruction(
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ HInstruction* field_get_in_loop_body = loop_body->GetLastInstruction();
+ loop_body->AddInstruction(new (&allocator) HGoto());
+
+ exit->AddInstruction(
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ HInstruction* field_get_in_exit = exit->GetLastInstruction();
+ exit->AddInstruction(new (&allocator) HExit());
+
+ ASSERT_EQ(field_get_in_loop_header->GetBlock(), loop_header);
+ ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body);
+ ASSERT_EQ(field_get_in_exit->GetBlock(), exit);
+
+ graph->BuildDominatorTree();
+ graph->TransformToSSA();
+ graph->FindNaturalLoops();
+ GlobalValueNumberer(&allocator, graph).Run();
+
+ // Check that all field get instructions are still there.
+ ASSERT_EQ(field_get_in_loop_header->GetBlock(), loop_header);
+ ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body);
+ // The exit block is dominated by the loop header, whose field get
+ // does not get killed by the loop side effects.
+ ASSERT_TRUE(field_get_in_exit->GetBlock() == nullptr);
+
+ // Now remove the field set, and check that all field get instructions have been GVN'ed.
+ loop_body->RemoveInstruction(field_set); + GlobalValueNumberer(&allocator, graph).Run(); + + ASSERT_TRUE(field_get_in_loop_header->GetBlock() == nullptr); + ASSERT_TRUE(field_get_in_loop_body->GetBlock() == nullptr); + ASSERT_TRUE(field_get_in_exit->GetBlock() == nullptr); +} + +// Test that inner loops affect the side effects of the outer loop. +TEST(GVNTest, LoopSideEffects) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + + HBasicBlock* outer_loop_header = new (&allocator) HBasicBlock(graph); + HBasicBlock* outer_loop_body = new (&allocator) HBasicBlock(graph); + HBasicBlock* outer_loop_exit = new (&allocator) HBasicBlock(graph); + HBasicBlock* inner_loop_header = new (&allocator) HBasicBlock(graph); + HBasicBlock* inner_loop_body = new (&allocator) HBasicBlock(graph); + HBasicBlock* inner_loop_exit = new (&allocator) HBasicBlock(graph); + + graph->AddBlock(outer_loop_header); + graph->AddBlock(outer_loop_body); + graph->AddBlock(outer_loop_exit); + graph->AddBlock(inner_loop_header); + graph->AddBlock(inner_loop_body); + graph->AddBlock(inner_loop_exit); + + entry->AddSuccessor(outer_loop_header); + outer_loop_header->AddSuccessor(outer_loop_body); + outer_loop_header->AddSuccessor(outer_loop_exit); + outer_loop_body->AddSuccessor(inner_loop_header); + inner_loop_header->AddSuccessor(inner_loop_body); + inner_loop_header->AddSuccessor(inner_loop_exit); + inner_loop_body->AddSuccessor(inner_loop_header); + inner_loop_exit->AddSuccessor(outer_loop_header); + + HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimBoolean); + entry->AddInstruction(parameter); + entry->AddInstruction(new (&allocator) HGoto()); + outer_loop_header->AddInstruction(new (&allocator) HIf(parameter)); + outer_loop_body->AddInstruction(new (&allocator) HGoto()); + inner_loop_header->AddInstruction(new (&allocator) HIf(parameter)); + inner_loop_body->AddInstruction(new (&allocator) HGoto()); + inner_loop_exit->AddInstruction(new (&allocator) HGoto()); + outer_loop_exit->AddInstruction(new (&allocator) HExit()); + + graph->BuildDominatorTree(); + graph->TransformToSSA(); + graph->FindNaturalLoops(); + + ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn( + *outer_loop_header->GetLoopInformation())); + + // Check that the loops don't have side effects. + { + // Make one block with a side effect. + entry->AddInstruction(new (&allocator) HInstanceFieldSet( + parameter, parameter, Primitive::kPrimNot, MemberOffset(42))); + + GlobalValueNumberer gvn(&allocator, graph); + gvn.Run(); + + ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects()); + ASSERT_FALSE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects()); + ASSERT_FALSE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects()); + } + + // Check that the side effects of the outer loop does not affect the inner loop. 
+ { + outer_loop_body->InsertInstructionBefore( + new (&allocator) HInstanceFieldSet( + parameter, parameter, Primitive::kPrimNot, MemberOffset(42)), + outer_loop_body->GetLastInstruction()); + + GlobalValueNumberer gvn(&allocator, graph); + gvn.Run(); + + ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects()); + ASSERT_TRUE(gvn.GetBlockEffects(outer_loop_body).HasSideEffects()); + ASSERT_TRUE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects()); + ASSERT_FALSE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects()); + } + + // Check that the side effects of the inner loop affects the outer loop. + { + outer_loop_body->RemoveInstruction(outer_loop_body->GetFirstInstruction()); + inner_loop_body->InsertInstructionBefore( + new (&allocator) HInstanceFieldSet( + parameter, parameter, Primitive::kPrimNot, MemberOffset(42)), + inner_loop_body->GetLastInstruction()); + + GlobalValueNumberer gvn(&allocator, graph); + gvn.Run(); + + ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects()); + ASSERT_FALSE(gvn.GetBlockEffects(outer_loop_body).HasSideEffects()); + ASSERT_TRUE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects()); + ASSERT_TRUE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects()); + } +} +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc new file mode 100644 index 0000000000..a0de73da32 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier.cc @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "instruction_simplifier.h" + +namespace art { + +void InstructionSimplifier::Run() { + VisitInsertionOrder(); +} + +void InstructionSimplifier::VisitSuspendCheck(HSuspendCheck* check) { + HBasicBlock* block = check->GetBlock(); + // Currently always keep the suspend check at entry. + if (block->IsEntryBlock()) return; + + // Currently always keep suspend checks at loop entry. + if (block->IsLoopHeader() && block->GetFirstInstruction() == check) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == check); + return; + } + + // Remove the suspend check that was added at build time for the baseline + // compiler. + block->RemoveInstruction(check); +} + +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h new file mode 100644 index 0000000000..b2f3f521ae --- /dev/null +++ b/compiler/optimizing/instruction_simplifier.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_ + +#include "nodes.h" + +namespace art { + +/** + * Implements optimizations specific to each instruction. + */ +class InstructionSimplifier : public HGraphVisitor { + public: + explicit InstructionSimplifier(HGraph* graph) : HGraphVisitor(graph) {} + + void Run(); + + private: + virtual void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE; +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_ diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index f9ae529b1e..6dd4207795 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -18,6 +18,8 @@ #include "base/stringprintf.h" #include "builder.h" +#include "code_generator.h" +#include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" #include "graph_visualizer.h" @@ -41,8 +43,11 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num ASSERT_NE(graph, nullptr); graph->BuildDominatorTree(); + graph->TransformToSSA(); graph->FindNaturalLoops(); - SsaLivenessAnalysis liveness(*graph); + + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); ASSERT_EQ(liveness.GetLinearPostOrder().Size(), number_of_blocks); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index c797497581..03f8625265 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -15,6 +15,8 @@ */ #include "builder.h" +#include "code_generator.h" +#include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" #include "nodes.h" @@ -30,6 +32,9 @@ static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { HGraphBuilder builder(allocator); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); HGraph* graph = builder.BuildGraph(*item); + // Suspend checks implementation may change in the future, and this test relies + // on how instructions are ordered. + RemoveSuspendChecks(graph); graph->BuildDominatorTree(); graph->TransformToSSA(); graph->FindNaturalLoops(); @@ -56,14 +61,16 @@ TEST(LiveRangesTest, CFG1) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - SsaLivenessAnalysis liveness(*graph); + + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); LiveRange* range = interval->GetFirstRange(); ASSERT_EQ(2u, range->GetStart()); // Last use is the return instruction. 
- ASSERT_EQ(8u, range->GetEnd()); + ASSERT_EQ(9u, range->GetEnd()); HBasicBlock* block = graph->GetBlocks().Get(1); ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr); ASSERT_EQ(8u, block->GetLastInstruction()->GetLifetimePosition()); @@ -101,14 +108,15 @@ TEST(LiveRangesTest, CFG2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - SsaLivenessAnalysis liveness(*graph); + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); LiveRange* range = interval->GetFirstRange(); ASSERT_EQ(2u, range->GetStart()); // Last use is the return instruction. - ASSERT_EQ(22u, range->GetEnd()); + ASSERT_EQ(23u, range->GetEnd()); HBasicBlock* block = graph->GetBlocks().Get(3); ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr); ASSERT_EQ(22u, block->GetLastInstruction()->GetLifetimePosition()); @@ -138,7 +146,7 @@ TEST(LiveRangesTest, CFG3) { * 22: phi * 24: return * | - * 38: exit + * 28: exit */ const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -149,25 +157,26 @@ TEST(LiveRangesTest, CFG3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - SsaLivenessAnalysis liveness(*graph); + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); - // Test for the 0 constant. - LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); + // Test for the 4 constant. + LiveInterval* interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval(); LiveRange* range = interval->GetFirstRange(); - ASSERT_EQ(2u, range->GetStart()); + ASSERT_EQ(4u, range->GetStart()); // Last use is the phi at the return block so instruction is live until // the end of the then block. ASSERT_EQ(18u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); - // Test for the 4 constant. - interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval(); + // Test for the 0 constant. + interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); // The then branch is a hole for this constant, therefore its interval has 2 ranges. // First range starts from the definition and ends at the if block. range = interval->GetFirstRange(); - ASSERT_EQ(4u, range->GetStart()); - // 9 is the end of the if block. + ASSERT_EQ(2u, range->GetStart()); + // 14 is the end of the if block. ASSERT_EQ(14u, range->GetEnd()); // Second range is the else block. range = range->GetNext(); @@ -177,14 +186,15 @@ TEST(LiveRangesTest, CFG3) { ASSERT_TRUE(range->GetNext() == nullptr); // Test for the phi. 
- interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval(); + interval = liveness.GetInstructionFromSsaIndex(2)->GetLiveInterval(); range = interval->GetFirstRange(); + ASSERT_EQ(22u, liveness.GetInstructionFromSsaIndex(2)->GetLifetimePosition()); ASSERT_EQ(22u, range->GetStart()); - ASSERT_EQ(24u, range->GetEnd()); + ASSERT_EQ(25u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, Loop) { +TEST(LiveRangesTest, Loop1) { /* * Test the following snippet: * var a = 0; @@ -223,7 +233,9 @@ TEST(LiveRangesTest, Loop) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - SsaLivenessAnalysis liveness(*graph); + RemoveSuspendChecks(graph); + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); // Test for the 0 constant. @@ -248,7 +260,7 @@ TEST(LiveRangesTest, Loop) { range = interval->GetFirstRange(); // The instruction is live until the return instruction after the loop. ASSERT_EQ(6u, range->GetStart()); - ASSERT_EQ(26u, range->GetEnd()); + ASSERT_EQ(27u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); // Test for the phi. @@ -256,7 +268,171 @@ TEST(LiveRangesTest, Loop) { range = interval->GetFirstRange(); // Instruction is consumed by the if. ASSERT_EQ(14u, range->GetStart()); + ASSERT_EQ(17u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); +} + +TEST(LiveRangesTest, Loop2) { + /* + * Test the following snippet: + * var a = 0; + * while (a == a) { + * a = a + a; + * } + * return a; + * + * Which becomes the following graph (numbered by lifetime position): + * 2: constant0 + * 4: goto + * | + * 8: goto + * | + * 10: phi + * 12: equal + * 14: if +++++ + * | \ + + * | 18: suspend + * | 20: add + * | 22: goto + * | + * 26: return + * | + * 30: exit + * + * We want to make sure the phi at 10 has a lifetime hole after the add at 20. + */ + + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 6, + Instruction::ADD_INT, 0, 0, + Instruction::GOTO | 0xFB00, + Instruction::RETURN | 0 << 8); + + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = BuildGraph(data, &allocator); + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); + liveness.Analyze(); + + // Test for the 0 constant. + HIntConstant* constant = liveness.GetInstructionFromSsaIndex(0)->AsIntConstant(); + LiveInterval* interval = constant->GetLiveInterval(); + LiveRange* range = interval->GetFirstRange(); + ASSERT_EQ(2u, range->GetStart()); + // Last use is the loop phi so instruction is live until + // the end of the pre loop header. + ASSERT_EQ(10u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); + + // Test for the loop phi. + HPhi* phi = liveness.GetInstructionFromSsaIndex(1)->AsPhi(); + interval = phi->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(10u, range->GetStart()); + ASSERT_EQ(21u, range->GetEnd()); + range = range->GetNext(); + ASSERT_TRUE(range != nullptr); + ASSERT_EQ(24u, range->GetStart()); + ASSERT_EQ(27u, range->GetEnd()); + + // Test for the add instruction. 
+ HAdd* add = liveness.GetInstructionFromSsaIndex(2)->AsAdd(); + interval = add->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(20u, range->GetStart()); + ASSERT_EQ(24u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); +} + +TEST(LiveRangesTest, CFG4) { + /* + * Test the following snippet: + * var a = 0; + * var b = 4; + * if (a == a) { + * a = b + a; + * } else { + * a = b + a + * } + * return b; + * + * Which becomes the following graph (numbered by lifetime position): + * 2: constant0 + * 4: constant4 + * 6: goto + * | + * 10: equal + * 12: if + * / \ + * 16: add 22: add + * 18: goto 24: goto + * \ / + * 26: phi + * 28: return + * | + * 32: exit + * + * We want to make sure the constant0 has a lifetime hole after the 16: add. + */ + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::CONST_4 | 4 << 12 | 1 << 8, + Instruction::IF_EQ, 5, + Instruction::ADD_INT, 1 << 8, + Instruction::GOTO | 0x300, + Instruction::ADD_INT, 1 << 8, + Instruction::RETURN | 1 << 8); + + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = BuildGraph(data, &allocator); + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); + liveness.Analyze(); + + // Test for the 0 constant. + LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); + LiveRange* range = interval->GetFirstRange(); + ASSERT_EQ(2u, range->GetStart()); ASSERT_EQ(16u, range->GetEnd()); + range = range->GetNext(); + ASSERT_TRUE(range != nullptr); + ASSERT_EQ(20u, range->GetStart()); + ASSERT_EQ(22u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); + + // Test for the 4 constant. + interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(4u, range->GetStart()); + ASSERT_EQ(29u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); + + // Test for the first add. + HAdd* add = liveness.GetInstructionFromSsaIndex(2)->AsAdd(); + interval = add->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(16u, range->GetStart()); + ASSERT_EQ(20u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); + + // Test for the second add. + add = liveness.GetInstructionFromSsaIndex(3)->AsAdd(); + interval = add->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(22u, range->GetStart()); + ASSERT_EQ(26u, range->GetEnd()); + ASSERT_TRUE(range->GetNext() == nullptr); + + // Test for the phi, which is unused. 
+ HPhi* phi = liveness.GetInstructionFromSsaIndex(4)->AsPhi(); + ASSERT_EQ(phi->NumberOfUses(), 0u); + interval = phi->GetLiveInterval(); + range = interval->GetFirstRange(); + ASSERT_EQ(26u, range->GetStart()); + ASSERT_EQ(28u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); } diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 7a336204b6..2d861696bb 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -15,6 +15,8 @@ */ #include "builder.h" +#include "code_generator.h" +#include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" #include "nodes.h" @@ -48,7 +50,8 @@ static void TestCode(const uint16_t* data, const char* expected) { graph->BuildDominatorTree(); graph->TransformToSSA(); graph->FindNaturalLoops(); - SsaLivenessAnalysis liveness(*graph); + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); std::ostringstream buffer; @@ -69,17 +72,17 @@ static void TestCode(const uint16_t* data, const char* expected) { TEST(LivenessTest, CFG1) { const char* expected = "Block 0\n" - " live in: ()\n" - " live out: ()\n" - " kill: ()\n" + " live in: (0)\n" + " live out: (0)\n" + " kill: (1)\n" "Block 1\n" - " live in: ()\n" - " live out: ()\n" - " kill: ()\n" + " live in: (0)\n" + " live out: (0)\n" + " kill: (0)\n" "Block 2\n" - " live in: ()\n" - " live out: ()\n" - " kill: ()\n"; + " live in: (0)\n" + " live out: (0)\n" + " kill: (0)\n"; // Constant is not used. const uint16_t data[] = ONE_REGISTER_CODE_ITEM( @@ -150,32 +153,32 @@ TEST(LivenessTest, CFG4) { // return a; // // Bitsets are made of: - // (constant0, constant4, constant5, phi, equal test) + // (constant0, constant4, constant5, phi) const char* expected = "Block 0\n" // entry block - " live in: (00000)\n" - " live out: (11100)\n" - " kill: (11100)\n" + " live in: (0000)\n" + " live out: (1110)\n" + " kill: (1110)\n" "Block 1\n" // block with if - " live in: (11100)\n" - " live out: (01100)\n" - " kill: (00010)\n" + " live in: (1110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 2\n" // else block - " live in: (01000)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0100)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 3\n" // then block - " live in: (00100)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0010)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 4\n" // return block - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00001)\n" + " live in: (0000)\n" + " live out: (0000)\n" + " kill: (0001)\n" "Block 5\n" // exit block - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00000)\n"; + " live in: (0000)\n" + " live out: (0000)\n" + " kill: (0000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -195,31 +198,34 @@ TEST(LivenessTest, CFG5) { // a = 4; // } // return a; + // + // Bitsets are made of: + // (constant0, constant4, phi) const char* expected = "Block 0\n" // entry block - " live in: (0000)\n" - " live out: (1100)\n" - " kill: (1100)\n" + " live in: (000)\n" + " live out: (110)\n" + " kill: (110)\n" "Block 1\n" // block with if - " live in: (1100)\n" - " live out: (1100)\n" - " kill: (0010)\n" + " live in: (110)\n" + " live out: (110)\n" + " kill: (000)\n" "Block 2\n" // else block - " live in: (0100)\n" - " live out: (0000)\n" - " kill: (0000)\n" + " live in: (010)\n" + " live out: (000)\n" + " kill: (000)\n" "Block 3\n" // return block - " 
live in: (0000)\n" - " live out: (0000)\n" - " kill: (0001)\n" + " live in: (000)\n" + " live out: (000)\n" + " kill: (001)\n" "Block 4\n" // exit block - " live in: (0000)\n" - " live out: (0000)\n" - " kill: (0000)\n" + " live in: (000)\n" + " live out: (000)\n" + " kill: (000)\n" "Block 5\n" // block to avoid critical edge. Predecessor is 1, successor is 3. - " live in: (1000)\n" - " live out: (0000)\n" - " kill: (0000)\n"; + " live in: (100)\n" + " live out: (000)\n" + " kill: (000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -237,31 +243,33 @@ TEST(LivenessTest, Loop1) { // a = 4; // } // return; + // Bitsets are made of: + // (constant0, constant4, phi) const char* expected = "Block 0\n" // entry block - " live in: (0000)\n" - " live out: (1100)\n" - " kill: (1100)\n" + " live in: (000)\n" + " live out: (110)\n" + " kill: (110)\n" "Block 1\n" // pre header - " live in: (1100)\n" - " live out: (0100)\n" - " kill: (0000)\n" + " live in: (110)\n" + " live out: (010)\n" + " kill: (000)\n" "Block 2\n" // loop header - " live in: (0100)\n" - " live out: (0100)\n" - " kill: (0011)\n" + " live in: (010)\n" + " live out: (010)\n" + " kill: (001)\n" "Block 3\n" // back edge - " live in: (0100)\n" - " live out: (0100)\n" - " kill: (0000)\n" + " live in: (010)\n" + " live out: (010)\n" + " kill: (000)\n" "Block 4\n" // return block - " live in: (0000)\n" - " live out: (0000)\n" - " kill: (0000)\n" + " live in: (000)\n" + " live out: (000)\n" + " kill: (000)\n" "Block 5\n" // exit block - " live in: (0000)\n" - " live out: (0000)\n" - " kill: (0000)\n"; + " live in: (000)\n" + " live out: (000)\n" + " kill: (000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( @@ -281,31 +289,33 @@ TEST(LivenessTest, Loop3) { // a = 4; // } // return 5; + // Bitsets are made of: + // (constant0, constant4, constant5, phi) const char* expected = "Block 0\n" - " live in: (00000)\n" - " live out: (11100)\n" - " kill: (11100)\n" + " live in: (0000)\n" + " live out: (1110)\n" + " kill: (1110)\n" "Block 1\n" - " live in: (11100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (1110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 2\n" // loop header - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00011)\n" + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0001)\n" "Block 3\n" // back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 4\n" // return block - " live in: (00100)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0010)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 5\n" // exit block - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00000)\n"; + " live in: (0000)\n" + " live out: (0000)\n" + " kill: (0000)\n"; const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -328,36 +338,36 @@ TEST(LivenessTest, Loop4) { // } // return a; // Bitsets are made of: - // (constant0, constant4, phi, equal test) + // (constant0, constant4, phi) const char* expected = "Block 0\n" - " live in: (0000)\n" - " live out: (1100)\n" - " kill: (1100)\n" + " live in: (000)\n" + " live out: (110)\n" + " kill: (110)\n" "Block 1\n" - " live in: (1100)\n" - " live out: (1100)\n" - " kill: (0000)\n" + " live in: (110)\n" + " live out: (110)\n" + " kill: (000)\n" "Block 2\n" // loop header - " live in: (0100)\n" - " live out: (0110)\n" - " kill: (0011)\n" + " live in: (010)\n" + " live out: (011)\n" + " 
kill: (001)\n" "Block 3\n" // back edge - " live in: (0100)\n" - " live out: (0100)\n" - " kill: (0000)\n" + " live in: (010)\n" + " live out: (010)\n" + " kill: (000)\n" "Block 4\n" // pre loop header - " live in: (1100)\n" - " live out: (0100)\n" - " kill: (0000)\n" + " live in: (110)\n" + " live out: (010)\n" + " kill: (000)\n" "Block 5\n" // return block - " live in: (0010)\n" - " live out: (0000)\n" - " kill: (0000)\n" + " live in: (001)\n" + " live out: (000)\n" + " kill: (000)\n" "Block 6\n" // exit block - " live in: (0000)\n" - " live out: (0000)\n" - " kill: (0000)\n"; + " live in: (000)\n" + " live out: (000)\n" + " kill: (000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -375,45 +385,44 @@ TEST(LivenessTest, Loop5) { // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. // Bitsets are made of: - // (constant0, constant4, constant5, equal in block 1, phi in block 8, phi in block 4, - // equal in block 4) + // (constant0, constant4, constant5, phi in block 8, phi in block 4) const char* expected = "Block 0\n" - " live in: (0000000)\n" - " live out: (1110000)\n" - " kill: (1110000)\n" + " live in: (00000)\n" + " live out: (11100)\n" + " kill: (11100)\n" "Block 1\n" - " live in: (1110000)\n" - " live out: (0110000)\n" - " kill: (0001000)\n" + " live in: (11100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 2\n" - " live in: (0100000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (01000)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 3\n" - " live in: (0010000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00100)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 4\n" // loop header - " live in: (0000000)\n" - " live out: (0000010)\n" - " kill: (0000011)\n" + " live in: (00000)\n" + " live out: (00001)\n" + " kill: (00001)\n" "Block 5\n" // back edge - " live in: (0000010)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00001)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 6\n" // return block - " live in: (0000010)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00001)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 7\n" // exit block - " live in: (0000000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00000)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 8\n" // synthesized pre header - " live in: (0000000)\n" - " live out: (0000000)\n" - " kill: (0000100)\n"; + " live in: (00000)\n" + " live out: (00000)\n" + " kill: (00010)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -430,45 +439,44 @@ TEST(LivenessTest, Loop5) { TEST(LivenessTest, Loop6) { // Bitsets are made of: - // (constant0, constant4, constant5, phi in block 2, equal in block 2, equal in block 3, - // phi in block 8) + // (constant0, constant4, constant5, phi in block 2, phi in block 8) const char* expected = "Block 0\n" - " live in: (0000000)\n" - " live out: (1110000)\n" - " kill: (1110000)\n" + " live in: (00000)\n" + " live out: (11100)\n" + " kill: (11100)\n" "Block 1\n" - " live in: (1110000)\n" - " live out: (0110000)\n" - " kill: (0000000)\n" + " live in: (11100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 2\n" // loop header - " live in: (0110000)\n" - " live out: (0111000)\n" - " kill: (0001100)\n" + " live in: (01100)\n" + " live out: (01110)\n" + " kill: (00010)\n" "Block 3\n" - " live in: (0110000)\n" - " live 
out: (0110000)\n" - " kill: (0000010)\n" + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 4\n" // original back edge - " live in: (0110000)\n" - " live out: (0110000)\n" - " kill: (0000000)\n" + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 5\n" // original back edge - " live in: (0110000)\n" - " live out: (0110000)\n" - " kill: (0000000)\n" + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 6\n" // return block - " live in: (0001000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00010)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 7\n" // exit block - " live in: (0000000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00000)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 8\n" // synthesized back edge - " live in: (0110000)\n" - " live out: (0110000)\n" - " kill: (0000001)\n"; + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00001)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -486,45 +494,44 @@ TEST(LivenessTest, Loop6) { TEST(LivenessTest, Loop7) { // Bitsets are made of: - // (constant0, constant4, constant5, phi in block 2, equal in block 2, equal in block 3, - // phi in block 6) + // (constant0, constant4, constant5, phi in block 2, phi in block 6) const char* expected = "Block 0\n" - " live in: (0000000)\n" - " live out: (1110000)\n" - " kill: (1110000)\n" + " live in: (00000)\n" + " live out: (11100)\n" + " kill: (11100)\n" "Block 1\n" - " live in: (1110000)\n" - " live out: (0110000)\n" - " kill: (0000000)\n" + " live in: (11100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 2\n" // loop header - " live in: (0110000)\n" - " live out: (0111000)\n" - " kill: (0001100)\n" + " live in: (01100)\n" + " live out: (01110)\n" + " kill: (00010)\n" "Block 3\n" - " live in: (0110000)\n" - " live out: (0110000)\n" - " kill: (0000010)\n" + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 4\n" // loop exit - " live in: (0010000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00100)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 5\n" // back edge - " live in: (0110000)\n" - " live out: (0110000)\n" - " kill: (0000000)\n" + " live in: (01100)\n" + " live out: (01100)\n" + " kill: (00000)\n" "Block 6\n" // return block - " live in: (0000000)\n" - " live out: (0000000)\n" - " kill: (0000001)\n" + " live in: (00000)\n" + " live out: (00000)\n" + " kill: (00001)\n" "Block 7\n" // exit block - " live in: (0000000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n" + " live in: (00000)\n" + " live out: (00000)\n" + " kill: (00000)\n" "Block 8\n" // synthesized block to avoid critical edge. - " live in: (0001000)\n" - " live out: (0000000)\n" - " kill: (0000000)\n"; + " live in: (00010)\n" + " live out: (00000)\n" + " kill: (00000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -539,4 +546,51 @@ TEST(LivenessTest, Loop7) { TestCode(data, expected); } +TEST(LivenessTest, Loop8) { + // var a = 0; + // while (a == a) { + // a = a + a; + // } + // return a; + // + // We want to test that the ins of the loop exit + // does contain the phi. 
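+  // (Block 4, the return block, must therefore list the phi in its live-in set.)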
+ // Bitsets are made of: + // (constant0, phi, add) + const char* expected = + "Block 0\n" + " live in: (000)\n" + " live out: (100)\n" + " kill: (100)\n" + "Block 1\n" // pre loop header + " live in: (100)\n" + " live out: (000)\n" + " kill: (000)\n" + "Block 2\n" // loop header + " live in: (000)\n" + " live out: (010)\n" + " kill: (010)\n" + "Block 3\n" // back edge + " live in: (010)\n" + " live out: (000)\n" + " kill: (001)\n" + "Block 4\n" // return block + " live in: (010)\n" + " live out: (000)\n" + " kill: (000)\n" + "Block 5\n" // exit block + " live in: (000)\n" + " live out: (000)\n" + " kill: (000)\n"; + + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 6, + Instruction::ADD_INT, 0, 0, + Instruction::GOTO | 0xFB00, + Instruction::RETURN | 0 << 8); + + TestCode(data, expected); +} + } // namespace art diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 98766d2701..1c36cdf77c 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -20,13 +20,36 @@ namespace art { -LocationSummary::LocationSummary(HInstruction* instruction) +LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind) : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), - temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0) { + temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), + environment_(instruction->GetBlock()->GetGraph()->GetArena(), + instruction->EnvironmentSize()), + call_kind_(call_kind), + stack_mask_(nullptr), + register_mask_(0), + live_registers_() { inputs_.SetSize(instruction->InputCount()); - for (size_t i = 0; i < instruction->InputCount(); i++) { + for (size_t i = 0; i < instruction->InputCount(); ++i) { inputs_.Put(i, Location()); } + environment_.SetSize(instruction->EnvironmentSize()); + for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) { + environment_.Put(i, Location()); + } + instruction->SetLocations(this); + + if (NeedsSafepoint()) { + ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetArena(); + stack_mask_ = new (arena) ArenaBitVector(arena, 0, true); + } +} + + +Location Location::RegisterOrConstant(HInstruction* instruction) { + return instruction->IsConstant() + ? Location::ConstantLocation(instruction->AsConstant()) + : Location::RequiresRegister(); } } // namespace art diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 3c60d3cbe8..f358e051ae 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -18,12 +18,14 @@ #define ART_COMPILER_OPTIMIZING_LOCATIONS_H_ #include "base/bit_field.h" +#include "base/bit_vector.h" #include "utils/allocation.h" #include "utils/growable_array.h" #include "utils/managed_register.h" namespace art { +class HConstant; class HInstruction; /** @@ -34,23 +36,33 @@ class Location : public ValueObject { public: enum Kind { kInvalid = 0, - kStackSlot = 1, // Word size slot. - kDoubleStackSlot = 2, // 64bit stack slot. - kRegister = 3, + kConstant = 1, + kStackSlot = 2, // Word size slot. + kDoubleStackSlot = 3, // 64bit stack slot. + kRegister = 4, // On 32bits architectures, quick can pass a long where the // low bits are in the last parameter register, and the high // bits are in a stack slot. The kQuickParameter kind is for // handling this special case. 
- kQuickParameter = 4, + kQuickParameter = 6, // Unallocated location represents a location that is not fixed and can be // allocated by a register allocator. Each unallocated location has // a policy that specifies what kind of location is suitable. Payload // contains register allocation policy. - kUnallocated = 5, + kUnallocated = 7, }; Location() : value_(kInvalid) { + // Verify that non-tagged location kinds do not interfere with kConstantTag. + COMPILE_ASSERT((kInvalid & kLocationTagMask) != kConstant, TagError); + COMPILE_ASSERT((kUnallocated & kLocationTagMask) != kConstant, TagError); + COMPILE_ASSERT((kStackSlot & kLocationTagMask) != kConstant, TagError); + COMPILE_ASSERT((kDoubleStackSlot & kLocationTagMask) != kConstant, TagError); + COMPILE_ASSERT((kRegister & kLocationTagMask) != kConstant, TagError); + COMPILE_ASSERT((kQuickParameter & kLocationTagMask) != kConstant, TagError); + COMPILE_ASSERT((kConstant & kLocationTagMask) == kConstant, TagError); + DCHECK(!IsValid()); } @@ -61,6 +73,20 @@ class Location : public ValueObject { return *this; } + bool IsConstant() const { + return (value_ & kLocationTagMask) == kConstant; + } + + static Location ConstantLocation(HConstant* constant) { + DCHECK(constant != nullptr); + return Location(kConstant | reinterpret_cast<uword>(constant)); + } + + HConstant* GetConstant() const { + DCHECK(IsConstant()); + return reinterpret_cast<HConstant*>(value_ & ~kLocationTagMask); + } + bool IsValid() const { return value_ != kInvalid; } @@ -69,11 +95,6 @@ class Location : public ValueObject { return !IsValid(); } - bool IsConstant() const { - // TODO: support constants. - return false; - } - // Empty location. Used if there the location should be ignored. static Location NoLocation() { return Location(); @@ -150,9 +171,10 @@ class Location : public ValueObject { arm::ArmManagedRegister AsArm() const; x86::X86ManagedRegister AsX86() const; + x86_64::X86_64ManagedRegister AsX86_64() const; Kind GetKind() const { - return KindField::Decode(value_); + return IsConstant() ? kConstant : KindField::Decode(value_); } bool Equals(Location other) const { @@ -161,12 +183,13 @@ class Location : public ValueObject { const char* DebugString() const { switch (GetKind()) { - case kInvalid: return "?"; + case kInvalid: return "I"; case kRegister: return "R"; case kStackSlot: return "S"; case kDoubleStackSlot: return "DS"; case kQuickParameter: return "Q"; case kUnallocated: return "U"; + case kConstant: return "C"; } return "?"; } @@ -195,6 +218,8 @@ class Location : public ValueObject { return UnallocatedLocation(kRequiresRegister); } + static Location RegisterOrConstant(HInstruction* instruction); + // The location of the first input to the instruction will be // used to replace this unallocated location. static Location SameAsFirstInput() { @@ -214,6 +239,7 @@ class Location : public ValueObject { // Number of bits required to encode Kind value. static constexpr uint32_t kBitsForKind = 4; static constexpr uint32_t kBitsForPayload = kWordSize * kBitsPerByte - kBitsForKind; + static constexpr uword kLocationTagMask = 0x3; explicit Location(uword value) : value_(value) {} @@ -240,6 +266,34 @@ class Location : public ValueObject { uword value_; }; +class RegisterSet : public ValueObject { + public: + RegisterSet() : core_registers_(0), floating_point_registers_(0) {} + + void Add(Location loc) { + // TODO: floating point registers. 
+ core_registers_ |= (1 << loc.reg().RegId()); + } + + bool ContainsCoreRegister(uint32_t id) { + return Contains(core_registers_, id); + } + + bool ContainsFloatingPointRegister(uint32_t id) { + return Contains(floating_point_registers_, id); + } + + static bool Contains(uint32_t register_set, uint32_t reg) { + return (register_set & (1 << reg)) != 0; + } + + private: + uint32_t core_registers_; + uint32_t floating_point_registers_; + + DISALLOW_COPY_AND_ASSIGN(RegisterSet); +}; + /** * The code generator computes LocationSummary for each instruction so that * the instruction itself knows what code to generate: where to find the inputs @@ -250,7 +304,13 @@ class Location : public ValueObject { */ class LocationSummary : public ArenaObject { public: - explicit LocationSummary(HInstruction* instruction); + enum CallKind { + kNoCall, + kCallOnSlowPath, + kCall + }; + + LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall); void SetInAt(uint32_t at, Location location) { inputs_.Put(at, location); @@ -284,12 +344,74 @@ class LocationSummary : public ArenaObject { return temps_.Size(); } + void SetEnvironmentAt(uint32_t at, Location location) { + environment_.Put(at, location); + } + + Location GetEnvironmentAt(uint32_t at) const { + return environment_.Get(at); + } + Location Out() const { return output_; } + bool CanCall() const { return call_kind_ != kNoCall; } + bool WillCall() const { return call_kind_ == kCall; } + bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; } + bool NeedsSafepoint() const { return CanCall(); } + + void SetStackBit(uint32_t index) { + stack_mask_->SetBit(index); + } + + void ClearStackBit(uint32_t index) { + stack_mask_->ClearBit(index); + } + + void SetRegisterBit(uint32_t reg_id) { + register_mask_ |= (1 << reg_id); + } + + bool RegisterContainsObject(uint32_t reg_id) { + return RegisterSet::Contains(register_mask_, reg_id); + } + + void AddLiveRegister(Location location) { + live_registers_.Add(location); + } + + BitVector* GetStackMask() const { + return stack_mask_; + } + + RegisterSet* GetLiveRegisters() { + return &live_registers_; + } + + bool InputOverlapsWithOutputOrTemp(uint32_t input, bool is_environment) const { + if (is_environment) return true; + Location location = Out(); + // TODO: Add more policies. + if (input == 0 && location.IsUnallocated() && location.GetPolicy() == Location::kSameAsFirstInput) { + return false; + } + return true; + } + private: GrowableArray<Location> inputs_; GrowableArray<Location> temps_; + GrowableArray<Location> environment_; Location output_; + const CallKind call_kind_; + + // Mask of objects that live in the stack. + BitVector* stack_mask_; + + // Mask of objects that live in register. + uint32_t register_mask_; + + // Registers that are in use at this position. 
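+  // Slow path code uses this set to know which registers need to be saved and
+  // restored around its runtime call.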
+ RegisterSet live_registers_; DISALLOW_COPY_AND_ASSIGN(LocationSummary); }; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 752466b0b3..5c4ab8e4c0 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -35,7 +35,7 @@ void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const { if (!visited.IsBitSet(i)) { HBasicBlock* block = blocks_.Get(i); for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { - block->GetSuccessors().Get(j)->RemovePredecessor(block, false); + block->GetSuccessors().Get(j)->RemovePredecessor(block); } for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { block->RemovePhi(it.Current()->AsPhi()); @@ -124,6 +124,7 @@ void HGraph::VisitBlockForDominatorTree(HBasicBlock* block, // dominator of the block. We can then start visiting its successors. if (visits->Get(block->GetBlockId()) == block->GetPredecessors().Size() - block->NumberOfBackEdges()) { + block->GetDominator()->AddDominatedBlock(block); reverse_post_order_.Add(block); for (size_t i = 0; i < block->GetSuccessors().Size(); i++) { VisitBlockForDominatorTree(block->GetSuccessors().Get(i), block, visits); @@ -140,11 +141,10 @@ void HGraph::TransformToSSA() { void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { // Insert a new node between `block` and `successor` to split the // critical edge. - HBasicBlock* new_block = new (arena_) HBasicBlock(this); + HBasicBlock* new_block = new (arena_) HBasicBlock(this, successor->GetDexPc()); AddBlock(new_block); new_block->AddInstruction(new (arena_) HGoto()); - block->RemoveSuccessor(successor); - block->AddSuccessor(new_block); + block->ReplaceSuccessor(successor, new_block); new_block->AddSuccessor(successor); if (successor->IsLoopHeader()) { // If we split at a back edge boundary, make the new block the back edge. @@ -162,14 +162,15 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { // If there are more than one back edge, make them branch to the same block that // will become the only back edge. This simplifies finding natural loops in the // graph. - if (info->NumberOfBackEdges() > 1) { - HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this); + // Also, if the loop is a do/while (that is the back edge is an if), change the + // back edge to be a goto. This simplifies code generation of suspend cheks. + if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) { + HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(new_back_edge); new_back_edge->AddInstruction(new (arena_) HGoto()); for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) { HBasicBlock* back_edge = info->GetBackEdges().Get(pred); - header->RemovePredecessor(back_edge); - back_edge->AddSuccessor(new_back_edge); + back_edge->ReplaceSuccessor(header, new_back_edge); } info->ClearBackEdges(); info->AddBackEdge(new_back_edge); @@ -181,7 +182,7 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { // loop. 
size_t number_of_incomings = header->GetPredecessors().Size() - info->NumberOfBackEdges(); if (number_of_incomings != 1) { - HBasicBlock* pre_header = new (arena_) HBasicBlock(this); + HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto()); @@ -190,13 +191,29 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) { HBasicBlock* predecessor = header->GetPredecessors().Get(pred); if (predecessor != back_edge) { - header->RemovePredecessor(predecessor); + predecessor->ReplaceSuccessor(header, pre_header); pred--; - predecessor->AddSuccessor(pre_header); } } pre_header->AddSuccessor(header); } + + // Make sure the second predecessor of a loop header is the back edge. + if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) { + header->SwapPredecessors(); + } + + // Place the suspend check at the beginning of the header, so that live registers + // will be known when allocating registers. Note that code generation can still + // generate the suspend check at the back edge, but needs to be careful with + // loop phi spill slots (which are not written to at back edge). + HInstruction* first_instruction = header->GetFirstInstruction(); + if (!first_instruction->IsSuspendCheck()) { + HSuspendCheck* check = new (arena_) HSuspendCheck(header->GetDexPc()); + header->InsertInstructionBefore(check, first_instruction); + first_instruction = check; + } + info->SetSuspendCheck(first_instruction->AsSuspendCheck()); } void HGraph::SimplifyCFG() { @@ -294,12 +311,28 @@ bool HBasicBlock::Dominates(HBasicBlock* other) const { void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) { DCHECK(cursor->AsPhi() == nullptr); DCHECK(instruction->AsPhi() == nullptr); + DCHECK_EQ(instruction->GetId(), -1); + DCHECK_NE(cursor->GetId(), -1); + DCHECK_EQ(cursor->GetBlock(), this); + DCHECK(!instruction->IsControlFlow()); instruction->next_ = cursor; instruction->previous_ = cursor->previous_; cursor->previous_ = instruction; if (GetFirstInstruction() == cursor) { instructions_.first_instruction_ = instruction; + } else { + instruction->previous_->next_ = instruction; } + instruction->SetBlock(this); + instruction->SetId(GetGraph()->GetNextInstructionId()); +} + +void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial, + HInstruction* replacement) { + DCHECK(initial->GetBlock() == this); + InsertInstructionBefore(replacement, initial); + initial->ReplaceWith(replacement); + RemoveInstruction(initial); } static void Add(HInstructionList* instruction_list, @@ -332,6 +365,16 @@ static void Remove(HInstructionList* instruction_list, for (size_t i = 0; i < instruction->InputCount(); i++) { instruction->InputAt(i)->RemoveUser(instruction, i); } + + HEnvironment* environment = instruction->GetEnvironment(); + if (environment != nullptr) { + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* vreg = environment->GetInstructionAt(i); + if (vreg != nullptr) { + vreg->RemoveEnvironmentUser(environment, i); + } + } + } } void HBasicBlock::RemoveInstruction(HInstruction* instruction) { @@ -342,13 +385,16 @@ void HBasicBlock::RemovePhi(HPhi* phi) { Remove(&phis_, this, phi); } -void HInstruction::RemoveUser(HInstruction* user, size_t input_index) { - HUseListNode<HInstruction>* previous = nullptr; - HUseListNode<HInstruction>* current = uses_; +template <typename T> +static void 
RemoveFromUseList(T* user, + size_t input_index, + HUseListNode<T>** list) { + HUseListNode<T>* previous = nullptr; + HUseListNode<T>* current = *list; while (current != nullptr) { if (current->GetUser() == user && current->GetIndex() == input_index) { if (previous == NULL) { - uses_ = current->GetTail(); + *list = current->GetTail(); } else { previous->SetTail(current->GetTail()); } @@ -358,6 +404,14 @@ void HInstruction::RemoveUser(HInstruction* user, size_t input_index) { } } +void HInstruction::RemoveUser(HInstruction* user, size_t input_index) { + RemoveFromUseList(user, input_index, &uses_); +} + +void HInstruction::RemoveEnvironmentUser(HEnvironment* user, size_t input_index) { + RemoveFromUseList(user, input_index, &env_uses_); +} + void HInstructionList::AddInstruction(HInstruction* instruction) { if (first_instruction_ == nullptr) { DCHECK(last_instruction_ == nullptr); @@ -387,6 +441,63 @@ void HInstructionList::RemoveInstruction(HInstruction* instruction) { } } +bool HInstructionList::Contains(HInstruction* instruction) const { + for (HInstructionIterator it(*this); !it.Done(); it.Advance()) { + if (it.Current() == instruction) { + return true; + } + } + return false; +} + +bool HInstructionList::FoundBefore(const HInstruction* instruction1, + const HInstruction* instruction2) const { + DCHECK_EQ(instruction1->GetBlock(), instruction2->GetBlock()); + for (HInstructionIterator it(*this); !it.Done(); it.Advance()) { + if (it.Current() == instruction1) { + return true; + } + if (it.Current() == instruction2) { + return false; + } + } + LOG(FATAL) << "Did not find an order between two instructions of the same block."; + return true; +} + +bool HInstruction::Dominates(HInstruction* other_instruction) const { + HBasicBlock* block = GetBlock(); + HBasicBlock* other_block = other_instruction->GetBlock(); + if (block != other_block) { + return GetBlock()->Dominates(other_instruction->GetBlock()); + } else { + // If both instructions are in the same block, ensure this + // instruction comes before `other_instruction`. + if (IsPhi()) { + if (!other_instruction->IsPhi()) { + // Phis appear before non phi-instructions so this instruction + // dominates `other_instruction`. + return true; + } else { + // There is no order among phis. + LOG(FATAL) << "There is no dominance between phis of a same block."; + return false; + } + } else { + // `this` is not a phi. + if (other_instruction->IsPhi()) { + // Phis appear before non phi-instructions so this instruction + // does not dominate `other_instruction`. + return false; + } else { + // Check whether this instruction comes before + // `other_instruction` in the instruction list. + return block->GetInstructions().FoundBefore(this, other_instruction); + } + } + } +} + void HInstruction::ReplaceWith(HInstruction* other) { DCHECK(other != nullptr); for (HUseIterator<HInstruction> it(GetUses()); !it.Done(); it.Advance()) { @@ -409,6 +520,10 @@ void HInstruction::ReplaceWith(HInstruction* other) { env_uses_ = nullptr; } +size_t HInstruction::EnvironmentSize() const { + return HasEnvironment() ? 
environment_->Size() : 0; +} + void HPhi::AddInput(HInstruction* input) { DCHECK(input->GetBlock() != nullptr); inputs_.Add(input); @@ -440,4 +555,57 @@ void HGraphVisitor::VisitBasicBlock(HBasicBlock* block) { } } +HConstant* HBinaryOperation::TryStaticEvaluation(ArenaAllocator* allocator) const { + if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) { + int32_t value = Evaluate(GetLeft()->AsIntConstant()->GetValue(), + GetRight()->AsIntConstant()->GetValue()); + return new(allocator) HIntConstant(value); + } else if (GetLeft()->IsLongConstant() && GetRight()->IsLongConstant()) { + int64_t value = Evaluate(GetLeft()->AsLongConstant()->GetValue(), + GetRight()->AsLongConstant()->GetValue()); + return new(allocator) HLongConstant(value); + } + return nullptr; +} + +bool HCondition::NeedsMaterialization() const { + if (!HasOnlyOneUse()) { + return true; + } + HUseListNode<HInstruction>* uses = GetUses(); + HInstruction* user = uses->GetUser(); + if (!user->IsIf()) { + return true; + } + + // TODO: if there is no intervening instructions with side-effect between this condition + // and the If instruction, we should move the condition just before the If. + if (GetNext() != user) { + return true; + } + return false; +} + +bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const { + HInstruction* previous = if_->GetPrevious(); + while (previous != nullptr && previous->IsParallelMove()) { + previous = previous->GetPrevious(); + } + return previous == this; +} + +bool HInstruction::Equals(HInstruction* other) const { + if (!InstructionTypeEquals(other)) return false; + DCHECK_EQ(GetKind(), other->GetKind()); + if (!InstructionDataEquals(other)) return false; + if (GetType() != other->GetType()) return false; + if (InputCount() != other->InputCount()) return false; + + for (size_t i = 0, e = InputCount(); i < e; ++i) { + if (InputAt(i) != other->InputAt(i)) return false; + } + DCHECK_EQ(ComputeHashCode(), other->ComputeHashCode()); + return true; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index b1c8016112..3d65366c43 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -18,6 +18,8 @@ #define ART_COMPILER_OPTIMIZING_NODES_H_ #include "locations.h" +#include "offsets.h" +#include "primitive.h" #include "utils/allocation.h" #include "utils/arena_bit_vector.h" #include "utils/growable_array.h" @@ -30,14 +32,25 @@ class HInstruction; class HIntConstant; class HGraphVisitor; class HPhi; +class HSuspendCheck; class LiveInterval; class LocationSummary; static const int kDefaultNumberOfBlocks = 8; static const int kDefaultNumberOfSuccessors = 2; static const int kDefaultNumberOfPredecessors = 2; +static const int kDefaultNumberOfDominatedBlocks = 1; static const int kDefaultNumberOfBackEdges = 1; +enum IfCondition { + kCondEQ, + kCondNE, + kCondLT, + kCondLE, + kCondGT, + kCondGE, +}; + class HInstructionList { public: HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {} @@ -45,6 +58,15 @@ class HInstructionList { void AddInstruction(HInstruction* instruction); void RemoveInstruction(HInstruction* instruction); + // Return true if this list contains `instruction`. + bool Contains(HInstruction* instruction) const; + + // Return true if `instruction1` is found before `instruction2` in + // this instruction list and false otherwise. Abort if none + // of these instructions is found. 
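+  // Used by HInstruction::Dominates() to order two non-phi instructions of
+  // the same block.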
+ bool FoundBefore(const HInstruction* instruction1, + const HInstruction* instruction2) const; + private: HInstruction* first_instruction_; HInstruction* last_instruction_; @@ -66,7 +88,8 @@ class HGraph : public ArenaObject { maximum_number_of_out_vregs_(0), number_of_vregs_(0), number_of_in_vregs_(0), - current_instruction_id_(0) { } + number_of_temporaries_(0), + current_instruction_id_(0) {} ArenaAllocator* GetArena() const { return arena_; } const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; } @@ -103,6 +126,14 @@ class HGraph : public ArenaObject { maximum_number_of_out_vregs_ = std::max(new_value, maximum_number_of_out_vregs_); } + void UpdateNumberOfTemporaries(size_t count) { + number_of_temporaries_ = std::max(count, number_of_temporaries_); + } + + size_t GetNumberOfTemporaries() const { + return number_of_temporaries_; + } + void SetNumberOfVRegs(uint16_t number_of_vregs) { number_of_vregs_ = number_of_vregs; } @@ -119,6 +150,10 @@ class HGraph : public ArenaObject { return number_of_in_vregs_; } + uint16_t GetNumberOfLocalVRegs() const { + return number_of_vregs_ - number_of_in_vregs_; + } + const GrowableArray<HBasicBlock*>& GetReversePostOrder() const { return reverse_post_order_; } @@ -154,6 +189,9 @@ class HGraph : public ArenaObject { // The number of virtual registers used by parameters of this method. uint16_t number_of_in_vregs_; + // The number of temporaries that will be needed for the baseline compiler. + size_t number_of_temporaries_; + // The current id to assign to a newly added instruction. See HInstruction.id_. int current_instruction_id_; @@ -164,13 +202,19 @@ class HLoopInformation : public ArenaObject { public: HLoopInformation(HBasicBlock* header, HGraph* graph) : header_(header), + suspend_check_(nullptr), back_edges_(graph->GetArena(), kDefaultNumberOfBackEdges), - blocks_(graph->GetArena(), graph->GetBlocks().Size(), false) {} + // Make bit vector growable, as the number of blocks may change. + blocks_(graph->GetArena(), graph->GetBlocks().Size(), true) {} HBasicBlock* GetHeader() const { return header_; } + HSuspendCheck* GetSuspendCheck() const { return suspend_check_; } + void SetSuspendCheck(HSuspendCheck* check) { suspend_check_ = check; } + bool HasSuspendCheck() const { return suspend_check_ != nullptr; } + void AddBackEdge(HBasicBlock* back_edge) { back_edges_.Add(back_edge); } @@ -219,6 +263,7 @@ class HLoopInformation : public ArenaObject { void PopulateRecursive(HBasicBlock* block); HBasicBlock* header_; + HSuspendCheck* suspend_check_; GrowableArray<HBasicBlock*> back_edges_; ArenaBitVector blocks_; @@ -226,19 +271,23 @@ class HLoopInformation : public ArenaObject { }; static constexpr size_t kNoLifetime = -1; +static constexpr uint32_t kNoDexPc = -1; // A block in a method. Contains the list of instructions represented // as a double linked list. Each block knows its predecessors and // successors. 
+ class HBasicBlock : public ArenaObject { public: - explicit HBasicBlock(HGraph* graph) + explicit HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc) : graph_(graph), predecessors_(graph->GetArena(), kDefaultNumberOfPredecessors), successors_(graph->GetArena(), kDefaultNumberOfSuccessors), loop_information_(nullptr), dominator_(nullptr), + dominated_blocks_(graph->GetArena(), kDefaultNumberOfDominatedBlocks), block_id_(-1), + dex_pc_(dex_pc), lifetime_start_(kNoLifetime), lifetime_end_(kNoLifetime) {} @@ -250,6 +299,18 @@ class HBasicBlock : public ArenaObject { return successors_; } + const GrowableArray<HBasicBlock*>& GetDominatedBlocks() const { + return dominated_blocks_; + } + + bool IsEntryBlock() const { + return graph_->GetEntryBlock() == this; + } + + bool IsExitBlock() const { + return graph_->GetExitBlock() == this; + } + void AddBackEdge(HBasicBlock* back_edge) { if (loop_information_ == nullptr) { loop_information_ = new (graph_->GetArena()) HLoopInformation(this, graph_); @@ -265,6 +326,7 @@ class HBasicBlock : public ArenaObject { HBasicBlock* GetDominator() const { return dominator_; } void SetDominator(HBasicBlock* dominator) { dominator_ = dominator; } + void AddDominatedBlock(HBasicBlock* block) { dominated_blocks_.Add(block); } int NumberOfBackEdges() const { return loop_information_ == nullptr @@ -283,18 +345,16 @@ class HBasicBlock : public ArenaObject { block->predecessors_.Add(this); } - void RemovePredecessor(HBasicBlock* block, bool remove_in_successor = true) { - predecessors_.Delete(block); - if (remove_in_successor) { - block->successors_.Delete(this); - } + void ReplaceSuccessor(HBasicBlock* existing, HBasicBlock* new_block) { + size_t successor_index = GetSuccessorIndexOf(existing); + DCHECK_NE(successor_index, static_cast<size_t>(-1)); + existing->RemovePredecessor(this); + new_block->predecessors_.Add(this); + successors_.Put(successor_index, new_block); } - void RemoveSuccessor(HBasicBlock* block, bool remove_in_predecessor = true) { - successors_.Delete(block); - if (remove_in_predecessor) { - block->predecessors_.Delete(this); - } + void RemovePredecessor(HBasicBlock* block) { + predecessors_.Delete(block); } void ClearAllPredecessors() { @@ -306,6 +366,13 @@ class HBasicBlock : public ArenaObject { block->successors_.Add(this); } + void SwapPredecessors() { + DCHECK_EQ(predecessors_.Size(), 2u); + HBasicBlock* temp = predecessors_.Get(0); + predecessors_.Put(0, predecessors_.Get(1)); + predecessors_.Put(1, temp); + } + size_t GetPredecessorIndexOf(HBasicBlock* predecessor) { for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) { if (predecessors_.Get(i) == predecessor) { @@ -315,9 +382,21 @@ class HBasicBlock : public ArenaObject { return -1; } + size_t GetSuccessorIndexOf(HBasicBlock* successor) { + for (size_t i = 0, e = successors_.Size(); i < e; ++i) { + if (successors_.Get(i) == successor) { + return i; + } + } + return -1; + } + void AddInstruction(HInstruction* instruction); void RemoveInstruction(HInstruction* instruction); void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); + // Replace instruction `initial` with `replacement` within this block. 
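+  // `initial` must be in this block; `replacement` takes its position in the
+  // instruction list, all uses of `initial` are rewired to `replacement`, and
+  // `initial` is then removed.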
+ void ReplaceAndRemoveInstructionWith(HInstruction* initial, + HInstruction* replacement); void AddPhi(HPhi* phi); void RemovePhi(HPhi* phi); @@ -325,6 +404,12 @@ class HBasicBlock : public ArenaObject { return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this); } + bool IsLoopPreHeaderFirstPredecessor() const { + DCHECK(IsLoopHeader()); + DCHECK(!GetPredecessors().IsEmpty()); + return GetPredecessors().Get(0) == GetLoopInformation()->GetPreHeader(); + } + HLoopInformation* GetLoopInformation() const { return loop_information_; } @@ -348,6 +433,8 @@ class HBasicBlock : public ArenaObject { } } + bool IsInLoop() const { return loop_information_ != nullptr; } + // Returns wheter this block dominates the blocked passed as parameter. bool Dominates(HBasicBlock* block) const; @@ -357,6 +444,8 @@ class HBasicBlock : public ArenaObject { void SetLifetimeStart(size_t start) { lifetime_start_ = start; } void SetLifetimeEnd(size_t end) { lifetime_end_ = end; } + uint32_t GetDexPc() const { return dex_pc_; } + private: HGraph* const graph_; GrowableArray<HBasicBlock*> predecessors_; @@ -365,21 +454,31 @@ class HBasicBlock : public ArenaObject { HInstructionList phis_; HLoopInformation* loop_information_; HBasicBlock* dominator_; + GrowableArray<HBasicBlock*> dominated_blocks_; int block_id_; + // The dex program counter of the first instruction of this block. + const uint32_t dex_pc_; size_t lifetime_start_; size_t lifetime_end_; DISALLOW_COPY_AND_ASSIGN(HBasicBlock); }; -#define FOR_EACH_INSTRUCTION(M) \ +#define FOR_EACH_CONCRETE_INSTRUCTION(M) \ M(Add) \ + M(Condition) \ M(Equal) \ + M(NotEqual) \ + M(LessThan) \ + M(LessThanOrEqual) \ + M(GreaterThan) \ + M(GreaterThanOrEqual) \ M(Exit) \ M(Goto) \ M(If) \ M(IntConstant) \ M(InvokeStatic) \ + M(InvokeVirtual) \ M(LoadLocal) \ M(Local) \ M(LongConstant) \ @@ -392,21 +491,41 @@ class HBasicBlock : public ArenaObject { M(ReturnVoid) \ M(StoreLocal) \ M(Sub) \ + M(Compare) \ + M(InstanceFieldGet) \ + M(InstanceFieldSet) \ + M(ArrayGet) \ + M(ArraySet) \ + M(ArrayLength) \ + M(BoundsCheck) \ + M(NullCheck) \ + M(Temporary) \ + M(SuspendCheck) \ + +#define FOR_EACH_INSTRUCTION(M) \ + FOR_EACH_CONCRETE_INSTRUCTION(M) \ + M(Constant) \ + M(BinaryOperation) #define FORWARD_DECLARATION(type) class H##type; FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) #undef FORWARD_DECLARATION -#define DECLARE_INSTRUCTION(type) \ - virtual const char* DebugName() const { return #type; } \ - virtual H##type* As##type() { return this; } \ - virtual void Accept(HGraphVisitor* visitor) \ +#define DECLARE_INSTRUCTION(type) \ + virtual InstructionKind GetKind() const { return k##type; } \ + virtual const char* DebugName() const { return #type; } \ + virtual const H##type* As##type() const OVERRIDE { return this; } \ + virtual H##type* As##type() OVERRIDE { return this; } \ + virtual bool InstructionTypeEquals(HInstruction* other) const { \ + return other->Is##type(); \ + } \ + virtual void Accept(HGraphVisitor* visitor) template <typename T> class HUseListNode : public ArenaObject { public: HUseListNode(T* user, size_t index, HUseListNode* tail) - : user_(user), index_(index), tail_(tail) { } + : user_(user), index_(index), tail_(tail) {} HUseListNode* GetTail() const { return tail_; } T* GetUser() const { return user_; } @@ -422,9 +541,72 @@ class HUseListNode : public ArenaObject { DISALLOW_COPY_AND_ASSIGN(HUseListNode); }; +// Represents the side effects an instruction may have. 
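+// The low bits record what an instruction may change, the bits above them what
+// it depends on. `a.DependsOn(b)` shifts b's 'changes' bits into the 'depends on'
+// region and intersects them with a's flags, i.e. it answers whether `a` reads
+// state that `b` may write.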
+class SideEffects : public ValueObject { + public: + SideEffects() : flags_(0) {} + + static SideEffects None() { + return SideEffects(0); + } + + static SideEffects All() { + return SideEffects(ChangesSomething().flags_ | DependsOnSomething().flags_); + } + + static SideEffects ChangesSomething() { + return SideEffects((1 << kFlagChangesCount) - 1); + } + + static SideEffects DependsOnSomething() { + int count = kFlagDependsOnCount - kFlagChangesCount; + return SideEffects(((1 << count) - 1) << kFlagChangesCount); + } + + SideEffects Union(SideEffects other) const { + return SideEffects(flags_ | other.flags_); + } + + bool HasSideEffects() const { + size_t all_bits_set = (1 << kFlagChangesCount) - 1; + return (flags_ & all_bits_set) != 0; + } + + bool HasAllSideEffects() const { + size_t all_bits_set = (1 << kFlagChangesCount) - 1; + return all_bits_set == (flags_ & all_bits_set); + } + + bool DependsOn(SideEffects other) const { + size_t depends_flags = other.ComputeDependsFlags(); + return (flags_ & depends_flags) != 0; + } + + bool HasDependencies() const { + int count = kFlagDependsOnCount - kFlagChangesCount; + size_t all_bits_set = (1 << count) - 1; + return ((flags_ >> kFlagChangesCount) & all_bits_set) != 0; + } + + private: + static constexpr int kFlagChangesSomething = 0; + static constexpr int kFlagChangesCount = kFlagChangesSomething + 1; + + static constexpr int kFlagDependsOnSomething = kFlagChangesCount; + static constexpr int kFlagDependsOnCount = kFlagDependsOnSomething + 1; + + explicit SideEffects(size_t flags) : flags_(flags) {} + + size_t ComputeDependsFlags() const { + return flags_ << kFlagChangesCount; + } + + size_t flags_; +}; + class HInstruction : public ArenaObject { public: - HInstruction() + explicit HInstruction(SideEffects side_effects) : previous_(nullptr), next_(nullptr), block_(nullptr), @@ -435,17 +617,27 @@ class HInstruction : public ArenaObject { environment_(nullptr), locations_(nullptr), live_interval_(nullptr), - lifetime_position_(kNoLifetime) {} + lifetime_position_(kNoLifetime), + side_effects_(side_effects) {} - virtual ~HInstruction() { } + virtual ~HInstruction() {} + +#define DECLARE_KIND(type) k##type, + enum InstructionKind { + FOR_EACH_INSTRUCTION(DECLARE_KIND) + }; +#undef DECLARE_KIND HInstruction* GetNext() const { return next_; } HInstruction* GetPrevious() const { return previous_; } HBasicBlock* GetBlock() const { return block_; } void SetBlock(HBasicBlock* block) { block_ = block; } + bool IsInBlock() const { return block_ != nullptr; } + bool IsInLoop() const { return block_->IsInLoop(); } + bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); } - virtual size_t InputCount() const = 0; + virtual size_t InputCount() const = 0; virtual HInstruction* InputAt(size_t i) const = 0; virtual void Accept(HGraphVisitor* visitor) = 0; @@ -455,22 +647,27 @@ class HInstruction : public ArenaObject { virtual void SetRawInputAt(size_t index, HInstruction* input) = 0; virtual bool NeedsEnvironment() const { return false; } + virtual bool IsControlFlow() const { return false; } + bool HasSideEffects() const { return side_effects_.HasSideEffects(); } void AddUseAt(HInstruction* user, size_t index) { uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HInstruction>(user, index, uses_); } void AddEnvUseAt(HEnvironment* user, size_t index) { + DCHECK(user != nullptr); env_uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HEnvironment>( user, index, env_uses_); } void RemoveUser(HInstruction* user, size_t index); + void 
RemoveEnvironmentUser(HEnvironment* user, size_t index); HUseListNode<HInstruction>* GetUses() const { return uses_; } HUseListNode<HEnvironment>* GetEnvUses() const { return env_uses_; } bool HasUses() const { return uses_ != nullptr || env_uses_ != nullptr; } + bool HasEnvironmentUses() const { return env_uses_ != nullptr; } size_t NumberOfUses() const { // TODO: Optimize this method if it is used outside of the HGraphVisualizer. @@ -483,6 +680,10 @@ class HInstruction : public ArenaObject { return result; } + // Does this instruction dominate `other_instruction`? Aborts if + // this instruction and `other_instruction` are both phis. + bool Dominates(HInstruction* other_instruction) const; + int GetId() const { return id_; } void SetId(int id) { id_ = id; } @@ -494,17 +695,55 @@ class HInstruction : public ArenaObject { HEnvironment* GetEnvironment() const { return environment_; } void SetEnvironment(HEnvironment* environment) { environment_ = environment; } + // Returns the number of entries in the environment. Typically, that is the + // number of dex registers in a method. It could be more in case of inlining. + size_t EnvironmentSize() const; + LocationSummary* GetLocations() const { return locations_; } void SetLocations(LocationSummary* locations) { locations_ = locations; } void ReplaceWith(HInstruction* instruction); + bool HasOnlyOneUse() const { + return uses_ != nullptr && uses_->GetTail() == nullptr; + } + #define INSTRUCTION_TYPE_CHECK(type) \ + bool Is##type() const { return (As##type() != nullptr); } \ + virtual const H##type* As##type() const { return nullptr; } \ virtual H##type* As##type() { return nullptr; } FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK) #undef INSTRUCTION_TYPE_CHECK + // Returns whether the instruction can be moved within the graph. + virtual bool CanBeMoved() const { return false; } + + // Returns whether the two instructions are of the same kind. + virtual bool InstructionTypeEquals(HInstruction* other) const { return false; } + + // Returns whether any data encoded in the two instructions is equal. + // This method does not look at the inputs. Both instructions must be + // of the same type, otherwise the method has undefined behavior. + virtual bool InstructionDataEquals(HInstruction* other) const { return false; } + + // Returns whether two instructions are equal, that is: + // 1) They have the same type and contain the same data, + // 2) Their inputs are identical. + bool Equals(HInstruction* other) const; + + virtual InstructionKind GetKind() const = 0; + + virtual size_t ComputeHashCode() const { + size_t result = GetKind(); + for (size_t i = 0, e = InputCount(); i < e; ++i) { + result = (result * 31) + InputAt(i)->GetId(); + } + return result; + } + + SideEffects GetSideEffects() const { return side_effects_; } + size_t GetLifetimePosition() const { return lifetime_position_; } void SetLifetimePosition(size_t position) { lifetime_position_ = position; } LiveInterval* GetLiveInterval() const { return live_interval_; } @@ -518,7 +757,7 @@ class HInstruction : public ArenaObject { // An instruction gets an id when it is added to the graph. // It reflects creation order. A negative id means the instruction - // has not beed added to the graph. + // has not been added to the graph. int id_; // When doing liveness analysis, instructions that have uses get an SSA index. @@ -530,6 +769,8 @@ class HInstruction : public ArenaObject { // List of environments that contain this instruction. 
HUseListNode<HEnvironment>* env_uses_; + // The environment associated with this instruction. Not null if the instruction + // might jump out of the method. HEnvironment* environment_; // Set by the code generator. @@ -542,6 +783,8 @@ class HInstruction : public ArenaObject { // order of blocks where this instruction's live interval start. size_t lifetime_position_; + const SideEffects side_effects_; + friend class HBasicBlock; friend class HInstructionList; @@ -595,10 +838,16 @@ class HEnvironment : public ArenaObject { vregs_.Put(index, instruction); } + HInstruction* GetInstructionAt(size_t index) const { + return vregs_.Get(index); + } + GrowableArray<HInstruction*>* GetVRegs() { return &vregs_; } + size_t Size() const { return vregs_.Size(); } + private: GrowableArray<HInstruction*> vregs_; @@ -607,7 +856,7 @@ class HEnvironment : public ArenaObject { class HInputIterator : public ValueObject { public: - explicit HInputIterator(HInstruction* instruction) : instruction_(instruction), index_(0) { } + explicit HInputIterator(HInstruction* instruction) : instruction_(instruction), index_(0) {} bool Done() const { return index_ == instruction_->InputCount(); } HInstruction* Current() const { return instruction_->InputAt(index_); } @@ -667,7 +916,7 @@ class HBackwardInstructionIterator : public ValueObject { template<typename T, intptr_t N> class EmbeddedArray { public: - EmbeddedArray() : elements_() { } + EmbeddedArray() : elements_() {} intptr_t GetLength() const { return N; } @@ -712,8 +961,9 @@ class EmbeddedArray<T, 0> { template<intptr_t N> class HTemplateInstruction: public HInstruction { public: - HTemplateInstruction<N>() : inputs_() { } - virtual ~HTemplateInstruction() { } + HTemplateInstruction<N>(SideEffects side_effects) + : HInstruction(side_effects), inputs_() {} + virtual ~HTemplateInstruction() {} virtual size_t InputCount() const { return N; } virtual HInstruction* InputAt(size_t i) const { return inputs_[i]; } @@ -729,11 +979,26 @@ class HTemplateInstruction: public HInstruction { friend class SsaBuilder; }; +template<intptr_t N> +class HExpression : public HTemplateInstruction<N> { + public: + HExpression<N>(Primitive::Type type, SideEffects side_effects) + : HTemplateInstruction<N>(side_effects), type_(type) {} + virtual ~HExpression() {} + + virtual Primitive::Type GetType() const { return type_; } + + private: + const Primitive::Type type_; +}; + // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow // instruction that branches to the exit block. class HReturnVoid : public HTemplateInstruction<0> { public: - HReturnVoid() { } + HReturnVoid() : HTemplateInstruction(SideEffects::None()) {} + + virtual bool IsControlFlow() const { return true; } DECLARE_INSTRUCTION(ReturnVoid); @@ -745,10 +1010,12 @@ class HReturnVoid : public HTemplateInstruction<0> { // instruction that branches to the exit block. class HReturn : public HTemplateInstruction<1> { public: - explicit HReturn(HInstruction* value) { + explicit HReturn(HInstruction* value) : HTemplateInstruction(SideEffects::None()) { SetRawInputAt(0, value); } + virtual bool IsControlFlow() const { return true; } + DECLARE_INSTRUCTION(Return); private: @@ -760,7 +1027,9 @@ class HReturn : public HTemplateInstruction<1> { // exit block. 
class HExit : public HTemplateInstruction<0> { public: - HExit() { } + HExit() : HTemplateInstruction(SideEffects::None()) {} + + virtual bool IsControlFlow() const { return true; } DECLARE_INSTRUCTION(Exit); @@ -771,7 +1040,9 @@ class HExit : public HTemplateInstruction<0> { // Jumps from one block to another. class HGoto : public HTemplateInstruction<0> { public: - HGoto() { } + HGoto() : HTemplateInstruction(SideEffects::None()) {} + + virtual bool IsControlFlow() const { return true; } HBasicBlock* GetSuccessor() const { return GetBlock()->GetSuccessors().Get(0); @@ -783,14 +1054,17 @@ class HGoto : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HGoto); }; + // Conditional branch. A block ending with an HIf instruction must have // two successors. class HIf : public HTemplateInstruction<1> { public: - explicit HIf(HInstruction* input) { + explicit HIf(HInstruction* input) : HTemplateInstruction(SideEffects::None()) { SetRawInputAt(0, input); } + virtual bool IsControlFlow() const { return true; } + HBasicBlock* IfTrueSuccessor() const { return GetBlock()->GetSuccessors().Get(0); } @@ -801,53 +1075,212 @@ class HIf : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(If); + virtual bool IsIfInstruction() const { return true; } + private: DISALLOW_COPY_AND_ASSIGN(HIf); }; -class HBinaryOperation : public HTemplateInstruction<2> { +class HBinaryOperation : public HExpression<2> { public: HBinaryOperation(Primitive::Type result_type, HInstruction* left, - HInstruction* right) : result_type_(result_type) { + HInstruction* right) : HExpression(result_type, SideEffects::None()) { SetRawInputAt(0, left); SetRawInputAt(1, right); } HInstruction* GetLeft() const { return InputAt(0); } HInstruction* GetRight() const { return InputAt(1); } - Primitive::Type GetResultType() const { return result_type_; } + Primitive::Type GetResultType() const { return GetType(); } virtual bool IsCommutative() { return false; } - virtual Primitive::Type GetType() const { return GetResultType(); } - private: - const Primitive::Type result_type_; + virtual bool CanBeMoved() const { return true; } + virtual bool InstructionDataEquals(HInstruction* other) const { return true; } + // Try to statically evaluate `operation` and return an HConstant + // containing the result of this evaluation. If `operation` cannot + // be evaluated as a constant, return nullptr. + HConstant* TryStaticEvaluation(ArenaAllocator* allocator) const; + + // Apply this operation to `x` and `y`. + virtual int32_t Evaluate(int32_t x, int32_t y) const = 0; + virtual int64_t Evaluate(int64_t x, int64_t y) const = 0; + + DECLARE_INSTRUCTION(BinaryOperation); + + private: DISALLOW_COPY_AND_ASSIGN(HBinaryOperation); }; - -// Instruction to check if two inputs are equal to each other. -class HEqual : public HBinaryOperation { +class HCondition : public HBinaryOperation { public: - HEqual(HInstruction* first, HInstruction* second) + HCondition(HInstruction* first, HInstruction* second) : HBinaryOperation(Primitive::kPrimBoolean, first, second) {} virtual bool IsCommutative() { return true; } - virtual Primitive::Type GetType() const { return Primitive::kPrimBoolean; } + // For register allocation purposes, returns whether this instruction needs to be + // materialized (that is, not just be in the processor flags). + bool NeedsMaterialization() const; + + // For code generation purposes, returns whether this instruction is just before + // `if_`, and disregard moves in between. 
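+  // Parallel moves inserted between the condition and `if_` (for instance by
+  // the register allocator) are skipped when walking backwards from `if_`.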
+ bool IsBeforeWhenDisregardMoves(HIf* if_) const; + + DECLARE_INSTRUCTION(Condition); + + virtual IfCondition GetCondition() const = 0; + + private: + DISALLOW_COPY_AND_ASSIGN(HCondition); +}; + +// Instruction to check if two inputs are equal to each other. +class HEqual : public HCondition { + public: + HEqual(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} + + virtual int32_t Evaluate(int32_t x, int32_t y) const { return x == y; } + virtual int64_t Evaluate(int64_t x, int64_t y) const { return x == y; } DECLARE_INSTRUCTION(Equal); + virtual IfCondition GetCondition() const { + return kCondEQ; + } + private: DISALLOW_COPY_AND_ASSIGN(HEqual); }; +class HNotEqual : public HCondition { + public: + HNotEqual(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} + + virtual int32_t Evaluate(int32_t x, int32_t y) const { return x != y; } + virtual int64_t Evaluate(int64_t x, int64_t y) const { return x != y; } + + DECLARE_INSTRUCTION(NotEqual); + + virtual IfCondition GetCondition() const { + return kCondNE; + } + + private: + DISALLOW_COPY_AND_ASSIGN(HNotEqual); +}; + +class HLessThan : public HCondition { + public: + HLessThan(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} + + virtual int32_t Evaluate(int32_t x, int32_t y) const { return x < y; } + virtual int64_t Evaluate(int64_t x, int64_t y) const { return x < y; } + + DECLARE_INSTRUCTION(LessThan); + + virtual IfCondition GetCondition() const { + return kCondLT; + } + + private: + DISALLOW_COPY_AND_ASSIGN(HLessThan); +}; + +class HLessThanOrEqual : public HCondition { + public: + HLessThanOrEqual(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} + + virtual int32_t Evaluate(int32_t x, int32_t y) const { return x <= y; } + virtual int64_t Evaluate(int64_t x, int64_t y) const { return x <= y; } + + DECLARE_INSTRUCTION(LessThanOrEqual); + + virtual IfCondition GetCondition() const { + return kCondLE; + } + + private: + DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual); +}; + +class HGreaterThan : public HCondition { + public: + HGreaterThan(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} + + virtual int32_t Evaluate(int32_t x, int32_t y) const { return x > y; } + virtual int64_t Evaluate(int64_t x, int64_t y) const { return x > y; } + + DECLARE_INSTRUCTION(GreaterThan); + + virtual IfCondition GetCondition() const { + return kCondGT; + } + + private: + DISALLOW_COPY_AND_ASSIGN(HGreaterThan); +}; + +class HGreaterThanOrEqual : public HCondition { + public: + HGreaterThanOrEqual(HInstruction* first, HInstruction* second) + : HCondition(first, second) {} + + virtual int32_t Evaluate(int32_t x, int32_t y) const { return x >= y; } + virtual int64_t Evaluate(int64_t x, int64_t y) const { return x >= y; } + + DECLARE_INSTRUCTION(GreaterThanOrEqual); + + virtual IfCondition GetCondition() const { + return kCondGE; + } + + private: + DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual); +}; + + +// Instruction to check how two inputs compare to each other. +// Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1. +class HCompare : public HBinaryOperation { + public: + HCompare(Primitive::Type type, HInstruction* first, HInstruction* second) + : HBinaryOperation(Primitive::kPrimInt, first, second) { + DCHECK_EQ(type, first->GetType()); + DCHECK_EQ(type, second->GetType()); + } + + virtual int32_t Evaluate(int32_t x, int32_t y) const { + return + x == y ? 0 : + x > y ? 
1 : + -1; + } + virtual int64_t Evaluate(int64_t x, int64_t y) const { + return + x == y ? 0 : + x > y ? 1 : + -1; + } + + DECLARE_INSTRUCTION(Compare); + + private: + DISALLOW_COPY_AND_ASSIGN(HCompare); +}; + // A local in the graph. Corresponds to a Dex register. class HLocal : public HTemplateInstruction<0> { public: - explicit HLocal(uint16_t reg_number) : reg_number_(reg_number) { } + explicit HLocal(uint16_t reg_number) + : HTemplateInstruction(SideEffects::None()), reg_number_(reg_number) {} DECLARE_INSTRUCTION(Local); @@ -861,21 +1294,18 @@ class HLocal : public HTemplateInstruction<0> { }; // Load a given local. The local is an input of this instruction. -class HLoadLocal : public HTemplateInstruction<1> { +class HLoadLocal : public HExpression<1> { public: - explicit HLoadLocal(HLocal* local, Primitive::Type type) : type_(type) { + HLoadLocal(HLocal* local, Primitive::Type type) + : HExpression(type, SideEffects::None()) { SetRawInputAt(0, local); } - virtual Primitive::Type GetType() const { return type_; } - HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); } DECLARE_INSTRUCTION(LoadLocal); private: - const Primitive::Type type_; - DISALLOW_COPY_AND_ASSIGN(HLoadLocal); }; @@ -883,7 +1313,7 @@ class HLoadLocal : public HTemplateInstruction<1> { // and the local. class HStoreLocal : public HTemplateInstruction<2> { public: - HStoreLocal(HLocal* local, HInstruction* value) { + HStoreLocal(HLocal* local, HInstruction* value) : HTemplateInstruction(SideEffects::None()) { SetRawInputAt(0, local); SetRawInputAt(1, value); } @@ -896,14 +1326,31 @@ class HStoreLocal : public HTemplateInstruction<2> { DISALLOW_COPY_AND_ASSIGN(HStoreLocal); }; +class HConstant : public HExpression<0> { + public: + explicit HConstant(Primitive::Type type) : HExpression(type, SideEffects::None()) {} + + virtual bool CanBeMoved() const { return true; } + + DECLARE_INSTRUCTION(Constant); + + private: + DISALLOW_COPY_AND_ASSIGN(HConstant); +}; + // Constants of the type int. Those can be from Dex instructions, or // synthesized (for example with the if-eqz instruction). 
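The Evaluate() overloads and TryStaticEvaluation() above give later passes a uniform way to fold operations whose inputs are constants. A gtest-style sketch of the documented contract, in the spirit of the nodes_test.cc file added further below (the test itself is illustrative and not part of this patch):

TEST(Node, TryStaticEvaluation) {
  ArenaPool pool;
  ArenaAllocator allocator(&pool);

  // 5 == 5 can be evaluated statically; the result is a constant holding 1.
  HIntConstant* five_a = new (&allocator) HIntConstant(5);
  HIntConstant* five_b = new (&allocator) HIntConstant(5);
  HEqual* equal = new (&allocator) HEqual(five_a, five_b);
  HConstant* folded = equal->TryStaticEvaluation(&allocator);
  ASSERT_TRUE(folded != nullptr);
  ASSERT_EQ(folded->AsIntConstant()->GetValue(), 1);

  // With a non-constant input, nothing can be folded and nullptr is returned.
  HParameterValue* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
  HEqual* not_foldable = new (&allocator) HEqual(parameter, five_a);
  ASSERT_TRUE(not_foldable->TryStaticEvaluation(&allocator) == nullptr);
}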
-class HIntConstant : public HTemplateInstruction<0> { +class HIntConstant : public HConstant { public: - explicit HIntConstant(int32_t value) : value_(value) { } + explicit HIntConstant(int32_t value) : HConstant(Primitive::kPrimInt), value_(value) {} int32_t GetValue() const { return value_; } - virtual Primitive::Type GetType() const { return Primitive::kPrimInt; } + + virtual bool InstructionDataEquals(HInstruction* other) const { + return other->AsIntConstant()->value_ == value_; + } + + virtual size_t ComputeHashCode() const { return GetValue(); } DECLARE_INSTRUCTION(IntConstant); @@ -913,13 +1360,17 @@ class HIntConstant : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HIntConstant); }; -class HLongConstant : public HTemplateInstruction<0> { +class HLongConstant : public HConstant { public: - explicit HLongConstant(int64_t value) : value_(value) { } + explicit HLongConstant(int64_t value) : HConstant(Primitive::kPrimLong), value_(value) {} int64_t GetValue() const { return value_; } - virtual Primitive::Type GetType() const { return Primitive::kPrimLong; } + virtual bool InstructionDataEquals(HInstruction* other) const { + return other->AsLongConstant()->value_ == value_; + } + + virtual size_t ComputeHashCode() const { return static_cast<size_t>(GetValue()); } DECLARE_INSTRUCTION(LongConstant); @@ -935,7 +1386,8 @@ class HInvoke : public HInstruction { uint32_t number_of_arguments, Primitive::Type return_type, uint32_t dex_pc) - : inputs_(arena, number_of_arguments), + : HInstruction(SideEffects::All()), + inputs_(arena, number_of_arguments), return_type_(return_type), dex_pc_(dex_pc) { inputs_.SetSize(number_of_arguments); @@ -989,15 +1441,36 @@ class HInvokeStatic : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeStatic); }; -class HNewInstance : public HTemplateInstruction<0> { +class HInvokeVirtual : public HInvoke { public: - HNewInstance(uint32_t dex_pc, uint16_t type_index) : dex_pc_(dex_pc), type_index_(type_index) {} + HInvokeVirtual(ArenaAllocator* arena, + uint32_t number_of_arguments, + Primitive::Type return_type, + uint32_t dex_pc, + uint32_t vtable_index) + : HInvoke(arena, number_of_arguments, return_type, dex_pc), + vtable_index_(vtable_index) {} + + uint32_t GetVTableIndex() const { return vtable_index_; } + + DECLARE_INSTRUCTION(InvokeVirtual); + + private: + const uint32_t vtable_index_; + + DISALLOW_COPY_AND_ASSIGN(HInvokeVirtual); +}; + +class HNewInstance : public HExpression<0> { + public: + HNewInstance(uint32_t dex_pc, uint16_t type_index) + : HExpression(Primitive::kPrimNot, SideEffects::None()), + dex_pc_(dex_pc), + type_index_(type_index) {} uint32_t GetDexPc() const { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } - virtual Primitive::Type GetType() const { return Primitive::kPrimNot; } - // Calls runtime so needs an environment. 
virtual bool NeedsEnvironment() const { return true; } @@ -1017,6 +1490,9 @@ class HAdd : public HBinaryOperation { virtual bool IsCommutative() { return true; } + virtual int32_t Evaluate(int32_t x, int32_t y) const { return x + y; } + virtual int64_t Evaluate(int64_t x, int64_t y) const { return x + y; } + DECLARE_INSTRUCTION(Add); private: @@ -1030,6 +1506,9 @@ class HSub : public HBinaryOperation { virtual bool IsCommutative() { return false; } + virtual int32_t Evaluate(int32_t x, int32_t y) const { return x - y; } + virtual int64_t Evaluate(int64_t x, int64_t y) const { return x - y; } + DECLARE_INSTRUCTION(Sub); private: @@ -1038,15 +1517,13 @@ // The value of a parameter in this method. Its location depends on // the calling convention. -class HParameterValue : public HTemplateInstruction<0> { +class HParameterValue : public HExpression<0> { public: HParameterValue(uint8_t index, Primitive::Type parameter_type) - : index_(index), parameter_type_(parameter_type) {} + : HExpression(parameter_type, SideEffects::None()), index_(index) {} uint8_t GetIndex() const { return index_; } - virtual Primitive::Type GetType() const { return parameter_type_; } - DECLARE_INSTRUCTION(ParameterValue); private: @@ -1054,18 +1531,17 @@ // than HGraph::number_of_in_vregs_; const uint8_t index_; - const Primitive::Type parameter_type_; - DISALLOW_COPY_AND_ASSIGN(HParameterValue); }; -class HNot : public HTemplateInstruction<1> { +class HNot : public HExpression<1> { public: - explicit HNot(HInstruction* input) { + explicit HNot(HInstruction* input) : HExpression(Primitive::kPrimBoolean, SideEffects::None()) { SetRawInputAt(0, input); } - virtual Primitive::Type GetType() const { return Primitive::kPrimBoolean; } + virtual bool CanBeMoved() const { return true; } + virtual bool InstructionDataEquals(HInstruction* other) const { return true; } DECLARE_INSTRUCTION(Not); @@ -1076,9 +1552,11 @@ class HPhi : public HInstruction { public: HPhi(ArenaAllocator* arena, uint32_t reg_number, size_t number_of_inputs, Primitive::Type type) - : inputs_(arena, number_of_inputs), + : HInstruction(SideEffects::None()), + inputs_(arena, number_of_inputs), reg_number_(reg_number), - type_(type) { + type_(type), + is_live_(false) { inputs_.SetSize(number_of_inputs); } @@ -1096,21 +1574,248 @@ uint32_t GetRegNumber() const { return reg_number_; } + void SetDead() { is_live_ = false; } + void SetLive() { is_live_ = true; } + bool IsDead() const { return !is_live_; } + bool IsLive() const { return is_live_; } + DECLARE_INSTRUCTION(Phi); - protected: + private: GrowableArray<HInstruction*> inputs_; const uint32_t reg_number_; Primitive::Type type_; + bool is_live_; - private: DISALLOW_COPY_AND_ASSIGN(HPhi); }; +class HNullCheck : public HExpression<1> { + public: + HNullCheck(HInstruction* value, uint32_t dex_pc) + : HExpression(value->GetType(), SideEffects::None()), dex_pc_(dex_pc) { + SetRawInputAt(0, value); + } + + virtual bool CanBeMoved() const { return true; } + virtual bool InstructionDataEquals(HInstruction* other) const { return true; } + + virtual bool NeedsEnvironment() const { return true; } + + uint32_t GetDexPc() const { return dex_pc_; } + + DECLARE_INSTRUCTION(NullCheck); + + private: + const uint32_t dex_pc_; + + DISALLOW_COPY_AND_ASSIGN(HNullCheck); +}; + +class FieldInfo : public ValueObject { + public: + FieldInfo(MemberOffset
field_offset, Primitive::Type field_type) + : field_offset_(field_offset), field_type_(field_type) {} + + MemberOffset GetFieldOffset() const { return field_offset_; } + Primitive::Type GetFieldType() const { return field_type_; } + + private: + const MemberOffset field_offset_; + const Primitive::Type field_type_; +}; + +class HInstanceFieldGet : public HExpression<1> { + public: + HInstanceFieldGet(HInstruction* value, + Primitive::Type field_type, + MemberOffset field_offset) + : HExpression(field_type, SideEffects::DependsOnSomething()), + field_info_(field_offset, field_type) { + SetRawInputAt(0, value); + } + + virtual bool CanBeMoved() const { return true; } + virtual bool InstructionDataEquals(HInstruction* other) const { + size_t other_offset = other->AsInstanceFieldGet()->GetFieldOffset().SizeValue(); + return other_offset == GetFieldOffset().SizeValue(); + } + + virtual size_t ComputeHashCode() const { + return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); + } + + MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } + Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + + DECLARE_INSTRUCTION(InstanceFieldGet); + + private: + const FieldInfo field_info_; + + DISALLOW_COPY_AND_ASSIGN(HInstanceFieldGet); +}; + +class HInstanceFieldSet : public HTemplateInstruction<2> { + public: + HInstanceFieldSet(HInstruction* object, + HInstruction* value, + Primitive::Type field_type, + MemberOffset field_offset) + : HTemplateInstruction(SideEffects::ChangesSomething()), + field_info_(field_offset, field_type) { + SetRawInputAt(0, object); + SetRawInputAt(1, value); + } + + MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } + Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + + DECLARE_INSTRUCTION(InstanceFieldSet); + + private: + const FieldInfo field_info_; + + DISALLOW_COPY_AND_ASSIGN(HInstanceFieldSet); +}; + +class HArrayGet : public HExpression<2> { + public: + HArrayGet(HInstruction* array, HInstruction* index, Primitive::Type type) + : HExpression(type, SideEffects::DependsOnSomething()) { + SetRawInputAt(0, array); + SetRawInputAt(1, index); + } + + virtual bool CanBeMoved() const { return true; } + virtual bool InstructionDataEquals(HInstruction* other) const { return true; } + + DECLARE_INSTRUCTION(ArrayGet); + + private: + DISALLOW_COPY_AND_ASSIGN(HArrayGet); +}; + +class HArraySet : public HTemplateInstruction<3> { + public: + HArraySet(HInstruction* array, + HInstruction* index, + HInstruction* value, + Primitive::Type component_type, + uint32_t dex_pc) + : HTemplateInstruction(SideEffects::ChangesSomething()), + dex_pc_(dex_pc), + component_type_(component_type) { + SetRawInputAt(0, array); + SetRawInputAt(1, index); + SetRawInputAt(2, value); + } + + virtual bool NeedsEnvironment() const { + // We currently always call a runtime method to catch array store + // exceptions. 
+ return InputAt(2)->GetType() == Primitive::kPrimNot; + } + + uint32_t GetDexPc() const { return dex_pc_; } + + Primitive::Type GetComponentType() const { return component_type_; } + + DECLARE_INSTRUCTION(ArraySet); + + private: + const uint32_t dex_pc_; + const Primitive::Type component_type_; + + DISALLOW_COPY_AND_ASSIGN(HArraySet); +}; + +class HArrayLength : public HExpression<1> { + public: + explicit HArrayLength(HInstruction* array) + : HExpression(Primitive::kPrimInt, SideEffects::None()) { + // Note that arrays do not change length, so the instruction does not + // depend on any write. + SetRawInputAt(0, array); + } + + virtual bool CanBeMoved() const { return true; } + virtual bool InstructionDataEquals(HInstruction* other) const { return true; } + + DECLARE_INSTRUCTION(ArrayLength); + + private: + DISALLOW_COPY_AND_ASSIGN(HArrayLength); +}; + +class HBoundsCheck : public HExpression<2> { + public: + HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc) + : HExpression(index->GetType(), SideEffects::None()), dex_pc_(dex_pc) { + DCHECK(index->GetType() == Primitive::kPrimInt); + SetRawInputAt(0, index); + SetRawInputAt(1, length); + } + + virtual bool CanBeMoved() const { return true; } + virtual bool InstructionDataEquals(HInstruction* other) const { return true; } + + virtual bool NeedsEnvironment() const { return true; } + + uint32_t GetDexPc() const { return dex_pc_; } + + DECLARE_INSTRUCTION(BoundsCheck); + + private: + const uint32_t dex_pc_; + + DISALLOW_COPY_AND_ASSIGN(HBoundsCheck); +}; + +/** + * Some DEX instructions are folded into multiple HInstructions that need + * to stay live until the last HInstruction. This class + * is used as a marker for the baseline compiler to ensure its preceding + * HInstruction stays live. `index` is the temporary number that is used + * to compute the stack offset at which the instruction is stored. + */ +class HTemporary : public HTemplateInstruction<0> { + public: + explicit HTemporary(size_t index) : HTemplateInstruction(SideEffects::None()), index_(index) {} + + size_t GetIndex() const { return index_; } + + DECLARE_INSTRUCTION(Temporary); + + private: + const size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HTemporary); +}; + +class HSuspendCheck : public HTemplateInstruction<0> { + public: + explicit HSuspendCheck(uint32_t dex_pc) + : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {} + + virtual bool NeedsEnvironment() const { + return true; + } + + uint32_t GetDexPc() const { return dex_pc_; } + + DECLARE_INSTRUCTION(SuspendCheck); + + private: + const uint32_t dex_pc_; + + DISALLOW_COPY_AND_ASSIGN(HSuspendCheck); +}; + class MoveOperands : public ArenaObject { public: - MoveOperands(Location source, Location destination) - : source_(source), destination_(destination) {} + MoveOperands(Location source, Location destination, HInstruction* instruction) + : source_(source), destination_(destination), instruction_(instruction) {} Location GetSource() const { return source_; } Location GetDestination() const { return destination_; } @@ -1158,9 +1863,16 @@ class MoveOperands : public ArenaObject { return source_.IsInvalid(); } + HInstruction* GetInstruction() const { return instruction_; } + private: Location source_; Location destination_; + // The instruction this move is associated with. Null when this move is + // for moving an input to the expected location of its user (including a phi user). + // This is only used in debug mode, to ensure we do not connect interval siblings + // in the same parallel move.
+ HInstruction* instruction_; DISALLOW_COPY_AND_ASSIGN(MoveOperands); }; @@ -1169,9 +1881,16 @@ static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove : public HTemplateInstruction<0> { public: - explicit HParallelMove(ArenaAllocator* arena) : moves_(arena, kDefaultNumberOfMoves) {} + explicit HParallelMove(ArenaAllocator* arena) + : HTemplateInstruction(SideEffects::None()), moves_(arena, kDefaultNumberOfMoves) {} void AddMove(MoveOperands* move) { + if (kIsDebugBuild && move->GetInstruction() != nullptr) { + for (size_t i = 0, e = moves_.Size(); i < e; ++i) { + DCHECK_NE(moves_.Get(i)->GetInstruction(), move->GetInstruction()) + << "Doing parallel moves for the same instruction."; + } + } moves_.Add(move); } @@ -1191,10 +1910,10 @@ class HParallelMove : public HTemplateInstruction<0> { class HGraphVisitor : public ValueObject { public: - explicit HGraphVisitor(HGraph* graph) : graph_(graph) { } - virtual ~HGraphVisitor() { } + explicit HGraphVisitor(HGraph* graph) : graph_(graph) {} + virtual ~HGraphVisitor() {} - virtual void VisitInstruction(HInstruction* instruction) { } + virtual void VisitInstruction(HInstruction* instruction) {} virtual void VisitBasicBlock(HBasicBlock* block); void VisitInsertionOrder(); diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc new file mode 100644 index 0000000000..b75bacb6ea --- /dev/null +++ b/compiler/optimizing/nodes_test.cc @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nodes.h" +#include "utils/arena_allocator.h" + +#include "gtest/gtest.h" + +namespace art { + +/** + * Test that removing instruction from the graph removes itself from user lists + * and environment lists. 
+ */ +TEST(Node, RemoveInstruction) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot); + entry->AddInstruction(parameter); + entry->AddInstruction(new (&allocator) HGoto()); + + HBasicBlock* first_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(first_block); + entry->AddSuccessor(first_block); + HInstruction* null_check = new (&allocator) HNullCheck(parameter, 0); + first_block->AddInstruction(null_check); + first_block->AddInstruction(new (&allocator) HReturnVoid()); + + HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit_block); + first_block->AddSuccessor(exit_block); + exit_block->AddInstruction(new (&allocator) HExit()); + + HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1); + null_check->SetEnvironment(environment); + environment->SetRawEnvAt(0, parameter); + parameter->AddEnvUseAt(null_check->GetEnvironment(), 0); + + ASSERT_TRUE(parameter->HasEnvironmentUses()); + ASSERT_TRUE(parameter->HasUses()); + + first_block->RemoveInstruction(null_check); + + ASSERT_FALSE(parameter->HasEnvironmentUses()); + ASSERT_FALSE(parameter->HasUses()); +} + +} // namespace art diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index dfbb488c7d..65bdb18812 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -14,17 +14,22 @@ * limitations under the License. */ +#include "optimizing_compiler.h" + #include <fstream> #include <stdint.h> #include "builder.h" #include "code_generator.h" -#include "compilers.h" +#include "compiler.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "graph_visualizer.h" +#include "gvn.h" +#include "instruction_simplifier.h" #include "nodes.h" #include "register_allocator.h" +#include "ssa_phi_elimination.h" #include "ssa_liveness_analysis.h" #include "utils/arena_allocator.h" @@ -35,7 +40,7 @@ namespace art { */ class CodeVectorAllocator FINAL : public CodeAllocator { public: - CodeVectorAllocator() { } + CodeVectorAllocator() {} virtual uint8_t* Allocate(size_t size) { size_ = size; @@ -64,12 +69,133 @@ static bool kIsVisualizerEnabled = false; */ static const char* kStringFilter = ""; -OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) : QuickCompiler(driver) { +class OptimizingCompiler FINAL : public Compiler { + public: + explicit OptimizingCompiler(CompilerDriver* driver); + ~OptimizingCompiler(); + + bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, CompilationUnit* cu) const + OVERRIDE; + + CompiledMethod* Compile(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file) const OVERRIDE; + + CompiledMethod* TryCompile(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file) const; + + // For the following methods we will use the fallback. This is a delegation pattern. 
+ CompiledMethod* JniCompile(uint32_t access_flags, + uint32_t method_idx, + const DexFile& dex_file) const OVERRIDE; + + uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + bool WriteElf(art::File* file, + OatWriter* oat_writer, + const std::vector<const art::DexFile*>& dex_files, + const std::string& android_root, + bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + Backend* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const OVERRIDE; + + void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; + + void Init() const OVERRIDE; + + void UnInit() const OVERRIDE; + + private: + // Whether we should run any optimization or register allocation. If false, will + // just run the code generation after the graph was built. + const bool run_optimizations_; + mutable AtomicInteger total_compiled_methods_; + mutable AtomicInteger unoptimized_compiled_methods_; + mutable AtomicInteger optimized_compiled_methods_; + + std::unique_ptr<std::ostream> visualizer_output_; + + // Delegate to another compiler in case the optimizing compiler cannot compile a method. + // Currently the fallback is the quick compiler. + std::unique_ptr<Compiler> delegate_; + + DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler); +}; + +static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ + +OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) + : Compiler(driver, kMaximumCompilationTimeBeforeWarning), + run_optimizations_( + driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime), + total_compiled_methods_(0), + unoptimized_compiled_methods_(0), + optimized_compiled_methods_(0), + delegate_(Create(driver, Compiler::Kind::kQuick)) { if (kIsVisualizerEnabled) { visualizer_output_.reset(new std::ofstream("art.cfg")); } } +void OptimizingCompiler::Init() const { + delegate_->Init(); +} + +void OptimizingCompiler::UnInit() const { + delegate_->UnInit(); +} + +OptimizingCompiler::~OptimizingCompiler() { + if (total_compiled_methods_ == 0) { + LOG(INFO) << "Did not compile any method."; + } else { + size_t unoptimized_percent = (unoptimized_compiled_methods_ * 100 / total_compiled_methods_); + size_t optimized_percent = (optimized_compiled_methods_ * 100 / total_compiled_methods_); + LOG(INFO) << "Compiled " << total_compiled_methods_ << " methods: " + << unoptimized_percent << "% (" << unoptimized_compiled_methods_ << ") unoptimized, " + << optimized_percent << "% (" << optimized_compiled_methods_ << ") optimized."; + } +} + +bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, + CompilationUnit* cu) const { + return delegate_->CanCompileMethod(method_idx, dex_file, cu); +} + +CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, + uint32_t method_idx, + const DexFile& dex_file) const { + return delegate_->JniCompile(access_flags, method_idx, dex_file); +} + +uintptr_t OptimizingCompiler::GetEntryPointOf(mirror::ArtMethod* method) const { + return delegate_->GetEntryPointOf(method); +} + +bool OptimizingCompiler::WriteElf(art::File* file, OatWriter* oat_writer, + const std::vector<const art::DexFile*>& dex_files, + const std::string& android_root, bool is_host) const { + return delegate_->WriteElf(file, oat_writer, dex_files, android_root, is_host); +} + +Backend* OptimizingCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const { + return delegate_->GetCodeGenerator(cu, compilation_unit); +} + +void 
OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu) const { + delegate_->InitCompilationUnit(cu); +} + CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type, @@ -77,6 +203,19 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite uint32_t method_idx, jobject class_loader, const DexFile& dex_file) const { + total_compiled_methods_++; + InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet(); + // Always use the thumb2 assembler: some runtime functionality (like implicit stack + // overflow checks) assume thumb2. + if (instruction_set == kArm) { + instruction_set = kThumb2; + } + + // Do not attempt to compile on architectures we do not support. + if (instruction_set != kX86 && instruction_set != kX86_64 && instruction_set != kThumb2) { + return nullptr; + } + DexCompilationUnit dex_compilation_unit( nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item, class_def_idx, method_idx, access_flags, @@ -85,10 +224,12 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite // For testing purposes, we put a special marker on method names that should be compiled // with this compiler. This makes sure we're not regressing. bool shouldCompile = dex_compilation_unit.GetSymbol().find("00024opt_00024") != std::string::npos; + bool shouldOptimize = + dex_compilation_unit.GetSymbol().find("00024reg_00024") != std::string::npos; ArenaPool pool; ArenaAllocator arena(&pool); - HGraphBuilder builder(&arena, &dex_compilation_unit, &dex_file); + HGraphBuilder builder(&arena, &dex_compilation_unit, &dex_file, GetCompilerDriver()); HGraph* graph = builder.BuildGraph(*code_item); if (graph == nullptr) { @@ -98,11 +239,6 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite return nullptr; } - InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet(); - // The optimizing compiler currently does not have a Thumb2 assembler. - if (instruction_set == kThumb2) { - instruction_set = kArm; - } CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set); if (codegen == nullptr) { if (shouldCompile) { @@ -116,38 +252,109 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite visualizer.DumpGraph("builder"); CodeVectorAllocator allocator; - codegen->Compile(&allocator); - - std::vector<uint8_t> mapping_table; - codegen->BuildMappingTable(&mapping_table); - std::vector<uint8_t> vmap_table; - codegen->BuildVMapTable(&vmap_table); - std::vector<uint8_t> gc_map; - codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); - - // Run these phases to get some test coverage. 
- graph->BuildDominatorTree(); - graph->TransformToSSA(); - visualizer.DumpGraph("ssa"); - - graph->FindNaturalLoops(); - SsaLivenessAnalysis liveness(*graph); - liveness.Analyze(); - visualizer.DumpGraph("liveness"); - - RegisterAllocator(graph->GetArena(), *codegen).AllocateRegisters(liveness); - visualizer.DumpGraph("register"); - - return new CompiledMethod(GetCompilerDriver(), - instruction_set, - allocator.GetMemory(), - codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - 0, /* FPR spill mask, unused */ - mapping_table, - vmap_table, - gc_map, - nullptr); + + if (run_optimizations_ && RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set)) { + optimized_compiled_methods_++; + graph->BuildDominatorTree(); + graph->TransformToSSA(); + visualizer.DumpGraph("ssa"); + graph->FindNaturalLoops(); + + SsaRedundantPhiElimination(graph).Run(); + SsaDeadPhiElimination(graph).Run(); + InstructionSimplifier(graph).Run(); + GlobalValueNumberer(graph->GetArena(), graph).Run(); + visualizer.DumpGraph(kGVNPassName); + + SsaLivenessAnalysis liveness(*graph, codegen); + liveness.Analyze(); + visualizer.DumpGraph(kLivenessPassName); + + RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness); + register_allocator.AllocateRegisters(); + + visualizer.DumpGraph(kRegisterAllocatorPassName); + codegen->CompileOptimized(&allocator); + + std::vector<uint8_t> mapping_table; + SrcMap src_mapping_table; + codegen->BuildMappingTable(&mapping_table, + GetCompilerDriver()->GetCompilerOptions().GetIncludeDebugSymbols() ? + &src_mapping_table : nullptr); + + std::vector<uint8_t> stack_map; + codegen->BuildStackMaps(&stack_map); + + return new CompiledMethod(GetCompilerDriver(), + instruction_set, + allocator.GetMemory(), + codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + 0, /* FPR spill mask, unused */ + mapping_table, + stack_map); + } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { + LOG(FATAL) << "Could not allocate registers in optimizing compiler"; + return nullptr; + } else { + unoptimized_compiled_methods_++; + codegen->CompileBaseline(&allocator); + + // Run these phases to get some test coverage. + graph->BuildDominatorTree(); + graph->TransformToSSA(); + visualizer.DumpGraph("ssa"); + graph->FindNaturalLoops(); + SsaRedundantPhiElimination(graph).Run(); + SsaDeadPhiElimination(graph).Run(); + GlobalValueNumberer(graph->GetArena(), graph).Run(); + SsaLivenessAnalysis liveness(*graph, codegen); + liveness.Analyze(); + visualizer.DumpGraph(kLivenessPassName); + + std::vector<uint8_t> mapping_table; + SrcMap src_mapping_table; + codegen->BuildMappingTable(&mapping_table, + GetCompilerDriver()->GetCompilerOptions().GetIncludeDebugSymbols() ? 
+ &src_mapping_table : nullptr); + std::vector<uint8_t> vmap_table; + codegen->BuildVMapTable(&vmap_table); + std::vector<uint8_t> gc_map; + codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); + + return new CompiledMethod(GetCompilerDriver(), + instruction_set, + allocator.GetMemory(), + codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + 0, /* FPR spill mask, unused */ + &src_mapping_table, + mapping_table, + vmap_table, + gc_map, + nullptr); + } +} + +CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file) const { + CompiledMethod* method = TryCompile(code_item, access_flags, invoke_type, class_def_idx, + method_idx, class_loader, dex_file); + if (method != nullptr) { + return method; + } + + return delegate_->Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx, + class_loader, dex_file); +} + +Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { + return new OptimizingCompiler(driver); } } // namespace art diff --git a/compiler/optimizing/optimizing_compiler.h b/compiler/optimizing/optimizing_compiler.h new file mode 100644 index 0000000000..a415eca2d0 --- /dev/null +++ b/compiler/optimizing/optimizing_compiler.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_ +#define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_ + +namespace art { + +class Compiler; +class CompilerDriver; + +Compiler* CreateOptimizingCompiler(CompilerDriver* driver); + +} + +#endif // ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_ diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 36a6a21d01..6dd53e5b14 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -17,8 +17,14 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ +#include "nodes.h" +#include "builder.h" +#include "dex_file.h" +#include "dex_instruction.h" #include "ssa_liveness_analysis.h" +#include "gtest/gtest.h" + namespace art { #define NUM_INSTRUCTIONS(...) \ @@ -48,6 +54,46 @@ LiveInterval* BuildInterval(const size_t ranges[][2], return interval; } +void RemoveSuspendChecks(HGraph* graph) { + for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) { + for (HInstructionIterator it(graph->GetBlocks().Get(i)->GetInstructions()); + !it.Done(); + it.Advance()) { + HInstruction* current = it.Current(); + if (current->IsSuspendCheck()) { + current->GetBlock()->RemoveInstruction(current); + } + } + } +} + +// Create a control-flow graph from Dex instructions. 
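RemoveSuspendChecks() above, combined with the CreateCFG() helper defined just below, keeps graph-dump tests stable now that the builder inserts HSuspendCheck instructions. A sketch of how a test might use the two together (the test name and code item are illustrative only):

TEST(OptimizingUnitTest, BuildReturnVoidCFG) {
  const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID);

  ArenaPool pool;
  ArenaAllocator allocator(&pool);
  HGraph* graph = CreateCFG(&allocator, data);
  ASSERT_TRUE(graph != nullptr);

  // Strip the synthesized suspend checks so the expected block contents do not
  // depend on where the builder placed them.
  RemoveSuspendChecks(graph);
}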
+inline HGraph* CreateCFG(ArenaAllocator* allocator, const uint16_t* data) { + HGraphBuilder builder(allocator); + const DexFile::CodeItem* item = + reinterpret_cast<const DexFile::CodeItem*>(data); + HGraph* graph = builder.BuildGraph(*item); + return graph; +} + +// Naive string diff data type. +typedef std::list<std::pair<std::string, std::string>> diff_t; + +// An alias for the empty string used to make it clear that a line is +// removed in a diff. +static const std::string removed = ""; + +// Naive patch command: apply a diff to a string. +inline std::string Patch(const std::string& original, const diff_t& diff) { + std::string result = original; + for (const auto& p : diff) { + std::string::size_type pos = result.find(p.first); + EXPECT_NE(pos, std::string::npos); + result.replace(pos, p.first.size(), p.second); + } + return result; +} + } // namespace art #endif // ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 3d2d136ec3..cadd3c54d6 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -147,4 +147,64 @@ void ParallelMoveResolver::PerformMove(size_t index) { } } +bool ParallelMoveResolver::IsScratchLocation(Location loc) { + for (size_t i = 0; i < moves_.Size(); ++i) { + if (moves_.Get(i)->Blocks(loc)) { + return false; + } + } + + for (size_t i = 0; i < moves_.Size(); ++i) { + if (moves_.Get(i)->GetDestination().Equals(loc)) { + return true; + } + } + + return false; +} + +int ParallelMoveResolver::AllocateScratchRegister(int blocked, + int register_count, + int if_scratch, + bool* spilled) { + DCHECK_NE(blocked, if_scratch); + int scratch = -1; + for (int reg = 0; reg < register_count; ++reg) { + if ((blocked != reg) && + IsScratchLocation(Location::RegisterLocation(ManagedRegister(reg)))) { + scratch = reg; + break; + } + } + + if (scratch == -1) { + *spilled = true; + scratch = if_scratch; + } else { + *spilled = false; + } + + return scratch; +} + + +ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( + ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers) + : resolver_(resolver), + reg_(kNoRegister), + spilled_(false) { + reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_); + + if (spilled_) { + resolver->SpillScratch(reg_); + } +} + + +ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() { + if (spilled_) { + resolver_->RestoreScratch(reg_); + } +} + } // namespace art diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index ff20cb0bc6..fcc1de6dc9 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -23,6 +23,7 @@ namespace art { class HParallelMove; +class Location; class MoveOperands; /** @@ -39,15 +40,40 @@ class ParallelMoveResolver : public ValueObject { void EmitNativeCode(HParallelMove* parallel_move); protected: + class ScratchRegisterScope : public ValueObject { + public: + ScratchRegisterScope(ParallelMoveResolver* resolver, + int blocked, + int if_scratch, + int number_of_registers); + ~ScratchRegisterScope(); + + int GetRegister() const { return reg_; } + bool IsSpilled() const { return spilled_; } + + private: + ParallelMoveResolver* resolver_; + int reg_; + bool spilled_; + }; + + bool IsScratchLocation(Location loc); + int AllocateScratchRegister(int blocked, int if_scratch, 
int register_count, bool* spilled); + // Emit a move. virtual void EmitMove(size_t index) = 0; // Execute a move by emitting a swap of two operands. virtual void EmitSwap(size_t index) = 0; + virtual void SpillScratch(int reg) = 0; + virtual void RestoreScratch(int reg) = 0; + // List of moves not yet resolved. GrowableArray<MoveOperands*> moves_; + static constexpr int kNoRegister = -1; + private: // Build the initial list of moves. void BuildInitialMoveList(HParallelMove* parallel_move); diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 88df24d9ac..863e107ee6 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -50,6 +50,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver { << ")"; } + virtual void SpillScratch(int reg) {} + virtual void RestoreScratch(int reg) {} + std::string GetMessage() const { return message_.str(); } @@ -68,7 +71,8 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, for (size_t i = 0; i < number_of_moves; ++i) { moves->AddMove(new (allocator) MoveOperands( Location::RegisterLocation(ManagedRegister(operands[i][0])), - Location::RegisterLocation(ManagedRegister(operands[i][1])))); + Location::RegisterLocation(ManagedRegister(operands[i][1])), + nullptr)); } return moves; } diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h index a7727c06a7..2c8166e65b 100644 --- a/compiler/optimizing/pretty_printer.h +++ b/compiler/optimizing/pretty_printer.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_PRETTY_PRINTER_H_ #define ART_COMPILER_OPTIMIZING_PRETTY_PRINTER_H_ +#include "base/stringprintf.h" #include "nodes.h" namespace art { diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 7e604e99b4..da6b294d71 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -45,7 +45,8 @@ TEST(PrettyPrinterTest, ReturnVoid) { const char* expected = "BasicBlock 0, succ: 1\n" - " 2: Goto 1\n" + " 2: SuspendCheck\n" + " 3: Goto 1\n" "BasicBlock 1, pred: 0, succ: 2\n" " 0: ReturnVoid\n" "BasicBlock 2, pred: 1\n" @@ -57,7 +58,8 @@ TEST(PrettyPrinterTest, ReturnVoid) { TEST(PrettyPrinterTest, CFG1) { const char* expected = "BasicBlock 0, succ: 1\n" - " 3: Goto 1\n" + " 3: SuspendCheck\n" + " 4: Goto 1\n" "BasicBlock 1, pred: 0, succ: 2\n" " 0: Goto 2\n" "BasicBlock 2, pred: 1, succ: 3\n" @@ -76,7 +78,8 @@ TEST(PrettyPrinterTest, CFG1) { TEST(PrettyPrinterTest, CFG2) { const char* expected = "BasicBlock 0, succ: 1\n" - " 4: Goto 1\n" + " 4: SuspendCheck\n" + " 5: Goto 1\n" "BasicBlock 1, pred: 0, succ: 2\n" " 0: Goto 2\n" "BasicBlock 2, pred: 1, succ: 3\n" @@ -97,15 +100,17 @@ TEST(PrettyPrinterTest, CFG2) { TEST(PrettyPrinterTest, CFG3) { const char* expected = "BasicBlock 0, succ: 1\n" - " 4: Goto 1\n" + " 5: SuspendCheck\n" + " 6: Goto 1\n" "BasicBlock 1, pred: 0, succ: 3\n" " 0: Goto 3\n" "BasicBlock 2, pred: 3, succ: 4\n" " 1: ReturnVoid\n" "BasicBlock 3, pred: 1, succ: 2\n" - " 2: Goto 2\n" + " 2: SuspendCheck\n" + " 3: Goto 2\n" "BasicBlock 4, pred: 2\n" - " 3: Exit\n"; + " 4: Exit\n"; const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, @@ -132,11 +137,13 @@ TEST(PrettyPrinterTest, CFG3) { TEST(PrettyPrinterTest, CFG4) { const char* expected = "BasicBlock 0, succ: 1\n" - " 2: Goto 1\n" + " 3: SuspendCheck\n" + " 4: Goto 1\n" "BasicBlock 1, pred: 0, 1, succ: 1\n" - " 0: Goto 1\n" + " 
0: SuspendCheck\n" + " 1: Goto 1\n" "BasicBlock 2\n" - " 1: Exit\n"; + " 2: Exit\n"; const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( Instruction::NOP, @@ -153,13 +160,15 @@ TEST(PrettyPrinterTest, CFG4) { TEST(PrettyPrinterTest, CFG5) { const char* expected = "BasicBlock 0, succ: 1\n" - " 3: Goto 1\n" + " 4: SuspendCheck\n" + " 5: Goto 1\n" "BasicBlock 1, pred: 0, 2, succ: 3\n" " 0: ReturnVoid\n" "BasicBlock 2, succ: 1\n" - " 1: Goto 1\n" + " 1: SuspendCheck\n" + " 2: Goto 1\n" "BasicBlock 3, pred: 1\n" - " 2: Exit\n"; + " 3: Exit\n"; const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, @@ -174,7 +183,8 @@ TEST(PrettyPrinterTest, CFG6) { "BasicBlock 0, succ: 1\n" " 0: Local [4, 3, 2]\n" " 1: IntConstant [2]\n" - " 10: Goto 1\n" + " 10: SuspendCheck\n" + " 11: Goto 1\n" "BasicBlock 1, pred: 0, succ: 3, 2\n" " 2: StoreLocal(0, 1)\n" " 3: LoadLocal(0) [5]\n" @@ -202,7 +212,8 @@ TEST(PrettyPrinterTest, CFG7) { "BasicBlock 0, succ: 1\n" " 0: Local [4, 3, 2]\n" " 1: IntConstant [2]\n" - " 10: Goto 1\n" + " 11: SuspendCheck\n" + " 12: Goto 1\n" "BasicBlock 1, pred: 0, succ: 3, 2\n" " 2: StoreLocal(0, 1)\n" " 3: LoadLocal(0) [5]\n" @@ -212,9 +223,10 @@ TEST(PrettyPrinterTest, CFG7) { "BasicBlock 2, pred: 1, 3, succ: 3\n" " 7: Goto 3\n" "BasicBlock 3, pred: 1, 2, succ: 2\n" - " 8: Goto 2\n" + " 8: SuspendCheck\n" + " 9: Goto 2\n" "BasicBlock 4\n" - " 9: Exit\n"; + " 10: Exit\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, @@ -230,7 +242,8 @@ TEST(PrettyPrinterTest, IntConstant) { "BasicBlock 0, succ: 1\n" " 0: Local [2]\n" " 1: IntConstant [2]\n" - " 5: Goto 1\n" + " 5: SuspendCheck\n" + " 6: Goto 1\n" "BasicBlock 1, pred: 0, succ: 2\n" " 2: StoreLocal(0, 1)\n" " 3: ReturnVoid\n" diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index dd175d2fa7..1d1d694ad2 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -16,73 +16,290 @@ #include "register_allocator.h" +#include "base/bit_vector-inl.h" #include "code_generator.h" #include "ssa_liveness_analysis.h" namespace art { static constexpr size_t kMaxLifetimePosition = -1; +static constexpr size_t kDefaultNumberOfSpillSlots = 4; -RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, const CodeGenerator& codegen) +RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& liveness) : allocator_(allocator), codegen_(codegen), - unhandled_(allocator, 0), + liveness_(liveness), + unhandled_core_intervals_(allocator, 0), + unhandled_fp_intervals_(allocator, 0), + unhandled_(nullptr), handled_(allocator, 0), active_(allocator, 0), inactive_(allocator, 0), + physical_register_intervals_(allocator, codegen->GetNumberOfRegisters()), + temp_intervals_(allocator, 4), + spill_slots_(allocator, kDefaultNumberOfSpillSlots), + safepoints_(allocator, 0), processing_core_registers_(false), number_of_registers_(-1), registers_array_(nullptr), - blocked_registers_(allocator->AllocArray<bool>(codegen.GetNumberOfRegisters())) { - codegen.SetupBlockedRegisters(blocked_registers_); + blocked_registers_(allocator->AllocArray<bool>(codegen->GetNumberOfRegisters())), + reserved_out_slots_(0), + maximum_number_of_live_registers_(0) { + codegen->SetupBlockedRegisters(blocked_registers_); + physical_register_intervals_.SetSize(codegen->GetNumberOfRegisters()); + // Always reserve for the current method and the graph's max out registers. 
+ // TODO: compute it instead. + reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs(); } -static bool ShouldProcess(bool processing_core_registers, HInstruction* instruction) { - bool is_core_register = (instruction->GetType() != Primitive::kPrimDouble) - && (instruction->GetType() != Primitive::kPrimFloat); +bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, + InstructionSet instruction_set) { + if (!Supports(instruction_set)) { + return false; + } + for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) { + for (HInstructionIterator it(graph.GetBlocks().Get(i)->GetInstructions()); + !it.Done(); + it.Advance()) { + HInstruction* current = it.Current(); + if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false; + if (current->GetType() == Primitive::kPrimFloat) return false; + if (current->GetType() == Primitive::kPrimDouble) return false; + } + } + return true; +} + +static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) { + if (interval == nullptr) return false; + bool is_core_register = (interval->GetType() != Primitive::kPrimDouble) + && (interval->GetType() != Primitive::kPrimFloat); return processing_core_registers == is_core_register; } -void RegisterAllocator::AllocateRegistersInternal(const SsaLivenessAnalysis& liveness) { - number_of_registers_ = processing_core_registers_ - ? codegen_.GetNumberOfCoreRegisters() - : codegen_.GetNumberOfFloatingPointRegisters(); +void RegisterAllocator::AllocateRegisters() { + AllocateRegistersInternal(); + Resolve(); - registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_); + if (kIsDebugBuild) { + processing_core_registers_ = true; + ValidateInternal(true); + processing_core_registers_ = false; + ValidateInternal(true); + } +} +void RegisterAllocator::BlockRegister(Location location, + size_t start, + size_t end, + Primitive::Type type) { + int reg = location.reg().RegId(); + LiveInterval* interval = physical_register_intervals_.Get(reg); + if (interval == nullptr) { + interval = LiveInterval::MakeFixedInterval(allocator_, reg, type); + physical_register_intervals_.Put(reg, interval); + inactive_.Add(interval); + } + DCHECK(interval->GetRegister() == reg); + interval->AddRange(start, end); +} + +void RegisterAllocator::AllocateRegistersInternal() { // Iterate post-order, to ensure the list is sorted, and the last added interval // is the one with the lowest start position. 
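// Concretely: blocks are visited in linear post order and the instructions in each
// block are walked backwards, so interval start positions decrease as intervals are
// added. For example, if block B1 covers lifetime positions [0, 10) and its
// linear-order successor B2 covers [10, 20), B2's intervals are pushed first and
// B1's last, leaving the interval with the lowest start position on top of the
// unhandled stack (positions here are illustrative).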
- for (size_t i = liveness.GetNumberOfSsaValues(); i > 0; --i) { - HInstruction* instruction = liveness.GetInstructionFromSsaIndex(i - 1); - if (ShouldProcess(processing_core_registers_, instruction)) { - LiveInterval* current = instruction->GetLiveInterval(); - DCHECK(unhandled_.IsEmpty() || current->StartsBefore(unhandled_.Peek())); - unhandled_.Add(current); + for (HLinearPostOrderIterator it(liveness_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + ProcessInstruction(it.Current()); + } + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + ProcessInstruction(it.Current()); } } + number_of_registers_ = codegen_->GetNumberOfCoreRegisters(); + registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_); + processing_core_registers_ = true; + unhandled_ = &unhandled_core_intervals_; LinearScan(); - if (kIsDebugBuild) { - ValidateInternal(liveness, true); + + inactive_.Reset(); + active_.Reset(); + handled_.Reset(); + + number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters(); + registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_); + processing_core_registers_ = false; + unhandled_ = &unhandled_fp_intervals_; + // TODO: Enable FP register allocation. + DCHECK(unhandled_->IsEmpty()); + LinearScan(); +} + +void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { + LocationSummary* locations = instruction->GetLocations(); + size_t position = instruction->GetLifetimePosition(); + + if (locations == nullptr) return; + + // Create synthesized intervals for temporaries. + for (size_t i = 0; i < locations->GetTempCount(); ++i) { + Location temp = locations->GetTemp(i); + if (temp.IsRegister()) { + BlockRegister(temp, position, position + 1, Primitive::kPrimInt); + } else { + LiveInterval* interval = + LiveInterval::MakeTempInterval(allocator_, instruction, Primitive::kPrimInt); + temp_intervals_.Add(interval); + interval->AddRange(position, position + 1); + unhandled_core_intervals_.Add(interval); + } + } + + bool core_register = (instruction->GetType() != Primitive::kPrimDouble) + && (instruction->GetType() != Primitive::kPrimFloat); + + GrowableArray<LiveInterval*>& unhandled = core_register + ? unhandled_core_intervals_ + : unhandled_fp_intervals_; + + if (locations->CanCall()) { + if (!instruction->IsSuspendCheck()) { + codegen_->MarkNotLeaf(); + } + safepoints_.Add(instruction); + if (locations->OnlyCallsOnSlowPath()) { + // We add a synthesized range at this position to record the live registers + // at this position. Ideally, we could just update the safepoints when locations + // are updated, but we currently need to know the full stack size before updating + // locations (because of parameters and the fact that we don't have a frame pointer). + // And knowing the full stack size requires to know the maximum number of live + // registers at calls in slow paths. + // By adding the following interval in the algorithm, we can compute this + // maximum before updating locations. + LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction); + interval->AddRange(position, position + 1); + unhandled.Add(interval); + } + } + + if (locations->WillCall()) { + // Block all registers. 
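// Register contents cannot be relied upon across the call: the loop below blocks
// every core register for the call position, which conservatively covers all
// caller-save clobbers without having to model the calling convention's callee-save
// set. Any interval live across the call must therefore be split around it or live
// in a spill slot.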
+ for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { + BlockRegister(Location::RegisterLocation(ManagedRegister(i)), + position, + position + 1, + Primitive::kPrimInt); + } + } + + for (size_t i = 0; i < instruction->InputCount(); ++i) { + Location input = locations->InAt(i); + if (input.IsRegister()) { + BlockRegister(input, position, position + 1, instruction->InputAt(i)->GetType()); + } + } + + LiveInterval* current = instruction->GetLiveInterval(); + if (current == nullptr) return; + + DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek())); + // Some instructions define their output in fixed register/stack slot. We need + // to ensure we know these locations before doing register allocation. For a + // given register, we create an interval that covers these locations. The register + // will be unavailable at these locations when trying to allocate one for an + // interval. + // + // The backwards walking ensures the ranges are ordered on increasing start positions. + Location output = locations->Out(); + if (output.IsRegister()) { + // Shift the interval's start by one to account for the blocked register. + current->SetFrom(position + 1); + current->SetRegister(output.reg().RegId()); + BlockRegister(output, position, position + 1, instruction->GetType()); + } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) { + current->SetSpillSlot(output.GetStackIndex()); + } + + // If needed, add interval to the list of unhandled intervals. + if (current->HasSpillSlot() || instruction->IsConstant()) { + // Split before first register use. + size_t first_register_use = current->FirstRegisterUse(); + if (first_register_use != kNoLifetime) { + LiveInterval* split = Split(current, first_register_use); + // Don't add direclty to `unhandled`, it needs to be sorted and the start + // of this new interval might be after intervals already in the list. + AddSorted(&unhandled, split); + } else { + // Nothing to do, we won't allocate a register for this value. + } + } else { + DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek())); + unhandled.Add(current); } } -bool RegisterAllocator::ValidateInternal(const SsaLivenessAnalysis& liveness, - bool log_fatal_on_failure) const { +class AllRangesIterator : public ValueObject { + public: + explicit AllRangesIterator(LiveInterval* interval) + : current_interval_(interval), + current_range_(interval->GetFirstRange()) {} + + bool Done() const { return current_interval_ == nullptr; } + LiveRange* CurrentRange() const { return current_range_; } + LiveInterval* CurrentInterval() const { return current_interval_; } + + void Advance() { + current_range_ = current_range_->GetNext(); + if (current_range_ == nullptr) { + current_interval_ = current_interval_->GetNextSibling(); + if (current_interval_ != nullptr) { + current_range_ = current_interval_->GetFirstRange(); + } + } + } + + private: + LiveInterval* current_interval_; + LiveRange* current_range_; + + DISALLOW_COPY_AND_ASSIGN(AllRangesIterator); +}; + +bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { // To simplify unit testing, we eagerly create the array of intervals, and // call the helper method. 
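// Concretely, ValidateIntervals() below builds one bit vector per register and per
// spill slot and sets a bit for every lifetime position a live range covers; a bit
// that is already set means two intervals were assigned the same location at the same
// time, reported as a "Register conflict" or "Spill slot conflict".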
GrowableArray<LiveInterval*> intervals(allocator_, 0); - for (size_t i = 0; i < liveness.GetNumberOfSsaValues(); ++i) { - HInstruction* instruction = liveness.GetInstructionFromSsaIndex(i); - if (ShouldProcess(processing_core_registers_, instruction)) { + for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); + if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) { intervals.Add(instruction->GetLiveInterval()); } } - return ValidateIntervals(intervals, codegen_, allocator_, processing_core_registers_, - log_fatal_on_failure); + + for (size_t i = 0, e = physical_register_intervals_.Size(); i < e; ++i) { + LiveInterval* fixed = physical_register_intervals_.Get(i); + if (fixed != nullptr && ShouldProcess(processing_core_registers_, fixed)) { + intervals.Add(fixed); + } + } + + for (size_t i = 0, e = temp_intervals_.Size(); i < e; ++i) { + LiveInterval* temp = temp_intervals_.Get(i); + if (ShouldProcess(processing_core_registers_, temp)) { + intervals.Add(temp); + } + } + + return ValidateIntervals(intervals, spill_slots_.Size(), reserved_out_slots_, *codegen_, + allocator_, processing_core_registers_, log_fatal_on_failure); } -bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& ranges, +bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& intervals, + size_t number_of_spill_slots, + size_t number_of_out_slots, const CodeGenerator& codegen, ArenaAllocator* allocator, bool processing_core_registers, @@ -90,28 +307,51 @@ bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& ra size_t number_of_registers = processing_core_registers ? codegen.GetNumberOfCoreRegisters() : codegen.GetNumberOfFloatingPointRegisters(); - GrowableArray<ArenaBitVector*> bit_vectors(allocator, number_of_registers); + GrowableArray<ArenaBitVector*> liveness_of_values( + allocator, number_of_registers + number_of_spill_slots); // Allocate a bit vector per register. A live interval that has a register // allocated will populate the associated bit vector based on its live ranges. - for (size_t i = 0; i < number_of_registers; i++) { - bit_vectors.Add(new (allocator) ArenaBitVector(allocator, 0, true)); + for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) { + liveness_of_values.Add(new (allocator) ArenaBitVector(allocator, 0, true)); } - for (size_t i = 0, e = ranges.Size(); i < e; ++i) { - LiveInterval* current = ranges.Get(i); - do { - if (!current->HasRegister()) { - continue; + for (size_t i = 0, e = intervals.Size(); i < e; ++i) { + for (AllRangesIterator it(intervals.Get(i)); !it.Done(); it.Advance()) { + LiveInterval* current = it.CurrentInterval(); + HInstruction* defined_by = current->GetParent()->GetDefinedBy(); + if (current->GetParent()->HasSpillSlot() + // Parameters have their own stack slot. 
+ && !(defined_by != nullptr && defined_by->IsParameterValue())) { + BitVector* liveness_of_spill_slot = liveness_of_values.Get(number_of_registers + + current->GetParent()->GetSpillSlot() / kVRegSize + - number_of_out_slots); + for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) { + if (liveness_of_spill_slot->IsBitSet(j)) { + if (log_fatal_on_failure) { + std::ostringstream message; + message << "Spill slot conflict at " << j; + LOG(FATAL) << message.str(); + } else { + return false; + } + } else { + liveness_of_spill_slot->SetBit(j); + } + } } - BitVector* vector = bit_vectors.Get(current->GetRegister()); - LiveRange* range = current->GetFirstRange(); - do { - for (size_t j = range->GetStart(); j < range->GetEnd(); ++j) { - if (vector->IsBitSet(j)) { + + if (current->HasRegister()) { + BitVector* liveness_of_register = liveness_of_values.Get(current->GetRegister()); + for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) { + if (liveness_of_register->IsBitSet(j)) { if (log_fatal_on_failure) { std::ostringstream message; - message << "Register conflict at " << j << " for "; + message << "Register conflict at " << j << " "; + if (defined_by != nullptr) { + message << "(" << defined_by->DebugName() << ")"; + } + message << "for "; if (processing_core_registers) { codegen.DumpCoreRegister(message, current->GetRegister()); } else { @@ -122,23 +362,23 @@ bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& ra return false; } } else { - vector->SetBit(j); + liveness_of_register->SetBit(j); } } - } while ((range = range->GetNext()) != nullptr); - } while ((current = current->GetNextSibling()) != nullptr); + } + } } return true; } -void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) { +void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) const { interval->Dump(stream); stream << ": "; if (interval->HasRegister()) { if (processing_core_registers_) { - codegen_.DumpCoreRegister(stream, interval->GetRegister()); + codegen_->DumpCoreRegister(stream, interval->GetRegister()); } else { - codegen_.DumpFloatingPointRegister(stream, interval->GetRegister()); + codegen_->DumpFloatingPointRegister(stream, interval->GetRegister()); } } else { stream << "spilled"; @@ -148,9 +388,10 @@ void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interva // By the book implementation of a linear scan register allocator. void RegisterAllocator::LinearScan() { - while (!unhandled_.IsEmpty()) { + while (!unhandled_->IsEmpty()) { // (1) Remove interval with the lowest start position from unhandled. - LiveInterval* current = unhandled_.Pop(); + LiveInterval* current = unhandled_->Pop(); + DCHECK(!current->IsFixed() && !current->HasSpillSlot()); size_t position = current->GetStart(); // (2) Remove currently active intervals that are dead at this position. @@ -184,6 +425,14 @@ void RegisterAllocator::LinearScan() { } } + if (current->IsSlowPathSafepoint()) { + // Synthesized interval to record the maximum number of live registers + // at safepoints. No need to allocate a register for it. + maximum_number_of_live_registers_ = + std::max(maximum_number_of_live_registers_, active_.Size()); + continue; + } + // (4) Try to find an available register. 
bool success = TryAllocateFreeReg(current); @@ -210,13 +459,6 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { free_until[i] = kMaxLifetimePosition; } - // For each active interval, set its register to not free. - for (size_t i = 0, e = active_.Size(); i < e; ++i) { - LiveInterval* interval = active_.Get(i); - DCHECK(interval->HasRegister()); - free_until[interval->GetRegister()] = 0; - } - // For each inactive interval, set its register to be free until // the next intersection with `current`. // Thanks to SSA, this should only be needed for intervals @@ -226,17 +468,31 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { DCHECK(inactive->HasRegister()); size_t next_intersection = inactive->FirstIntersectionWith(current); if (next_intersection != kNoLifetime) { - free_until[inactive->GetRegister()] = next_intersection; + free_until[inactive->GetRegister()] = + std::min(free_until[inactive->GetRegister()], next_intersection); } } - // Pick the register that is free the longest. + // For each active interval, set its register to not free. + for (size_t i = 0, e = active_.Size(); i < e; ++i) { + LiveInterval* interval = active_.Get(i); + DCHECK(interval->HasRegister()); + free_until[interval->GetRegister()] = 0; + } + int reg = -1; - for (size_t i = 0; i < number_of_registers_; ++i) { - if (IsBlocked(i)) continue; - if (reg == -1 || free_until[i] > free_until[reg]) { - reg = i; - if (free_until[i] == kMaxLifetimePosition) break; + if (current->HasRegister()) { + // Some instructions have a fixed register output. + reg = current->GetRegister(); + DCHECK_NE(free_until[reg], 0u); + } else { + // Pick the register that is free the longest. + for (size_t i = 0; i < number_of_registers_; ++i) { + if (IsBlocked(i)) continue; + if (reg == -1 || free_until[i] > free_until[reg]) { + reg = i; + if (free_until[i] == kMaxLifetimePosition) break; + } } } @@ -252,7 +508,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { // the register is not available anymore. LiveInterval* split = Split(current, free_until[reg]); DCHECK(split != nullptr); - AddToUnhandled(split); + AddSorted(unhandled_, split); } return true; } @@ -269,8 +525,8 @@ bool RegisterAllocator::IsBlocked(int reg) const { // we spill `current` instead. bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { size_t first_register_use = current->FirstRegisterUse(); - if (current->FirstRegisterUse() == kNoLifetime) { - // TODO: Allocate spill slot for `current`. 
+ if (first_register_use == kNoLifetime) { + AllocateSpillSlotFor(current); return false; } @@ -285,9 +541,13 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { for (size_t i = 0, e = active_.Size(); i < e; ++i) { LiveInterval* active = active_.Get(i); DCHECK(active->HasRegister()); - size_t use = active->FirstRegisterUseAfter(current->GetStart()); - if (use != kNoLifetime) { - next_use[active->GetRegister()] = use; + if (active->IsFixed()) { + next_use[active->GetRegister()] = current->GetStart(); + } else { + size_t use = active->FirstRegisterUseAfter(current->GetStart()); + if (use != kNoLifetime) { + next_use[active->GetRegister()] = use; + } } } @@ -298,9 +558,17 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { for (size_t i = 0, e = inactive_.Size(); i < e; ++i) { LiveInterval* inactive = inactive_.Get(i); DCHECK(inactive->HasRegister()); - size_t use = inactive->FirstRegisterUseAfter(current->GetStart()); - if (use != kNoLifetime) { - next_use[inactive->GetRegister()] = use; + size_t next_intersection = inactive->FirstIntersectionWith(current); + if (next_intersection != kNoLifetime) { + if (inactive->IsFixed()) { + next_use[inactive->GetRegister()] = + std::min(next_intersection, next_use[inactive->GetRegister()]); + } else { + size_t use = inactive->FirstRegisterUseAfter(current->GetStart()); + if (use != kNoLifetime) { + next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]); + } + } } } @@ -317,8 +585,9 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { if (first_register_use >= next_use[reg]) { // If the first use of that instruction is after the last use of the found // register, we split this interval just before its first register use. - LiveInterval* split = Split(current, first_register_use - 1); - AddToUnhandled(split); + AllocateSpillSlotFor(current); + LiveInterval* split = Split(current, first_register_use); + AddSorted(unhandled_, split); return false; } else { // Use this register and spill the active and inactives interval that @@ -328,10 +597,11 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { for (size_t i = 0, e = active_.Size(); i < e; ++i) { LiveInterval* active = active_.Get(i); if (active->GetRegister() == reg) { + DCHECK(!active->IsFixed()); LiveInterval* split = Split(active, current->GetStart()); active_.DeleteAt(i); handled_.Add(active); - AddToUnhandled(split); + AddSorted(unhandled_, split); break; } } @@ -339,11 +609,19 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { for (size_t i = 0; i < inactive_.Size(); ++i) { LiveInterval* inactive = inactive_.Get(i); if (inactive->GetRegister() == reg) { - LiveInterval* split = Split(inactive, current->GetStart()); - inactive_.DeleteAt(i); - handled_.Add(inactive); - AddToUnhandled(split); - --i; + size_t next_intersection = inactive->FirstIntersectionWith(current); + if (next_intersection != kNoLifetime) { + if (inactive->IsFixed()) { + LiveInterval* split = Split(current, next_intersection); + AddSorted(unhandled_, split); + } else { + LiveInterval* split = Split(inactive, current->GetStart()); + inactive_.DeleteAt(i); + handled_.Add(inactive); + AddSorted(unhandled_, split); + --i; + } + } } } @@ -351,14 +629,16 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { } } -void RegisterAllocator::AddToUnhandled(LiveInterval* interval) { - for (size_t i = unhandled_.Size(); i > 0; --i) { - LiveInterval* current = unhandled_.Get(i - 1); +void 
RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval) { + size_t insert_at = 0; + for (size_t i = array->Size(); i > 0; --i) { + LiveInterval* current = array->Get(i - 1); if (current->StartsAfter(interval)) { - unhandled_.InsertAt(i, interval); + insert_at = i; break; } } + array->InsertAt(insert_at, interval); } LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) { @@ -370,9 +650,494 @@ LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) return interval; } else { LiveInterval* new_interval = interval->SplitAt(position); - // TODO: Allocate spill slot for `interval`. return new_interval; } } +static bool NeedTwoSpillSlot(Primitive::Type type) { + return type == Primitive::kPrimLong || type == Primitive::kPrimDouble; +} + +void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { + LiveInterval* parent = interval->GetParent(); + + // An instruction gets a spill slot for its entire lifetime. If the parent + // of this interval already has a spill slot, there is nothing to do. + if (parent->HasSpillSlot()) { + return; + } + + HInstruction* defined_by = parent->GetDefinedBy(); + if (defined_by->IsParameterValue()) { + // Parameters have their own stack slot. + parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); + return; + } + + if (defined_by->IsConstant()) { + // Constants don't need a spill slot. + return; + } + + LiveInterval* last_sibling = interval; + while (last_sibling->GetNextSibling() != nullptr) { + last_sibling = last_sibling->GetNextSibling(); + } + size_t end = last_sibling->GetEnd(); + + // Find an available spill slot. + size_t slot = 0; + for (size_t e = spill_slots_.Size(); slot < e; ++slot) { + // We check if it is less rather than less or equal because the parallel move + // resolver does not work when a single spill slot needs to be exchanged with + // a double spill slot. The strict comparison avoids needing to exchange these + // locations at the same lifetime position. + if (spill_slots_.Get(slot) < parent->GetStart() + && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) { + break; + } + } + + if (NeedTwoSpillSlot(parent->GetType())) { + if (slot == spill_slots_.Size()) { + // We need a new spill slot. + spill_slots_.Add(end); + spill_slots_.Add(end); + } else if (slot == spill_slots_.Size() - 1) { + spill_slots_.Put(slot, end); + spill_slots_.Add(end); + } else { + spill_slots_.Put(slot, end); + spill_slots_.Put(slot + 1, end); + } + } else { + if (slot == spill_slots_.Size()) { + // We need a new spill slot. + spill_slots_.Add(end); + } else { + spill_slots_.Put(slot, end); + } + } + + parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize); +} + +static Location ConvertToLocation(LiveInterval* interval) { + if (interval->HasRegister()) { + return Location::RegisterLocation(ManagedRegister(interval->GetRegister())); + } else { + HInstruction* defined_by = interval->GetParent()->GetDefinedBy(); + if (defined_by->IsConstant()) { + return defined_by->GetLocations()->Out(); + } else { + DCHECK(interval->GetParent()->HasSpillSlot()); + if (NeedTwoSpillSlot(interval->GetType())) { + return Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); + } else { + return Location::StackSlot(interval->GetParent()->GetSpillSlot()); + } + } + } +} + +// We create a special marker for inputs moves to differentiate them from +// moves created during resolution. 
They must be different instructions
+// because the input moves work on the assumption that the interval moves
+// have been executed.
+static constexpr size_t kInputMoveLifetimePosition = 0;
+static bool IsInputMove(HInstruction* instruction) {
+  return instruction->GetLifetimePosition() == kInputMoveLifetimePosition;
+}
+
+static bool IsValidDestination(Location destination) {
+  return destination.IsRegister() || destination.IsStackSlot() || destination.IsDoubleStackSlot();
+}
+
+void RegisterAllocator::AddInputMoveFor(HInstruction* user,
+                                        Location source,
+                                        Location destination) const {
+  DCHECK(IsValidDestination(destination));
+  if (source.Equals(destination)) return;
+
+  DCHECK(user->AsPhi() == nullptr);
+
+  HInstruction* previous = user->GetPrevious();
+  HParallelMove* move = nullptr;
+  if (previous == nullptr
+      || previous->AsParallelMove() == nullptr
+      || !IsInputMove(previous)) {
+    move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(kInputMoveLifetimePosition);
+    user->GetBlock()->InsertInstructionBefore(move, user);
+  } else {
+    move = previous->AsParallelMove();
+  }
+  DCHECK(IsInputMove(move));
+  move->AddMove(new (allocator_) MoveOperands(source, destination, nullptr));
+}
+
+void RegisterAllocator::InsertParallelMoveAt(size_t position,
+                                             HInstruction* instruction,
+                                             Location source,
+                                             Location destination) const {
+  DCHECK(IsValidDestination(destination));
+  if (source.Equals(destination)) return;
+
+  HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
+  if (at == nullptr) {
+    // Block boundary, don't do anything; the connection of split siblings will handle it.
+    return;
+  }
+  HParallelMove* move;
+  if ((position & 1) == 1) {
+    // Move must happen after the instruction.
+    DCHECK(!at->IsControlFlow());
+    move = at->GetNext()->AsParallelMove();
+    // This is a parallel move for connecting siblings in the same block. We need to
+    // differentiate it from moves for connecting blocks, and from input moves.
+    if (move == nullptr || move->GetLifetimePosition() != position) {
+      move = new (allocator_) HParallelMove(allocator_);
+      move->SetLifetimePosition(position);
+      at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
+    }
+  } else {
+    // Move must happen before the instruction.
+    HInstruction* previous = at->GetPrevious();
+    if (previous != nullptr && previous->IsParallelMove()) {
+      // This is a parallel move for connecting siblings in the same block. We need to
+      // differentiate it from moves for connecting blocks, and from input moves.
+      if (previous->GetLifetimePosition() != position) {
+        // `previous` is an input move or a connecting block move; we have to insert
+        // the new parallel move before those moves, so step one instruction back.
+        at = previous;
+        previous = previous->GetPrevious();
+      }
+    }
+    if (previous == nullptr
+        || !previous->IsParallelMove()
+        || previous->GetLifetimePosition() != position) {
+      // If the previous is a parallel move, then its position must be lower
+      // than the given `position`: it was added just after the non-parallel
+      // move instruction that precedes `instruction`.
+ DCHECK(previous == nullptr + || !previous->IsParallelMove() + || previous->GetLifetimePosition() < position); + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(position); + at->GetBlock()->InsertInstructionBefore(move, at); + } else { + move = previous->AsParallelMove(); + } + } + move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); +} + +void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, + HInstruction* instruction, + Location source, + Location destination) const { + DCHECK(IsValidDestination(destination)); + if (source.Equals(destination)) return; + + DCHECK_EQ(block->GetSuccessors().Size(), 1u); + HInstruction* last = block->GetLastInstruction(); + HInstruction* previous = last->GetPrevious(); + HParallelMove* move; + // This is a parallel move for connecting blocks. We need to differentiate + // it with moves for connecting siblings in a same block, and output moves. + if (previous == nullptr || !previous->IsParallelMove() + || previous->AsParallelMove()->GetLifetimePosition() != block->GetLifetimeEnd()) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(block->GetLifetimeEnd()); + block->InsertInstructionBefore(move, last); + } else { + move = previous->AsParallelMove(); + } + move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); +} + +void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, + HInstruction* instruction, + Location source, + Location destination) const { + DCHECK(IsValidDestination(destination)); + if (source.Equals(destination)) return; + + HInstruction* first = block->GetFirstInstruction(); + HParallelMove* move = first->AsParallelMove(); + // This is a parallel move for connecting blocks. We need to differentiate + // it with moves for connecting siblings in a same block, and input moves. + if (move == nullptr || move->GetLifetimePosition() != block->GetLifetimeStart()) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(block->GetLifetimeStart()); + block->InsertInstructionBefore(move, first); + } + move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); +} + +void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, + Location source, + Location destination) const { + DCHECK(IsValidDestination(destination)); + if (source.Equals(destination)) return; + + if (instruction->AsPhi() != nullptr) { + InsertParallelMoveAtEntryOf(instruction->GetBlock(), instruction, source, destination); + return; + } + + size_t position = instruction->GetLifetimePosition() + 1; + HParallelMove* move = instruction->GetNext()->AsParallelMove(); + // This is a parallel move for moving the output of an instruction. We need + // to differentiate with input moves, moves for connecting siblings in a + // and moves for connecting blocks. + if (move == nullptr || move->GetLifetimePosition() != position) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(position); + instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext()); + } + move->AddMove(new (allocator_) MoveOperands(source, destination, instruction)); +} + +void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { + LiveInterval* current = interval; + if (current->HasSpillSlot() && current->HasRegister()) { + // We spill eagerly, so move must be at definition. 
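// Illustrative aside, not part of this change: the helpers above funnel every
// location transfer into an HParallelMove. A parallel move has to be emitted so
// that no source is overwritten before it is read, which can require breaking a
// cycle (e.g. swapping two registers) with a scratch location. Generic sketch of
// that resolution over plain register indices; ART's actual ParallelMoveResolver
// lives elsewhere in this directory:

#include <cstdio>
#include <vector>

struct PendingMove { int source, destination; };  // -1 stands for the scratch location.

void EmitParallelMove(std::vector<PendingMove> moves) {
  while (!moves.empty()) {
    bool emitted_one = false;
    for (size_t i = 0; i < moves.size(); ++i) {
      bool destination_is_still_needed = false;
      for (size_t j = 0; j < moves.size(); ++j) {
        if (j != i && moves[j].source == moves[i].destination) destination_is_still_needed = true;
      }
      if (!destination_is_still_needed) {
        std::printf("move %d -> %d\n", moves[i].source, moves[i].destination);
        moves.erase(moves.begin() + i);
        emitted_one = true;
        break;
      }
    }
    if (!emitted_one) {
      // Only cycles remain: park one source in the scratch location and
      // redirect the moves that still read it.
      PendingMove broken = moves.back();
      moves.pop_back();
      std::printf("move %d -> scratch\n", broken.source);
      for (PendingMove& move : moves) {
        if (move.source == broken.source) move.source = -1;
      }
      moves.push_back({-1, broken.destination});
    }
  }
}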
+  InsertMoveAfter(interval->GetDefinedBy(),
+                  Location::RegisterLocation(ManagedRegister(interval->GetRegister())),
+                  NeedTwoSpillSlot(interval->GetType())
+                      ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
+                      : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+  }
+  UsePosition* use = current->GetFirstUse();
+
+  // Walk over all siblings, updating locations of use positions, and
+  // connecting them when they are adjacent.
+  do {
+    Location source = ConvertToLocation(current);
+
+    // Walk over all uses covered by this interval, and update the location
+    // information.
+    while (use != nullptr && use->GetPosition() <= current->GetEnd()) {
+      LocationSummary* locations = use->GetUser()->GetLocations();
+      if (use->GetIsEnvironment()) {
+        locations->SetEnvironmentAt(use->GetInputIndex(), source);
+      } else {
+        Location expected_location = locations->InAt(use->GetInputIndex());
+        if (expected_location.IsUnallocated()) {
+          locations->SetInAt(use->GetInputIndex(), source);
+        } else if (!expected_location.IsConstant()) {
+          AddInputMoveFor(use->GetUser(), source, expected_location);
+        }
+      }
+      use = use->GetNext();
+    }
+
+    // If the next interval starts just after this one, and has a register,
+    // insert a move.
+    LiveInterval* next_sibling = current->GetNextSibling();
+    if (next_sibling != nullptr
+        && next_sibling->HasRegister()
+        && current->GetEnd() == next_sibling->GetStart()) {
+      Location destination = ConvertToLocation(next_sibling);
+      InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
+    }
+
+    // At each safepoint, we record stack and register information.
+    for (size_t i = 0, e = safepoints_.Size(); i < e; ++i) {
+      HInstruction* safepoint = safepoints_.Get(i);
+      size_t position = safepoint->GetLifetimePosition();
+      LocationSummary* locations = safepoint->GetLocations();
+      if (!current->Covers(position)) continue;
+
+      if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
+        locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
+      }
+
+      switch (source.GetKind()) {
+        case Location::kRegister: {
+          locations->AddLiveRegister(source);
+          if (current->GetType() == Primitive::kPrimNot) {
+            locations->SetRegisterBit(source.reg().RegId());
+          }
+          break;
+        }
+        case Location::kStackSlot:  // Fall-through
+        case Location::kDoubleStackSlot:  // Fall-through
+        case Location::kConstant: {
+          // Nothing to do.
+          break;
+        }
+        default: {
+          LOG(FATAL) << "Unexpected location for object";
+        }
+      }
+    }
+    current = next_sibling;
+  } while (current != nullptr);
+  DCHECK(use == nullptr);
+}
+
+void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
+                                             HBasicBlock* from,
+                                             HBasicBlock* to) const {
+  if (interval->GetNextSibling() == nullptr) {
+    // Nothing to connect. The whole range was allocated to the same location.
+    return;
+  }
+
+  size_t from_position = from->GetLifetimeEnd() - 1;
+  // When an instruction dies at the entry of another, and the latter is the
+  // beginning of a block, the register allocator ensures the former has a
+  // register at block->GetLifetimeStart() + 1. Since this is at a block
+  // boundary, it must be handled in this method.
+  size_t to_position = to->GetLifetimeStart() + 1;
+
+  LiveInterval* destination = nullptr;
+  LiveInterval* source = nullptr;
+
+  LiveInterval* current = interval;
+
+  // Check the intervals that cover `from` and `to`.
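// Illustrative aside, not part of this change: the safepoint loop above records,
// for every safepoint an interval covers, where the value lives so the GC can
// find object references. Minimal sketch with hypothetical bitmask fields; the
// real code records this through the safepoint's LocationSummary, as shown above:

#include <cstdint>

struct SafepointReferenceMap {
  uint32_t stack_mask = 0;     // One bit per vreg-sized spill slot holding a reference.
  uint32_t register_mask = 0;  // One bit per core register holding a reference.

  void SetStackBit(int slot) { stack_mask |= 1u << slot; }
  void SetRegisterBit(int reg) { register_mask |= 1u << reg; }
};

// A reference spilled in slot 3 and currently also cached in register 2 would be
// recorded with SetStackBit(3) and SetRegisterBit(2).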
+ while ((current != nullptr) && (source == nullptr || destination == nullptr)) { + if (current->Covers(from_position)) { + DCHECK(source == nullptr); + source = current; + } + if (current->Covers(to_position)) { + DCHECK(destination == nullptr); + destination = current; + } + + current = current->GetNextSibling(); + } + + if (destination == source) { + // Interval was not split. + return; + } + + DCHECK(destination != nullptr && source != nullptr); + + if (!destination->HasRegister()) { + // Values are eagerly spilled. Spill slot already contains appropriate value. + return; + } + + // If `from` has only one successor, we can put the moves at the exit of it. Otherwise + // we need to put the moves at the entry of `to`. + if (from->GetSuccessors().Size() == 1) { + InsertParallelMoveAtExitOf(from, + interval->GetParent()->GetDefinedBy(), + ConvertToLocation(source), + ConvertToLocation(destination)); + } else { + DCHECK_EQ(to->GetPredecessors().Size(), 1u); + InsertParallelMoveAtEntryOf(to, + interval->GetParent()->GetDefinedBy(), + ConvertToLocation(source), + ConvertToLocation(destination)); + } +} + +// Returns the location of `interval`, or siblings of `interval`, at `position`. +static Location FindLocationAt(LiveInterval* interval, size_t position) { + LiveInterval* current = interval; + while (!current->Covers(position)) { + current = current->GetNextSibling(); + DCHECK(current != nullptr); + } + return ConvertToLocation(current); +} + +void RegisterAllocator::Resolve() { + codegen_->ComputeFrameSize( + spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_); + + // Adjust the Out Location of instructions. + // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration. + for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); + LiveInterval* current = instruction->GetLiveInterval(); + LocationSummary* locations = instruction->GetLocations(); + Location location = locations->Out(); + if (instruction->AsParameterValue() != nullptr) { + // Now that we know the frame size, adjust the parameter's location. + if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + current->SetSpillSlot(location.GetStackIndex()); + locations->SetOut(location); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + current->SetSpillSlot(location.GetStackIndex()); + locations->SetOut(location); + } else if (current->HasSpillSlot()) { + current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize()); + } + } + + Location source = ConvertToLocation(current); + + if (location.IsUnallocated()) { + if (location.GetPolicy() == Location::kSameAsFirstInput) { + locations->SetInAt(0, source); + } + locations->SetOut(source); + } else { + DCHECK(source.Equals(location)); + } + } + + // Connect siblings. + for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); + ConnectSiblings(instruction->GetLiveInterval()); + } + + // Resolve non-linear control flow across branches. Order does not matter. 
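// Illustrative aside, not part of this change: a rough sketch of the quantities
// ComputeFrameSize receives at the top of Resolve(). Exact layout, alignment and
// callee-save handling belong to the code generator and are simplified away
// here; kSketchVRegSlot is a made-up stand-in for kVRegSize:

#include <cstddef>

constexpr size_t kSketchVRegSlot = 4;

size_t RoughFrameSize(size_t spill_slots,
                      size_t max_live_registers_at_safepoints,
                      size_t out_slots,
                      size_t word_size) {
  return spill_slots * kSketchVRegSlot                  // Slots for split/spilled intervals.
       + out_slots * kSketchVRegSlot                    // Outgoing argument slots.
       + max_live_registers_at_safepoints * word_size;  // Register save area for slow paths.
}

// Parameters keep caller-side slots, which is why the loop above rebases their
// stack index by adding the final frame size.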
+ for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + BitVector* live = liveness_.GetLiveInSet(*block); + for (uint32_t idx : live->Indexes()) { + HInstruction* current = liveness_.GetInstructionFromSsaIndex(idx); + LiveInterval* interval = current->GetLiveInterval(); + for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) { + ConnectSplitSiblings(interval, block->GetPredecessors().Get(i), block); + } + } + } + + // Resolve phi inputs. Order does not matter. + for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) { + HBasicBlock* current = it.Current(); + for (HInstructionIterator it(current->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* phi = it.Current(); + for (size_t i = 0, e = current->GetPredecessors().Size(); i < e; ++i) { + HBasicBlock* predecessor = current->GetPredecessors().Get(i); + DCHECK_EQ(predecessor->GetSuccessors().Size(), 1u); + HInstruction* input = phi->InputAt(i); + Location source = FindLocationAt(input->GetLiveInterval(), + predecessor->GetLastInstruction()->GetLifetimePosition()); + Location destination = ConvertToLocation(phi->GetLiveInterval()); + InsertParallelMoveAtExitOf(predecessor, nullptr, source, destination); + } + } + } + + // Assign temp locations. + HInstruction* current = nullptr; + size_t temp_index = 0; + for (size_t i = 0; i < temp_intervals_.Size(); ++i) { + LiveInterval* temp = temp_intervals_.Get(i); + if (temp->GetDefinedBy() != current) { + temp_index = 0; + current = temp->GetDefinedBy(); + } + LocationSummary* locations = current->GetLocations(); + locations->SetTempAt( + temp_index++, Location::RegisterLocation(ManagedRegister(temp->GetRegister()))); + } +} + } // namespace art diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index e575b9678d..d4c233a7f8 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -18,12 +18,20 @@ #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_ #include "base/macros.h" +#include "primitive.h" #include "utils/growable_array.h" +#include "gtest/gtest.h" + namespace art { class CodeGenerator; +class HBasicBlock; +class HGraph; +class HInstruction; +class HParallelMove; class LiveInterval; +class Location; class SsaLivenessAnalysis; /** @@ -31,43 +39,55 @@ class SsaLivenessAnalysis; */ class RegisterAllocator { public: - RegisterAllocator(ArenaAllocator* allocator, const CodeGenerator& codegen); + RegisterAllocator(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& analysis); // Main entry point for the register allocator. Given the liveness analysis, // allocates registers to live intervals. - void AllocateRegisters(const SsaLivenessAnalysis& liveness) { - processing_core_registers_ = true; - AllocateRegistersInternal(liveness); - processing_core_registers_ = false; - AllocateRegistersInternal(liveness); - } + void AllocateRegisters(); // Validate that the register allocator did not allocate the same register to // intervals that intersect each other. Returns false if it did not. 
- bool Validate(const SsaLivenessAnalysis& liveness, bool log_fatal_on_failure) { + bool Validate(bool log_fatal_on_failure) { processing_core_registers_ = true; - if (!ValidateInternal(liveness, log_fatal_on_failure)) { + if (!ValidateInternal(log_fatal_on_failure)) { return false; } processing_core_registers_ = false; - return ValidateInternal(liveness, log_fatal_on_failure); + return ValidateInternal(log_fatal_on_failure); } // Helper method for validation. Used by unit testing. static bool ValidateIntervals(const GrowableArray<LiveInterval*>& intervals, + size_t number_of_spill_slots, + size_t number_of_out_slots, const CodeGenerator& codegen, ArenaAllocator* allocator, bool processing_core_registers, bool log_fatal_on_failure); + static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set); + static bool Supports(InstructionSet instruction_set) { + return instruction_set == kX86 + || instruction_set == kArm + || instruction_set == kX86_64 + || instruction_set == kThumb2; + } + + size_t GetNumberOfSpillSlots() const { + return spill_slots_.Size(); + } + private: // Main methods of the allocator. void LinearScan(); bool TryAllocateFreeReg(LiveInterval* interval); bool AllocateBlockedReg(LiveInterval* interval); + void Resolve(); - // Add `interval` in the sorted list of unhandled intervals. - void AddToUnhandled(LiveInterval* interval); + // Add `interval` in the given sorted list. + static void AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval); // Split `interval` at the position `at`. The new interval starts at `at`. LiveInterval* Split(LiveInterval* interval, size_t at); @@ -75,17 +95,55 @@ class RegisterAllocator { // Returns whether `reg` is blocked by the code generator. bool IsBlocked(int reg) const; + // Update the interval for the register in `location` to cover [start, end). + void BlockRegister(Location location, size_t start, size_t end, Primitive::Type type); + + // Allocate a spill slot for the given interval. + void AllocateSpillSlotFor(LiveInterval* interval); + + // Connect adjacent siblings within blocks. + void ConnectSiblings(LiveInterval* interval); + + // Connect siblings between block entries and exits. + void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const; + + // Helper methods to insert parallel moves in the graph. + void InsertParallelMoveAtExitOf(HBasicBlock* block, + HInstruction* instruction, + Location source, + Location destination) const; + void InsertParallelMoveAtEntryOf(HBasicBlock* block, + HInstruction* instruction, + Location source, + Location destination) const; + void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const; + void AddInputMoveFor(HInstruction* user, Location source, Location destination) const; + void InsertParallelMoveAt(size_t position, + HInstruction* instruction, + Location source, + Location destination) const; + // Helper methods. 
- void AllocateRegistersInternal(const SsaLivenessAnalysis& liveness); - bool ValidateInternal(const SsaLivenessAnalysis& liveness, bool log_fatal_on_failure) const; - void DumpInterval(std::ostream& stream, LiveInterval* interval); + void AllocateRegistersInternal(); + void ProcessInstruction(HInstruction* instruction); + bool ValidateInternal(bool log_fatal_on_failure) const; + void DumpInterval(std::ostream& stream, LiveInterval* interval) const; ArenaAllocator* const allocator_; - const CodeGenerator& codegen_; + CodeGenerator* const codegen_; + const SsaLivenessAnalysis& liveness_; - // List of intervals that must be processed, ordered by start position. Last entry - // is the interval that has the lowest start position. - GrowableArray<LiveInterval*> unhandled_; + // List of intervals for core registers that must be processed, ordered by start + // position. Last entry is the interval that has the lowest start position. + // This list is initially populated before doing the linear scan. + GrowableArray<LiveInterval*> unhandled_core_intervals_; + + // List of intervals for floating-point registers. Same comments as above. + GrowableArray<LiveInterval*> unhandled_fp_intervals_; + + // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_` + // or `unhandled_fp_intervals_`. + GrowableArray<LiveInterval*>* unhandled_; // List of intervals that have been processed. GrowableArray<LiveInterval*> handled_; @@ -98,6 +156,20 @@ class RegisterAllocator { // That is, they have a lifetime hole that spans the start of the new interval. GrowableArray<LiveInterval*> inactive_; + // Fixed intervals for physical registers. Such intervals cover the positions + // where an instruction requires a specific register. + GrowableArray<LiveInterval*> physical_register_intervals_; + + // Intervals for temporaries. Such intervals cover the positions + // where an instruction requires a temporary. + GrowableArray<LiveInterval*> temp_intervals_; + + // The spill slots allocated for live intervals. + GrowableArray<size_t> spill_slots_; + + // Instructions that need a safepoint. + GrowableArray<HInstruction*> safepoints_; + // True if processing core registers. False if processing floating // point registers. bool processing_core_registers_; @@ -111,6 +183,14 @@ class RegisterAllocator { // Blocked registers, as decided by the code generator. bool* const blocked_registers_; + // Slots reserved for out arguments. + size_t reserved_out_slots_; + + // The maximum live registers at safepoints. 
+ size_t maximum_number_of_live_registers_; + + FRIEND_TEST(RegisterAllocatorTest, FreeUntil); + DISALLOW_COPY_AND_ASSIGN(RegisterAllocator); }; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 019d0f879c..535a768ea1 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -16,12 +16,14 @@ #include "builder.h" #include "code_generator.h" +#include "code_generator_x86.h" #include "dex_file.h" #include "dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "register_allocator.h" #include "ssa_liveness_analysis.h" +#include "ssa_phi_elimination.h" #include "utils/arena_allocator.h" #include "gtest/gtest.h" @@ -40,12 +42,12 @@ static bool Check(const uint16_t* data) { graph->BuildDominatorTree(); graph->TransformToSSA(); graph->FindNaturalLoops(); - SsaLivenessAnalysis liveness(*graph); + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); - CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86); - RegisterAllocator register_allocator(&allocator, *codegen); - register_allocator.AllocateRegisters(liveness); - return register_allocator.Validate(liveness, false); + RegisterAllocator register_allocator(&allocator, &codegen, liveness); + register_allocator.AllocateRegisters(); + return register_allocator.Validate(false); } /** @@ -56,7 +58,7 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86); + x86::CodeGeneratorX86 codegen(graph); GrowableArray<LiveInterval*> intervals(&allocator, 0); // Test with two intervals of the same range. 
@@ -64,10 +66,12 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { static constexpr size_t ranges[][2] = {{0, 42}}; intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 0)); intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); intervals.Get(1)->SetRegister(0); - ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_FALSE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); intervals.Reset(); } @@ -77,10 +81,12 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 43}}; intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); intervals.Get(1)->SetRegister(0); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); intervals.Reset(); } @@ -90,10 +96,12 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 43}}; intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); intervals.Get(1)->SetRegister(0); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); intervals.Reset(); } @@ -103,10 +111,12 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 47}}; intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); intervals.Get(1)->SetRegister(0); - ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_FALSE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); intervals.Reset(); } @@ -117,14 +127,17 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { intervals.Get(0)->SplitAt(43); static constexpr size_t ranges2[][2] = {{42, 47}}; intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); intervals.Get(1)->SetRegister(0); // Sibling of the first interval has no register allocated to it. 
- ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); intervals.Get(0)->GetNextSibling()->SetRegister(0); - ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_FALSE(RegisterAllocator::ValidateIntervals( + intervals, 0, 0, codegen, &allocator, true, false)); } } @@ -286,12 +299,12 @@ TEST(RegisterAllocatorTest, Loop3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - SsaLivenessAnalysis liveness(*graph); + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); - CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86); - RegisterAllocator register_allocator(&allocator, *codegen); - register_allocator.AllocateRegisters(liveness); - ASSERT_TRUE(register_allocator.Validate(liveness, false)); + RegisterAllocator register_allocator(&allocator, &codegen, liveness); + register_allocator.AllocateRegisters(); + ASSERT_TRUE(register_allocator.Validate(false)); HBasicBlock* loop_header = graph->GetBlocks().Get(2); HPhi* phi = loop_header->GetFirstPhi()->AsPhi(); @@ -303,8 +316,135 @@ TEST(RegisterAllocatorTest, Loop3) { ASSERT_NE(phi_interval->GetRegister(), loop_update->GetRegister()); HBasicBlock* return_block = graph->GetBlocks().Get(3); - HReturn* ret = return_block->GetFirstInstruction()->AsReturn(); + HReturn* ret = return_block->GetLastInstruction()->AsReturn(); ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister()); } +TEST(RegisterAllocatorTest, FirstRegisterUse) { + const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8, + Instruction::ADD_INT_LIT8 | 0 << 8, 1 << 8, + Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8 | 1, + Instruction::RETURN_VOID); + + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = BuildSSAGraph(data, &allocator); + x86::CodeGeneratorX86 codegen(graph); + SsaLivenessAnalysis liveness(*graph, &codegen); + liveness.Analyze(); + + HAdd* first_add = graph->GetBlocks().Get(1)->GetFirstInstruction()->AsAdd(); + HAdd* last_add = graph->GetBlocks().Get(1)->GetLastInstruction()->GetPrevious()->AsAdd(); + ASSERT_EQ(last_add->InputAt(0), first_add); + LiveInterval* interval = first_add->GetLiveInterval(); + ASSERT_EQ(interval->GetEnd(), last_add->GetLifetimePosition()); + ASSERT_TRUE(interval->GetNextSibling() == nullptr); + + // We need a register for the output of the instruction. + ASSERT_EQ(interval->FirstRegisterUse(), first_add->GetLifetimePosition()); + + // Split at the next instruction. + interval = interval->SplitAt(first_add->GetLifetimePosition() + 2); + // The user of the split is the last add. + ASSERT_EQ(interval->FirstRegisterUse(), last_add->GetLifetimePosition() - 1); + + // Split before the last add. + LiveInterval* new_interval = interval->SplitAt(last_add->GetLifetimePosition() - 1); + // Ensure the current interval has no register use... + ASSERT_EQ(interval->FirstRegisterUse(), kNoLifetime); + // And the new interval has it for the last add. + ASSERT_EQ(new_interval->FirstRegisterUse(), last_add->GetLifetimePosition() - 1); +} + +TEST(RegisterAllocatorTest, DeadPhi) { + /* Test for a dead loop phi taking as back-edge input a phi that also has + * this loop phi as input. 
Walking backwards in SsaDeadPhiElimination
+   * does not solve the problem because the loop phi will be visited last.
+   *
+   * Test the following snippet:
+   *  int a = 0
+   *  do {
+   *    if (true) {
+   *      a = 2;
+   *    }
+   *  } while (true);
+   */
+
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::CONST_4 | 1 << 8 | 0,
+    Instruction::IF_NE | 1 << 8 | 1 << 12, 3,
+    Instruction::CONST_4 | 2 << 12 | 0 << 8,
+    Instruction::GOTO | 0xFD00,
+    Instruction::RETURN_VOID);
+
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = BuildSSAGraph(data, &allocator);
+  SsaDeadPhiElimination(graph).Run();
+  x86::CodeGeneratorX86 codegen(graph);
+  SsaLivenessAnalysis liveness(*graph, &codegen);
+  liveness.Analyze();
+  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+  register_allocator.AllocateRegisters();
+  ASSERT_TRUE(register_allocator.Validate(false));
+}
+
+/**
+ * Test that the TryAllocateFreeReg method works in the presence of inactive intervals
+ * that share the same register. It should split the interval it is currently
+ * allocating for at the minimum lifetime position between the two inactive intervals.
+ */
+TEST(RegisterAllocatorTest, FreeUntil) {
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::RETURN);
+
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = BuildSSAGraph(data, &allocator);
+  SsaDeadPhiElimination(graph).Run();
+  x86::CodeGeneratorX86 codegen(graph);
+  SsaLivenessAnalysis liveness(*graph, &codegen);
+  liveness.Analyze();
+  RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+
+  // Add an artificial range to cover the temps that will be put in the unhandled list.
+  LiveInterval* unhandled = graph->GetEntryBlock()->GetFirstInstruction()->GetLiveInterval();
+  unhandled->AddLoopRange(0, 60);
+
+  // Add three temps holding the same register, and starting at different positions.
+  // Put the one that should be picked in the middle of the inactive list to ensure
+  // we do not depend on the order.
+  LiveInterval* interval = LiveInterval::MakeTempInterval(&allocator, nullptr, Primitive::kPrimInt);
+  interval->SetRegister(0);
+  interval->AddRange(40, 50);
+  register_allocator.inactive_.Add(interval);
+
+  interval = LiveInterval::MakeTempInterval(&allocator, nullptr, Primitive::kPrimInt);
+  interval->SetRegister(0);
+  interval->AddRange(20, 30);
+  register_allocator.inactive_.Add(interval);
+
+  interval = LiveInterval::MakeTempInterval(&allocator, nullptr, Primitive::kPrimInt);
+  interval->SetRegister(0);
+  interval->AddRange(60, 70);
+  register_allocator.inactive_.Add(interval);
+
+  register_allocator.number_of_registers_ = 1;
+  register_allocator.registers_array_ = allocator.AllocArray<size_t>(1);
+  register_allocator.processing_core_registers_ = true;
+  register_allocator.unhandled_ = &register_allocator.unhandled_core_intervals_;
+
+  register_allocator.TryAllocateFreeReg(unhandled);
+
+  // Check that we have split the interval.
+  ASSERT_EQ(1u, register_allocator.unhandled_->Size());
+  // Check that we now need to find a new register where the next interval
+  // that uses the register starts.
+ ASSERT_EQ(20u, register_allocator.unhandled_->Get(0)->GetStart()); +} + } // namespace art diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 33084df94b..471307ec31 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -15,22 +15,12 @@ */ #include "ssa_builder.h" + #include "nodes.h" +#include "ssa_type_propagation.h" namespace art { -static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) { - // We trust the verifier has already done the necessary checking. - switch (existing) { - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - case Primitive::kPrimNot: - return existing; - default: - return new_type; - } -} - void SsaBuilder::BuildSsa() { // 1) Visit in reverse post order. We need to have all predecessors of a block visited // (with the exception of loops) in order to create the right environment for that @@ -44,18 +34,18 @@ void SsaBuilder::BuildSsa() { HBasicBlock* block = loop_headers_.Get(i); for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - Primitive::Type type = Primitive::kPrimVoid; for (size_t pred = 0; pred < block->GetPredecessors().Size(); pred++) { HInstruction* input = ValueOfLocal(block->GetPredecessors().Get(pred), phi->GetRegNumber()); phi->AddInput(input); - type = MergeTypes(type, input->GetType()); } - phi->SetType(type); } } - // TODO: Now that the type of loop phis is set, we need a type propagation phase. - // 3) Clear locals. + // 3) Propagate types of phis. + SsaTypePropagation type_propagation(GetGraph()); + type_propagation.Run(); + + // 4) Clear locals. // TODO: Move this to a dead code eliminator phase. for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions()); !it.Done(); @@ -95,27 +85,33 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { // All predecessors have already been visited because we are visiting in reverse post order. // We merge the values of all locals, creating phis if those values differ. for (size_t local = 0; local < current_locals_->Size(); local++) { + bool one_predecessor_has_no_value = false; bool is_different = false; HInstruction* value = ValueOfLocal(block->GetPredecessors().Get(0), local); - for (size_t i = 1; i < block->GetPredecessors().Size(); i++) { - if (ValueOfLocal(block->GetPredecessors().Get(i), local) != value) { - is_different = true; + + for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) { + HInstruction* current = ValueOfLocal(block->GetPredecessors().Get(i), local); + if (current == nullptr) { + one_predecessor_has_no_value = true; break; + } else if (current != value) { + is_different = true; } } + + if (one_predecessor_has_no_value) { + // If one predecessor has no value for this local, we trust the verifier has + // successfully checked that there is a store dominating any read after this block. + continue; + } + if (is_different) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( GetGraph()->GetArena(), local, block->GetPredecessors().Size(), Primitive::kPrimVoid); - Primitive::Type type = Primitive::kPrimVoid; for (size_t i = 0; i < block->GetPredecessors().Size(); i++) { HInstruction* value = ValueOfLocal(block->GetPredecessors().Get(i), local); - // We need to merge the incoming types, as the Dex format does not - // guarantee the inputs have the same type. In particular the 0 constant is - // used for all types, but the graph builder treats it as an int. 
- type = MergeTypes(type, value->GetType()); phi->SetRawInputAt(i, value); } - phi->SetType(type); block->AddPhi(phi); value = phi; } diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index dc4b2e59fc..cd13d81a36 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -15,6 +15,9 @@ */ #include "ssa_liveness_analysis.h" + +#include "base/bit_vector-inl.h" +#include "code_generator.h" #include "nodes.h" namespace art { @@ -80,38 +83,6 @@ static void VisitBlockForLinearization(HBasicBlock* block, order->Add(block); } -class HLinearOrderIterator : public ValueObject { - public: - explicit HLinearOrderIterator(const GrowableArray<HBasicBlock*>& post_order) - : post_order_(post_order), index_(post_order.Size()) {} - - bool Done() const { return index_ == 0; } - HBasicBlock* Current() const { return post_order_.Get(index_ -1); } - void Advance() { --index_; DCHECK_GE(index_, 0U); } - - private: - const GrowableArray<HBasicBlock*>& post_order_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator); -}; - -class HLinearPostOrderIterator : public ValueObject { - public: - explicit HLinearPostOrderIterator(const GrowableArray<HBasicBlock*>& post_order) - : post_order_(post_order), index_(0) {} - - bool Done() const { return index_ == post_order_.Size(); } - HBasicBlock* Current() const { return post_order_.Get(index_); } - void Advance() { ++index_; } - - private: - const GrowableArray<HBasicBlock*>& post_order_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator); -}; - void SsaLivenessAnalysis::LinearizeGraph() { // For simplicity of the implementation, we create post linear order. The order for // computing live ranges is the reverse of that order. @@ -131,30 +102,39 @@ void SsaLivenessAnalysis::NumberInstructions() { // to differentiate between the start and end of an instruction. Adding 2 to // the lifetime position for each instruction ensures the start of an // instruction is different than the end of the previous instruction. - for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) { + HGraphVisitor* location_builder = codegen_->GetLocationBuilder(); + for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); block->SetLifetimeStart(lifetime_position); - lifetime_position += 2; for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); - if (current->HasUses()) { + current->Accept(location_builder); + LocationSummary* locations = current->GetLocations(); + if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( - new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType())); + new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType(), current)); } current->SetLifetimePosition(lifetime_position); } + lifetime_position += 2; + + // Add a null marker to notify we are starting a block. 
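// Illustrative aside, not part of this change: the numbering above gives every
// block and every instruction two lifetime positions, so "before instruction"
// (even) and "after instruction" (odd) are distinct points. Standalone sketch
// over simplified stand-ins (phis, which share the block start position, are
// left out):

#include <cstddef>
#include <vector>

struct SketchInstruction { size_t lifetime_position = 0; };
struct SketchBlock {
  size_t lifetime_start = 0;
  size_t lifetime_end = 0;
  std::vector<SketchInstruction*> instructions;
};

void NumberLifetimes(const std::vector<SketchBlock*>& linear_order) {
  size_t position = 0;
  for (SketchBlock* block : linear_order) {
    block->lifetime_start = position;
    position += 2;
    for (SketchInstruction* instruction : block->instructions) {
      instruction->lifetime_position = position;
      position += 2;
    }
    block->lifetime_end = position;
  }
}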
+ instructions_from_lifetime_position_.Add(nullptr); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); - if (current->HasUses()) { + current->Accept(codegen_->GetLocationBuilder()); + LocationSummary* locations = current->GetLocations(); + if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( - new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType())); + new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType(), current)); } + instructions_from_lifetime_position_.Add(current); current->SetLifetimePosition(lifetime_position); lifetime_position += 2; } @@ -165,7 +145,7 @@ void SsaLivenessAnalysis::NumberInstructions() { } void SsaLivenessAnalysis::ComputeLiveness() { - for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) { + for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); block_infos_.Put( block->GetBlockId(), @@ -186,7 +166,7 @@ void SsaLivenessAnalysis::ComputeLiveness() { void SsaLivenessAnalysis::ComputeLiveRanges() { // Do a post order visit, adding inputs of instructions live in the block where // that instruction is defined, and killing instructions that are being visited. - for (HLinearPostOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) { + for (HLinearPostOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); BitVector* kill = GetKillSet(*block); @@ -201,7 +181,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { for (HInstructionIterator it(successor->GetPhis()); !it.Done(); it.Advance()) { HInstruction* phi = it.Current(); HInstruction* input = phi->InputAt(phi_input_index); - input->GetLiveInterval()->AddPhiUse(phi, block); + input->GetLiveInterval()->AddPhiUse(phi, phi_input_index, block); // A phi input whose last user is the phi dies at the end of the predecessor block, // and not at the phi's lifetime position. live_in->SetBit(input->GetSsaIndex()); @@ -209,6 +189,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } // Add a range that covers this block to all instructions live_in because of successors. + // Instructions defined in this block will have their start of the range adjusted. for (uint32_t idx : live_in->Indexes()) { HInstruction* current = instructions_from_ssa_index_.Get(idx); current->GetLiveInterval()->AddRange(block->GetLifetimeStart(), block->GetLifetimeEnd()); @@ -226,9 +207,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // All inputs of an instruction must be live. for (size_t i = 0, e = current->InputCount(); i < e; ++i) { HInstruction* input = current->InputAt(i); - DCHECK(input->HasSsaIndex()); - live_in->SetBit(input->GetSsaIndex()); - input->GetLiveInterval()->AddUse(current); + // Some instructions 'inline' their inputs, that is they do not need + // to be materialized. 
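// Illustrative aside, not part of this change: ComputeLiveRanges above is the
// classic backward scan -- seed the live set from the successors (and phi
// inputs), then walk the block bottom-up, adding uses and killing definitions.
// Standalone sketch over integer SSA ids (def < 0 means "defines nothing"):

#include <set>
#include <vector>

struct SketchStatement { int def; std::vector<int> uses; };

std::set<int> LiveInOfBlock(const std::vector<SketchStatement>& statements,
                            const std::set<int>& live_out) {
  std::set<int> live = live_out;
  for (auto it = statements.rbegin(); it != statements.rend(); ++it) {
    if (it->def >= 0) live.erase(it->def);       // The definition kills the value.
    for (int use : it->uses) live.insert(use);   // Every input must be live above.
  }
  return live;
}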
+ if (input->HasSsaIndex()) { + live_in->SetBit(input->GetSsaIndex()); + input->GetLiveInterval()->AddUse(current, i, false); + } } if (current->HasEnvironment()) { @@ -239,7 +223,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { if (instruction != nullptr) { DCHECK(instruction->HasSsaIndex()); live_in->SetBit(instruction->GetSsaIndex()); - instruction->GetLiveInterval()->AddUse(current); + instruction->GetLiveInterval()->AddUse(current, i, true); } } } @@ -251,6 +235,10 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { if (current->HasSsaIndex()) { kill->SetBit(current->GetSsaIndex()); live_in->ClearBit(current->GetSsaIndex()); + LiveInterval* interval = current->GetLiveInterval(); + DCHECK((interval->GetFirstRange() == nullptr) + || (interval->GetStart() == current->GetLifetimePosition())); + interval->SetFrom(current->GetLifetimePosition()); } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 4d56e1f9c1..c62e61b2cd 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -21,6 +21,8 @@ namespace art { +class CodeGenerator; + class BlockInfo : public ArenaObject { public: BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values) @@ -45,7 +47,7 @@ class BlockInfo : public ArenaObject { }; /** - * A live range contains the start and end of a range where an instruction + * A live range contains the start and end of a range where an instruction or a temporary * is live. */ class LiveRange : public ArenaObject { @@ -87,26 +89,43 @@ class LiveRange : public ArenaObject { */ class UsePosition : public ArenaObject { public: - UsePosition(HInstruction* user, size_t position, UsePosition* next) - : user_(user), position_(position), next_(next) { - DCHECK(user->AsPhi() != nullptr || GetPosition() == user->GetLifetimePosition()); + UsePosition(HInstruction* user, + size_t input_index, + bool is_environment, + size_t position, + UsePosition* next) + : user_(user), + input_index_(input_index), + is_environment_(is_environment), + position_(position), + next_(next) { + DCHECK(user->IsPhi() + || (GetPosition() == user->GetLifetimePosition() + 1) + || (GetPosition() == user->GetLifetimePosition())); DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition()); } size_t GetPosition() const { return position_; } UsePosition* GetNext() const { return next_; } + void SetNext(UsePosition* next) { next_ = next; } HInstruction* GetUser() const { return user_; } - void Dump(std::ostream& stream) { + bool GetIsEnvironment() const { return is_environment_; } + + size_t GetInputIndex() const { return input_index_; } + + void Dump(std::ostream& stream) const { stream << position_; } private: HInstruction* const user_; + const size_t input_index_; + const bool is_environment_; const size_t position_; - UsePosition* const next_; + UsePosition* next_; DISALLOW_COPY_AND_ASSIGN(UsePosition); }; @@ -117,38 +136,97 @@ class UsePosition : public ArenaObject { */ class LiveInterval : public ArenaObject { public: - LiveInterval(ArenaAllocator* allocator, Primitive::Type type) + LiveInterval(ArenaAllocator* allocator, + Primitive::Type type, + HInstruction* defined_by = nullptr, + bool is_fixed = false, + int reg = kNoRegister, + bool is_temp = false, + bool is_slow_path_safepoint = false) : allocator_(allocator), first_range_(nullptr), last_range_(nullptr), first_use_(nullptr), type_(type), next_sibling_(nullptr), - register_(kNoRegister) {} + parent_(this), + 
register_(reg), + spill_slot_(kNoSpillSlot), + is_fixed_(is_fixed), + is_temp_(is_temp), + is_slow_path_safepoint_(is_slow_path_safepoint), + defined_by_(defined_by) {} + + static LiveInterval* MakeSlowPathInterval(ArenaAllocator* allocator, HInstruction* instruction) { + return new (allocator) LiveInterval( + allocator, Primitive::kPrimVoid, instruction, false, kNoRegister, false, true); + } + + static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) { + return new (allocator) LiveInterval(allocator, type, nullptr, true, reg, false); + } - void AddUse(HInstruction* instruction) { + static LiveInterval* MakeTempInterval(ArenaAllocator* allocator, + HInstruction* defined_by, + Primitive::Type type) { + return new (allocator) LiveInterval(allocator, type, defined_by, false, kNoRegister, true); + } + + bool IsFixed() const { return is_fixed_; } + bool IsSlowPathSafepoint() const { return is_slow_path_safepoint_; } + + void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) { + // Set the use within the instruction. size_t position = instruction->GetLifetimePosition(); + if (instruction->GetLocations()->InputOverlapsWithOutputOrTemp(input_index, is_environment)) { + // If it overlaps, we need to make sure the user will not try to allocate a temp + // or its output to the same register. + ++position; + } + if ((first_use_ != nullptr) + && (first_use_->GetUser() == instruction) + && (first_use_->GetPosition() < position)) { + // The user uses the instruction multiple times, and one use dies before the other. + // We update the use list so that the latter is first. + DCHECK(first_use_->GetPosition() + 1 == position); + UsePosition* new_use = new (allocator_) UsePosition( + instruction, input_index, is_environment, position, first_use_->GetNext()); + first_use_->SetNext(new_use); + if (first_range_->GetEnd() == first_use_->GetPosition()) { + first_range_->end_ = position; + } + return; + } + size_t start_block_position = instruction->GetBlock()->GetLifetimeStart(); - size_t end_block_position = instruction->GetBlock()->GetLifetimeEnd(); if (first_range_ == nullptr) { // First time we see a use of that interval. - first_range_ = last_range_ = new (allocator_) LiveRange(start_block_position, position, nullptr); + first_range_ = last_range_ = new (allocator_) LiveRange( + start_block_position, position, nullptr); } else if (first_range_->GetStart() == start_block_position) { - // There is a use later in the same block. + // There is a use later in the same block or in a following block. + // Note that in such a case, `AddRange` for the whole blocks has been called + // before arriving in this method, and this is the reason the start of + // `first_range_` is before the given `position`. DCHECK_LE(position, first_range_->GetEnd()); - } else if (first_range_->GetStart() == end_block_position) { - // Last use is in the following block. - first_range_->start_ = start_block_position; } else { + DCHECK(first_range_->GetStart() > position); // There is a hole in the interval. Create a new range. + // Note that the start of `first_range_` can be equal to `end`: two blocks + // having adjacent lifetime positions are not necessarily + // predecessor/successor. When two blocks are predecessor/successor, the + // liveness algorithm has called `AddRange` before arriving in this method, + // and the check line 205 would succeed. 
first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_); } - first_use_ = new (allocator_) UsePosition(instruction, position, first_use_); + first_use_ = new (allocator_) UsePosition( + instruction, input_index, is_environment, position, first_use_); } - void AddPhiUse(HInstruction* instruction, HBasicBlock* block) { - DCHECK(instruction->AsPhi() != nullptr); - first_use_ = new (allocator_) UsePosition(instruction, block->GetLifetimeEnd(), first_use_); + void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) { + DCHECK(instruction->IsPhi()); + first_use_ = new (allocator_) UsePosition( + instruction, input_index, false, block->GetLifetimeEnd(), first_use_); } void AddRange(size_t start, size_t end) { @@ -157,7 +235,10 @@ class LiveInterval : public ArenaObject { } else if (first_range_->GetStart() == end) { // There is a use in the following block. first_range_->start_ = start; + } else if (first_range_->GetStart() == start && first_range_->GetEnd() == end) { + DCHECK(is_fixed_); } else { + DCHECK_GT(first_range_->GetStart(), end); // There is a hole in the interval. Create a new range. first_range_ = new (allocator_) LiveRange(start, end, first_range_); } @@ -178,11 +259,27 @@ class LiveInterval : public ArenaObject { } } + bool HasSpillSlot() const { return spill_slot_ != kNoSpillSlot; } + void SetSpillSlot(int slot) { + DCHECK(!is_fixed_); + DCHECK(!is_temp_); + spill_slot_ = slot; + } + int GetSpillSlot() const { return spill_slot_; } + void SetFrom(size_t from) { - DCHECK(first_range_ != nullptr); - first_range_->start_ = from; + if (first_range_ != nullptr) { + first_range_->start_ = from; + } else { + // Instruction without uses. + DCHECK(!defined_by_->HasUses()); + DCHECK(from == defined_by_->GetLifetimePosition()); + first_range_ = last_range_ = new (allocator_) LiveRange(from, from + 2, nullptr); + } } + LiveInterval* GetParent() const { return parent_; } + LiveRange* GetFirstRange() const { return first_range_; } int GetRegister() const { return register_; } @@ -190,11 +287,14 @@ class LiveInterval : public ArenaObject { void ClearRegister() { register_ = kNoRegister; } bool HasRegister() const { return register_ != kNoRegister; } - bool IsDeadAt(size_t position) { + bool IsDeadAt(size_t position) const { return last_range_->GetEnd() <= position; } - bool Covers(size_t position) { + bool Covers(size_t position) const { + if (IsDeadAt(position)) { + return false; + } LiveRange* current = first_range_; while (current != nullptr) { if (position >= current->GetStart() && position < current->GetEnd()) { @@ -208,27 +308,10 @@ class LiveInterval : public ArenaObject { /** * Returns the first intersection of this interval with `other`. */ - size_t FirstIntersectionWith(LiveInterval* other) { - // We only call this method if there is a lifetime hole in this interval - // at the start of `other`. - DCHECK(!Covers(other->GetStart())); - DCHECK_LE(GetStart(), other->GetStart()); - // Move to the range in this interval that starts after the other interval. - size_t other_start = other->GetStart(); - LiveRange* my_range = first_range_; - while (my_range != nullptr) { - if (my_range->GetStart() >= other_start) { - break; - } else { - my_range = my_range->GetNext(); - } - } - if (my_range == nullptr) { - return kNoLifetime; - } - + size_t FirstIntersectionWith(LiveInterval* other) const { // Advance both intervals and find the first matching range start in // this interval. 
+ LiveRange* my_range = first_range_; LiveRange* other_range = other->first_range_; do { if (my_range->IntersectsWith(*other_range)) { @@ -252,16 +335,40 @@ class LiveInterval : public ArenaObject { return first_range_->GetStart(); } + size_t GetEnd() const { + return last_range_->GetEnd(); + } + size_t FirstRegisterUseAfter(size_t position) const { + if (is_temp_) { + return position == GetStart() ? position : kNoLifetime; + } + if (position == GetStart() && defined_by_ != nullptr) { + LocationSummary* locations = defined_by_->GetLocations(); + Location location = locations->Out(); + // This interval is the first interval of the instruction. If the output + // of the instruction requires a register, we return the position of that instruction + // as the first register use. + if (location.IsUnallocated()) { + if ((location.GetPolicy() == Location::kRequiresRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && locations->InAt(0).GetPolicy() == Location::kRequiresRegister)) { + return position; + } + } + } + UsePosition* use = first_use_; - while (use != nullptr) { + size_t end = GetEnd(); + while (use != nullptr && use->GetPosition() <= end) { size_t use_position = use->GetPosition(); - // TODO: Once we plug the Locations builder of the code generator - // to the register allocator, this method must be adjusted. We - // test if there is an environment, because these are currently the only - // instructions that could have more uses than the number of registers. - if (use_position >= position && !use->GetUser()->NeedsEnvironment()) { - return use_position; + if (use_position >= position && !use->GetIsEnvironment()) { + Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); + if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { + // Return the lifetime just before the user, so that the interval has a register + // when entering the user. + return use->GetUser()->GetLifetimePosition() - 1; + } } use = use->GetNext(); } @@ -272,10 +379,18 @@ class LiveInterval : public ArenaObject { return FirstRegisterUseAfter(GetStart()); } + UsePosition* GetFirstUse() const { + return first_use_; + } + Primitive::Type GetType() const { return type_; } + HInstruction* GetDefinedBy() const { + return defined_by_; + } + /** * Split this interval at `position`. This interval is changed to: * [start ... position). @@ -284,7 +399,8 @@ class LiveInterval : public ArenaObject { * [position ... end) */ LiveInterval* SplitAt(size_t position) { - DCHECK(next_sibling_ == nullptr); + DCHECK(!is_temp_); + DCHECK(!is_fixed_); DCHECK_GT(position, GetStart()); if (last_range_->GetEnd() <= position) { @@ -293,7 +409,9 @@ class LiveInterval : public ArenaObject { } LiveInterval* new_interval = new (allocator_) LiveInterval(allocator_, type_); + new_interval->next_sibling_ = next_sibling_; next_sibling_ = new_interval; + new_interval->parent_ = parent_; new_interval->first_use_ = first_use_; LiveRange* current = first_range_; @@ -338,12 +456,12 @@ class LiveInterval : public ArenaObject { return nullptr; } - bool StartsBefore(LiveInterval* other) const { + bool StartsBeforeOrAt(LiveInterval* other) const { return GetStart() <= other->GetStart(); } bool StartsAfter(LiveInterval* other) const { - return GetStart() >= other->GetStart(); + return GetStart() > other->GetStart(); } void Dump(std::ostream& stream) const { @@ -383,21 +501,42 @@ class LiveInterval : public ArenaObject { // Live interval that is the result of a split. 
LiveInterval* next_sibling_; + // The first interval from which split intervals come from. + LiveInterval* parent_; + // The register allocated to this interval. int register_; + // The spill slot allocated to this interval. + int spill_slot_; + + // Whether the interval is for a fixed register. + const bool is_fixed_; + + // Whether the interval is for a temporary. + const bool is_temp_; + + // Whether the interval is for a safepoint that calls on slow path. + const bool is_slow_path_safepoint_; + + // The instruction represented by this interval. + HInstruction* const defined_by_; + static constexpr int kNoRegister = -1; + static constexpr int kNoSpillSlot = -1; DISALLOW_COPY_AND_ASSIGN(LiveInterval); }; class SsaLivenessAnalysis : public ValueObject { public: - explicit SsaLivenessAnalysis(const HGraph& graph) + SsaLivenessAnalysis(const HGraph& graph, CodeGenerator* codegen) : graph_(graph), + codegen_(codegen), linear_post_order_(graph.GetArena(), graph.GetBlocks().Size()), block_infos_(graph.GetArena(), graph.GetBlocks().Size()), instructions_from_ssa_index_(graph.GetArena(), 0), + instructions_from_lifetime_position_(graph.GetArena(), 0), number_of_ssa_values_(0) { block_infos_.SetSize(graph.GetBlocks().Size()); } @@ -424,6 +563,14 @@ class SsaLivenessAnalysis : public ValueObject { return instructions_from_ssa_index_.Get(index); } + HInstruction* GetInstructionFromPosition(size_t index) const { + return instructions_from_lifetime_position_.Get(index); + } + + size_t GetMaxLifetimePosition() const { + return instructions_from_lifetime_position_.Size() * 2 - 1; + } + size_t GetNumberOfSsaValues() const { return number_of_ssa_values_; } @@ -458,14 +605,52 @@ class SsaLivenessAnalysis : public ValueObject { bool UpdateLiveOut(const HBasicBlock& block); const HGraph& graph_; + CodeGenerator* const codegen_; GrowableArray<HBasicBlock*> linear_post_order_; GrowableArray<BlockInfo*> block_infos_; + + // Temporary array used when computing live_in, live_out, and kill sets. GrowableArray<HInstruction*> instructions_from_ssa_index_; + + // Temporary array used when inserting moves in the graph. 
+ GrowableArray<HInstruction*> instructions_from_lifetime_position_; size_t number_of_ssa_values_; DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis); }; +class HLinearOrderIterator : public ValueObject { + public: + explicit HLinearOrderIterator(const SsaLivenessAnalysis& liveness) + : post_order_(liveness.GetLinearPostOrder()), index_(liveness.GetLinearPostOrder().Size()) {} + + bool Done() const { return index_ == 0; } + HBasicBlock* Current() const { return post_order_.Get(index_ -1); } + void Advance() { --index_; DCHECK_GE(index_, 0U); } + + private: + const GrowableArray<HBasicBlock*>& post_order_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator); +}; + +class HLinearPostOrderIterator : public ValueObject { + public: + explicit HLinearPostOrderIterator(const SsaLivenessAnalysis& liveness) + : post_order_(liveness.GetLinearPostOrder()), index_(0) {} + + bool Done() const { return index_ == post_order_.Size(); } + HBasicBlock* Current() const { return post_order_.Get(index_); } + void Advance() { ++index_; } + + private: + const GrowableArray<HBasicBlock*>& post_order_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_ diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc new file mode 100644 index 0000000000..e02a182ec8 --- /dev/null +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ssa_phi_elimination.h" + +namespace art { + +void SsaDeadPhiElimination::Run() { + // Add to the worklist phis referenced by non-phi instructions. + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + if (phi->HasEnvironmentUses()) { + // TODO: Do we want to keep that phi alive? + worklist_.Add(phi); + phi->SetLive(); + continue; + } + for (HUseIterator<HInstruction> it(phi->GetUses()); !it.Done(); it.Advance()) { + HUseListNode<HInstruction>* current = it.Current(); + HInstruction* user = current->GetUser(); + if (!user->IsPhi()) { + worklist_.Add(phi); + phi->SetLive(); + } else { + phi->SetDead(); + } + } + } + } + + // Process the worklist by propagating liveness to phi inputs. + while (!worklist_.IsEmpty()) { + HPhi* phi = worklist_.Pop(); + for (HInputIterator it(phi); !it.Done(); it.Advance()) { + HInstruction* input = it.Current(); + if (input->IsPhi() && input->AsPhi()->IsDead()) { + worklist_.Add(input->AsPhi()); + input->AsPhi()->SetLive(); + } + } + } + + // Remove phis that are not live. Visit in post order so that phis + // that are not inputs of loop phis can be removed when they have + // no users left (dead phis might use dead phis). 
+ for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + HInstruction* current = block->GetFirstPhi(); + HInstruction* next = nullptr; + while (current != nullptr) { + next = current->GetNext(); + if (current->AsPhi()->IsDead()) { + if (current->HasUses()) { + for (HUseIterator<HInstruction> it(current->GetUses()); !it.Done(); it.Advance()) { + HUseListNode<HInstruction>* user_node = it.Current(); + HInstruction* user = user_node->GetUser(); + DCHECK(user->IsLoopHeaderPhi()); + DCHECK(user->AsPhi()->IsDead()); + // Just put itself as an input. The phi will be removed in this loop anyway. + user->SetRawInputAt(user_node->GetIndex(), user); + current->RemoveUser(user, user_node->GetIndex()); + } + } + block->RemovePhi(current->AsPhi()); + } + current = next; + } + } +} + +void SsaRedundantPhiElimination::Run() { + // Add all phis in the worklist. + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + worklist_.Add(it.Current()->AsPhi()); + } + } + + while (!worklist_.IsEmpty()) { + HPhi* phi = worklist_.Pop(); + + // If the phi has already been processed, continue. + if (!phi->IsInBlock()) { + continue; + } + + // Find if the inputs of the phi are the same instruction. + HInstruction* candidate = phi->InputAt(0); + // A loop phi cannot have itself as the first phi. Note that this + // check relies on our simplification pass ensuring the pre-header + // block is first in the list of predecessors of the loop header. + DCHECK(!phi->IsLoopHeaderPhi() || phi->GetBlock()->IsLoopPreHeaderFirstPredecessor()); + DCHECK_NE(phi, candidate); + + for (size_t i = 1; i < phi->InputCount(); ++i) { + HInstruction* input = phi->InputAt(i); + // For a loop phi, if the input is the phi, the phi is still candidate for + // elimination. + if (input != candidate && input != phi) { + candidate = nullptr; + break; + } + } + + // If the inputs are not the same, continue. + if (candidate == nullptr) { + continue; + } + + if (phi->IsInLoop()) { + // Because we're updating the users of this phi, we may have new + // phis candidate for elimination if this phi is in a loop. Add phis that + // used this phi to the worklist. + for (HUseIterator<HInstruction> it(phi->GetUses()); !it.Done(); it.Advance()) { + HUseListNode<HInstruction>* current = it.Current(); + HInstruction* user = current->GetUser(); + if (user->IsPhi()) { + worklist_.Add(user->AsPhi()); + } + } + } + phi->ReplaceWith(candidate); + phi->GetBlock()->RemovePhi(phi); + } +} + +} // namespace art diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h new file mode 100644 index 0000000000..5274f09f3f --- /dev/null +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ +#define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ + +#include "nodes.h" + +namespace art { + +/** + * Optimization phase that removes dead phis from the graph. Dead phis are unused + * phis, or phis only used by other phis. + */ +class SsaDeadPhiElimination : public ValueObject { + public: + explicit SsaDeadPhiElimination(HGraph* graph) + : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {} + + void Run(); + + private: + HGraph* const graph_; + GrowableArray<HPhi*> worklist_; + + static constexpr size_t kDefaultWorklistSize = 8; + + DISALLOW_COPY_AND_ASSIGN(SsaDeadPhiElimination); +}; + +/** + * Removes redundant phis that may have been introduced when doing SSA conversion. + * For example, when entering a loop, we create phis for all live registers. These + * registers might be updated with the same value, or not updated at all. We can just + * replace the phi with the value when entering the loop. + */ +class SsaRedundantPhiElimination : public ValueObject { + public: + explicit SsaRedundantPhiElimination(HGraph* graph) + : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {} + + void Run(); + + private: + HGraph* const graph_; + GrowableArray<HPhi*> worklist_; + + static constexpr size_t kDefaultWorklistSize = 8; + + DISALLOW_COPY_AND_ASSIGN(SsaRedundantPhiElimination); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index d10461980d..fffe5c2b44 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -84,9 +84,19 @@ static void TestCode(const uint16_t* data, const char* expected) { ASSERT_NE(graph, nullptr); graph->BuildDominatorTree(); + // Suspend checks implementation may change in the future, and this test relies + // on how instructions are ordered. + RemoveSuspendChecks(graph); graph->TransformToSSA(); ReNumberInstructions(graph); + // Test that phis had their type set. + for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) { + for (HInstructionIterator it(graph->GetBlocks().Get(i)->GetPhis()); !it.Done(); it.Advance()) { + ASSERT_NE(it.Current()->GetType(), Primitive::kPrimVoid); + } + } + SsaPrettyPrinter printer(graph); printer.VisitInsertionOrder(); @@ -99,7 +109,7 @@ TEST(SsaTest, CFG1) { "BasicBlock 0, succ: 1\n" " 0: IntConstant 0 [2, 2]\n" " 1: Goto\n" - "BasicBlock 1, pred: 0, succ: 2, 5\n" + "BasicBlock 1, pred: 0, succ: 5, 2\n" " 2: Equal(0, 0) [3]\n" " 3: If(2)\n" "BasicBlock 2, pred: 1, succ: 3\n" @@ -129,7 +139,7 @@ TEST(SsaTest, CFG2) { " 0: IntConstant 0 [6, 3, 3]\n" " 1: IntConstant 4 [6]\n" " 2: Goto\n" - "BasicBlock 1, pred: 0, succ: 2, 5\n" + "BasicBlock 1, pred: 0, succ: 5, 2\n" " 3: Equal(0, 0) [4]\n" " 4: If(3)\n" "BasicBlock 2, pred: 1, succ: 3\n" @@ -197,8 +207,8 @@ TEST(SsaTest, Loop1) { "BasicBlock 2, pred: 3, 6, succ: 3\n" " 4: Phi(6, 0) [6]\n" " 5: Goto\n" - "BasicBlock 3, pred: 2, 5, succ: 2\n" - " 6: Phi(4, 0) [4]\n" + "BasicBlock 3, pred: 5, 2, succ: 2\n" + " 6: Phi(0, 4) [4]\n" " 7: Goto\n" "BasicBlock 4\n" // Synthesized blocks to avoid critical edge. 
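The two phi elimination passes declared in ssa_phi_elimination.h above are plain worklist passes over an SSA-form HGraph. A minimal sketch of how a caller could chain them, using only the constructors and Run() methods introduced in this change (the driver snippet itself and the pass ordering are assumptions, not part of this diff):

    #include "ssa_phi_elimination.h"

    // Assumed driver snippet; `graph` is an HGraph* already converted to SSA.
    SsaRedundantPhiElimination redundant_phi(graph);
    redundant_phi.Run();   // Collapse phis whose inputs all carry the same value.
    SsaDeadPhiElimination dead_phi(graph);
    dead_phi.Run();        // Then drop phis that have no non-phi users left.

Running the redundant pass first is a plausible ordering, since collapsing redundant phis can leave more phis without real users; the ordering actually used by the compiler driver is not shown in this diff.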
@@ -288,8 +298,8 @@ TEST(SsaTest, Loop4) { " 2: Goto\n" "BasicBlock 1, pred: 0, succ: 4\n" " 3: Goto\n" - "BasicBlock 2, pred: 3, 4, succ: 5, 3\n" - " 4: Phi(1, 0) [9, 5, 5]\n" + "BasicBlock 2, pred: 4, 3, succ: 5, 3\n" + " 4: Phi(0, 1) [9, 5, 5]\n" " 5: Equal(4, 4) [6]\n" " 6: If(5)\n" "BasicBlock 3, pred: 2, succ: 2\n" @@ -329,8 +339,8 @@ TEST(SsaTest, Loop5) { " 6: Goto\n" "BasicBlock 3, pred: 1, succ: 8\n" " 7: Goto\n" - "BasicBlock 4, pred: 5, 8, succ: 6, 5\n" - " 8: Phi(8, 14) [8, 12, 9, 9]\n" + "BasicBlock 4, pred: 8, 5, succ: 6, 5\n" + " 8: Phi(14, 8) [8, 12, 9, 9]\n" " 9: Equal(8, 8) [10]\n" " 10: If(9)\n" "BasicBlock 5, pred: 4, succ: 4\n" @@ -409,7 +419,7 @@ TEST(SsaTest, Loop7) { " 3: Goto\n" "BasicBlock 1, pred: 0, succ: 2\n" " 4: Goto\n" - "BasicBlock 2, pred: 1, 5, succ: 3, 8\n" + "BasicBlock 2, pred: 1, 5, succ: 8, 3\n" " 5: Phi(0, 1) [12, 6, 6]\n" " 6: Equal(5, 5) [7]\n" " 7: If(6)\n" @@ -459,4 +469,73 @@ TEST(SsaTest, DeadLocal) { TestCode(data, expected); } +TEST(SsaTest, LocalInIf) { + // Test that we do not create a phi in the join block when one predecessor + // does not update the local. + const char* expected = + "BasicBlock 0, succ: 1\n" + " 0: IntConstant 0 [3, 3]\n" + " 1: IntConstant 4\n" + " 2: Goto\n" + "BasicBlock 1, pred: 0, succ: 5, 2\n" + " 3: Equal(0, 0) [4]\n" + " 4: If(3)\n" + "BasicBlock 2, pred: 1, succ: 3\n" + " 5: Goto\n" + "BasicBlock 3, pred: 2, 5, succ: 4\n" + " 6: ReturnVoid\n" + "BasicBlock 4, pred: 3\n" + " 7: Exit\n" + // Synthesized block to avoid critical edge. + "BasicBlock 5, pred: 1, succ: 3\n" + " 8: Goto\n"; + + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 3, + Instruction::CONST_4 | 4 << 12 | 1 << 8, + Instruction::RETURN_VOID); + + TestCode(data, expected); +} + +TEST(SsaTest, MultiplePredecessors) { + // Test that we do not create a phi when one predecessor + // does not update the local. + const char* expected = + "BasicBlock 0, succ: 1\n" + " 0: IntConstant 0 [4, 8, 6, 6, 2, 2, 8, 4]\n" + " 1: Goto\n" + "BasicBlock 1, pred: 0, succ: 3, 2\n" + " 2: Equal(0, 0) [3]\n" + " 3: If(2)\n" + "BasicBlock 2, pred: 1, succ: 5\n" + " 4: Add(0, 0)\n" + " 5: Goto\n" + "BasicBlock 3, pred: 1, succ: 7, 4\n" + " 6: Equal(0, 0) [7]\n" + " 7: If(6)\n" + "BasicBlock 4, pred: 3, succ: 5\n" + " 8: Add(0, 0)\n" + " 9: Goto\n" + // This block should not get a phi for local 1. + "BasicBlock 5, pred: 2, 4, 7, succ: 6\n" + " 10: ReturnVoid\n" + "BasicBlock 6, pred: 5\n" + " 11: Exit\n" + "BasicBlock 7, pred: 3, succ: 5\n" + " 12: Goto\n"; + + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 5, + Instruction::ADD_INT_LIT8 | 1 << 8, 0 << 8, + Instruction::GOTO | 0x0500, + Instruction::IF_EQ, 4, + Instruction::ADD_INT_LIT8 | 1 << 8, 0 << 8, + Instruction::RETURN_VOID); + + TestCode(data, expected); +} + } // namespace art diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc new file mode 100644 index 0000000000..a860cb7cfe --- /dev/null +++ b/compiler/optimizing/ssa_type_propagation.cc @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ssa_type_propagation.h" + +#include "nodes.h" + +namespace art { + +static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) { + // We trust the verifier has already done the necessary checking. + switch (existing) { + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + case Primitive::kPrimNot: + return existing; + default: + // Phis are initialized with a void type, so if we are asked + // to merge with a void type, we should use the existing one. + return new_type == Primitive::kPrimVoid + ? existing + : new_type; + } +} + +// Re-compute and update the type of the instruction. Returns +// whether or not the type was changed. +static bool UpdateType(HPhi* phi) { + Primitive::Type existing = phi->GetType(); + + Primitive::Type new_type = Primitive::kPrimVoid; + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + Primitive::Type input_type = phi->InputAt(i)->GetType(); + new_type = MergeTypes(new_type, input_type); + } + phi->SetType(new_type); + return existing != new_type; +} + +void SsaTypePropagation::Run() { + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + VisitBasicBlock(it.Current()); + } + ProcessWorklist(); +} + +void SsaTypePropagation::VisitBasicBlock(HBasicBlock* block) { + if (block->IsLoopHeader()) { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + // Set the initial type for the phi. Use the non back edge input for reaching + // a fixed point faster. + phi->SetType(phi->InputAt(0)->GetType()); + AddToWorklist(phi); + } + } else { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + if (UpdateType(phi)) { + AddDependentInstructionsToWorklist(phi); + } + } + } +} + +void SsaTypePropagation::ProcessWorklist() { + while (!worklist_.IsEmpty()) { + HPhi* instruction = worklist_.Pop(); + if (UpdateType(instruction)) { + AddDependentInstructionsToWorklist(instruction); + } + } +} + +void SsaTypePropagation::AddToWorklist(HPhi* instruction) { + worklist_.Add(instruction); +} + +void SsaTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) { + for (HUseIterator<HInstruction> it(instruction->GetUses()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->GetUser()->AsPhi(); + if (phi != nullptr) { + AddToWorklist(phi); + } + } +} + +} // namespace art diff --git a/compiler/optimizing/ssa_type_propagation.h b/compiler/optimizing/ssa_type_propagation.h new file mode 100644 index 0000000000..5f471a9811 --- /dev/null +++ b/compiler/optimizing/ssa_type_propagation.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_ +#define ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_ + +#include "nodes.h" + +namespace art { + +// Compute and propagate types of phis in the graph. +class SsaTypePropagation : public ValueObject { + public: + explicit SsaTypePropagation(HGraph* graph) + : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {} + + void Run(); + + private: + void VisitBasicBlock(HBasicBlock* block); + void ProcessWorklist(); + void AddToWorklist(HPhi* phi); + void AddDependentInstructionsToWorklist(HPhi* phi); + + HGraph* const graph_; + GrowableArray<HPhi*> worklist_; + + static constexpr size_t kDefaultWorklistSize = 8; + + DISALLOW_COPY_AND_ASSIGN(SsaTypePropagation); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_ diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h new file mode 100644 index 0000000000..0ea11ad04b --- /dev/null +++ b/compiler/optimizing/stack_map_stream.h @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_ +#define ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_ + +#include "base/bit_vector.h" +#include "memory_region.h" +#include "stack_map.h" +#include "utils/allocation.h" +#include "utils/growable_array.h" + +namespace art { + +/** + * Collects and builds stack maps for a method. All the stack maps + * for a method are placed in a CodeInfo object. + */ +class StackMapStream : public ValueObject { + public: + explicit StackMapStream(ArenaAllocator* allocator) + : stack_maps_(allocator, 10), + dex_register_maps_(allocator, 10 * 4), + inline_infos_(allocator, 2), + stack_mask_max_(-1), + number_of_stack_maps_with_inline_info_(0) {} + + // Compute bytes needed to encode a mask with the given maximum element. + static uint32_t StackMaskEncodingSize(int max_element) { + int number_of_bits = max_element + 1; // Need room for max element too. + return RoundUp(number_of_bits, kBitsPerByte) / kBitsPerByte; + } + + // See runtime/stack_map.h to know what these fields contain. 
+ struct StackMapEntry { + uint32_t dex_pc; + uint32_t native_pc_offset; + uint32_t register_mask; + BitVector* sp_mask; + uint32_t num_dex_registers; + uint8_t inlining_depth; + size_t dex_register_maps_start_index; + size_t inline_infos_start_index; + }; + + struct DexRegisterEntry { + DexRegisterMap::LocationKind kind; + int32_t value; + }; + + struct InlineInfoEntry { + uint32_t method_index; + }; + + void AddStackMapEntry(uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask, + BitVector* sp_mask, + uint32_t num_dex_registers, + uint8_t inlining_depth) { + StackMapEntry entry; + entry.dex_pc = dex_pc; + entry.native_pc_offset = native_pc_offset; + entry.register_mask = register_mask; + entry.sp_mask = sp_mask; + entry.num_dex_registers = num_dex_registers; + entry.inlining_depth = inlining_depth; + entry.dex_register_maps_start_index = dex_register_maps_.Size(); + entry.inline_infos_start_index = inline_infos_.Size(); + stack_maps_.Add(entry); + + if (sp_mask != nullptr) { + stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet()); + } + if (inlining_depth > 0) { + number_of_stack_maps_with_inline_info_++; + } + } + + void AddDexRegisterEntry(DexRegisterMap::LocationKind kind, int32_t value) { + DexRegisterEntry entry; + entry.kind = kind; + entry.value = value; + dex_register_maps_.Add(entry); + } + + void AddInlineInfoEntry(uint32_t method_index) { + InlineInfoEntry entry; + entry.method_index = method_index; + inline_infos_.Add(entry); + } + + size_t ComputeNeededSize() const { + return CodeInfo::kFixedSize + + ComputeStackMapSize() + + ComputeDexRegisterMapSize() + + ComputeInlineInfoSize(); + } + + size_t ComputeStackMapSize() const { + return stack_maps_.Size() * (StackMap::kFixedSize + StackMaskEncodingSize(stack_mask_max_)); + } + + size_t ComputeDexRegisterMapSize() const { + // We currently encode all dex register information per stack map. + return stack_maps_.Size() * DexRegisterMap::kFixedSize + // For each dex register entry. + + (dex_register_maps_.Size() * DexRegisterMap::SingleEntrySize()); + } + + size_t ComputeInlineInfoSize() const { + return inline_infos_.Size() * InlineInfo::SingleEntrySize() + // For encoding the depth. 
+ + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); + } + + size_t ComputeInlineInfoStart() const { + return ComputeDexRegisterMapStart() + ComputeDexRegisterMapSize(); + } + + size_t ComputeDexRegisterMapStart() const { + return CodeInfo::kFixedSize + ComputeStackMapSize(); + } + + void FillIn(MemoryRegion region) { + CodeInfo code_info(region); + code_info.SetOverallSize(region.size()); + + size_t stack_mask_size = StackMaskEncodingSize(stack_mask_max_); + uint8_t* memory_start = region.start(); + + MemoryRegion dex_register_maps_region = region.Subregion( + ComputeDexRegisterMapStart(), + ComputeDexRegisterMapSize()); + + MemoryRegion inline_infos_region = region.Subregion( + ComputeInlineInfoStart(), + ComputeInlineInfoSize()); + + code_info.SetNumberOfStackMaps(stack_maps_.Size()); + code_info.SetStackMaskSize(stack_mask_size); + + uintptr_t next_dex_register_map_offset = 0; + uintptr_t next_inline_info_offset = 0; + for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) { + StackMap stack_map = code_info.GetStackMapAt(i); + StackMapEntry entry = stack_maps_.Get(i); + + stack_map.SetDexPc(entry.dex_pc); + stack_map.SetNativePcOffset(entry.native_pc_offset); + stack_map.SetRegisterMask(entry.register_mask); + if (entry.sp_mask != nullptr) { + stack_map.SetStackMask(*entry.sp_mask); + } + + // Set the register map. + MemoryRegion region = dex_register_maps_region.Subregion( + next_dex_register_map_offset, + DexRegisterMap::kFixedSize + entry.num_dex_registers * DexRegisterMap::SingleEntrySize()); + next_dex_register_map_offset += region.size(); + DexRegisterMap dex_register_map(region); + stack_map.SetDexRegisterMapOffset(region.start() - memory_start); + + for (size_t i = 0; i < entry.num_dex_registers; ++i) { + DexRegisterEntry register_entry = + dex_register_maps_.Get(i + entry.dex_register_maps_start_index); + dex_register_map.SetRegisterInfo(i, register_entry.kind, register_entry.value); + } + + // Set the inlining info. + if (entry.inlining_depth != 0) { + MemoryRegion region = inline_infos_region.Subregion( + next_inline_info_offset, + InlineInfo::kFixedSize + entry.inlining_depth * InlineInfo::SingleEntrySize()); + next_inline_info_offset += region.size(); + InlineInfo inline_info(region); + + stack_map.SetInlineDescriptorOffset(region.start() - memory_start); + + inline_info.SetDepth(entry.inlining_depth); + for (size_t i = 0; i < entry.inlining_depth; ++i) { + InlineInfoEntry inline_entry = inline_infos_.Get(i + entry.inline_infos_start_index); + inline_info.SetMethodReferenceIndexAtDepth(i, inline_entry.method_index); + } + } else { + stack_map.SetInlineDescriptorOffset(InlineInfo::kNoInlineInfo); + } + } + } + + private: + GrowableArray<StackMapEntry> stack_maps_; + GrowableArray<DexRegisterEntry> dex_register_maps_; + GrowableArray<InlineInfoEntry> inline_infos_; + int stack_mask_max_; + size_t number_of_stack_maps_with_inline_info_; + + DISALLOW_COPY_AND_ASSIGN(StackMapStream); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_ diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc new file mode 100644 index 0000000000..5ee6ae049c --- /dev/null +++ b/compiler/optimizing/stack_map_test.cc @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "stack_map.h" +#include "stack_map_stream.h" +#include "utils/arena_bit_vector.h" + +#include "gtest/gtest.h" + +namespace art { + +bool SameBits(MemoryRegion region, const BitVector& bit_vector) { + for (size_t i = 0; i < region.size_in_bits(); ++i) { + if (region.LoadBit(i) != bit_vector.IsBitSet(i)) { + return false; + } + } + return true; +} + +TEST(StackMapTest, Test1) { + ArenaPool pool; + ArenaAllocator arena(&pool); + StackMapStream stream(&arena); + + ArenaBitVector sp_mask(&arena, 0, false); + stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, 2, 0); + stream.AddDexRegisterEntry(DexRegisterMap::kInStack, 0); + stream.AddDexRegisterEntry(DexRegisterMap::kConstant, -2); + + size_t size = stream.ComputeNeededSize(); + void* memory = arena.Alloc(size, kArenaAllocMisc); + MemoryRegion region(memory, size); + stream.FillIn(region); + + CodeInfo code_info(region); + ASSERT_EQ(0u, code_info.GetStackMaskSize()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u, stack_map.GetNativePcOffset()); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask()); + ASSERT_FALSE(stack_map.HasInlineInfo()); + + MemoryRegion stack_mask = stack_map.GetStackMask(); + ASSERT_TRUE(SameBits(stack_mask, sp_mask)); + + DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); + ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); + ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1)); + ASSERT_EQ(0, dex_registers.GetValue(0)); + ASSERT_EQ(-2, dex_registers.GetValue(1)); +} + +TEST(StackMapTest, Test2) { + ArenaPool pool; + ArenaAllocator arena(&pool); + StackMapStream stream(&arena); + + ArenaBitVector sp_mask1(&arena, 0, true); + sp_mask1.SetBit(2); + sp_mask1.SetBit(4); + stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, 2, 2); + stream.AddDexRegisterEntry(DexRegisterMap::kInStack, 0); + stream.AddDexRegisterEntry(DexRegisterMap::kConstant, -2); + stream.AddInlineInfoEntry(42); + stream.AddInlineInfoEntry(82); + + ArenaBitVector sp_mask2(&arena, 0, true); + sp_mask2.SetBit(3); + sp_mask1.SetBit(8); + stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, 1, 0); + stream.AddDexRegisterEntry(DexRegisterMap::kInRegister, 0); + + size_t size = stream.ComputeNeededSize(); + void* memory = arena.Alloc(size, kArenaAllocMisc); + MemoryRegion region(memory, size); + stream.FillIn(region); + + CodeInfo code_info(region); + ASSERT_EQ(1u, code_info.GetStackMaskSize()); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u, stack_map.GetNativePcOffset()); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask()); + + MemoryRegion stack_mask = stack_map.GetStackMask(); + 
ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); + + DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); + ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); + ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1)); + ASSERT_EQ(0, dex_registers.GetValue(0)); + ASSERT_EQ(-2, dex_registers.GetValue(1)); + + InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map); + ASSERT_EQ(2u, inline_info.GetDepth()); + ASSERT_EQ(42u, inline_info.GetMethodReferenceIndexAtDepth(0)); + ASSERT_EQ(82u, inline_info.GetMethodReferenceIndexAtDepth(1)); + + stack_map = code_info.GetStackMapAt(1); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u))); + ASSERT_EQ(1u, stack_map.GetDexPc()); + ASSERT_EQ(128u, stack_map.GetNativePcOffset()); + ASSERT_EQ(0xFFu, stack_map.GetRegisterMask()); + + stack_mask = stack_map.GetStackMask(); + ASSERT_TRUE(SameBits(stack_mask, sp_mask2)); + + ASSERT_FALSE(stack_map.HasInlineInfo()); +} + +} // namespace art diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc new file mode 100644 index 0000000000..2e48ee8e7e --- /dev/null +++ b/compiler/optimizing/suspend_check_test.cc @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "builder.h" +#include "dex_instruction.h" +#include "nodes.h" +#include "optimizing_unit_test.h" + +#include "gtest/gtest.h" + +namespace art { + +/** + * Check that the HGraphBuilder adds suspend checks to backward branches. + */ + +static void TestCode(const uint16_t* data) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraphBuilder builder(&allocator); + const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); + HGraph* graph = builder.BuildGraph(*item); + ASSERT_NE(graph, nullptr); + + HBasicBlock* first_block = graph->GetEntryBlock()->GetSuccessors().Get(0); + HInstruction* first_instruction = first_block->GetFirstInstruction(); + // Account for some tests having a store local as first instruction. 
+ ASSERT_TRUE(first_instruction->IsSuspendCheck() + || first_instruction->GetNext()->IsSuspendCheck()); +} + +TEST(CodegenTest, CFG1) { + const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + Instruction::NOP, + Instruction::GOTO | 0xFF00); + + TestCode(data); +} + +TEST(CodegenTest, CFG2) { + const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + Instruction::GOTO_32, 0, 0); + + TestCode(data); +} + +TEST(CodegenTest, CFG3) { + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 0xFFFF, + Instruction::RETURN_VOID); + + TestCode(data); +} + +TEST(CodegenTest, CFG4) { + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_NE, 0xFFFF, + Instruction::RETURN_VOID); + + TestCode(data); +} + +TEST(CodegenTest, CFG5) { + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQZ, 0xFFFF, + Instruction::RETURN_VOID); + + TestCode(data); +} + +TEST(CodegenTest, CFG6) { + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_NEZ, 0xFFFF, + Instruction::RETURN_VOID); + + TestCode(data); +} +} // namespace art |
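Taken together, the new files in this change extend the optimizing compiler's SSA pipeline with phi type propagation, phi elimination, codegen-aware liveness analysis, and stack map emission. A rough sketch of how the pieces introduced above fit together; the driver function and the Analyze() entry point for the liveness analysis are assumptions and are not shown in this diff:

    #include "ssa_liveness_analysis.h"
    #include "ssa_phi_elimination.h"
    #include "ssa_type_propagation.h"

    // Hypothetical driver; `graph` is an HGraph* in SSA form, `codegen` a CodeGenerator*.
    SsaTypePropagation type_propagation(graph);
    type_propagation.Run();                     // Give phis a concrete Primitive::Type.

    SsaRedundantPhiElimination redundant_phi(graph);
    redundant_phi.Run();
    SsaDeadPhiElimination dead_phi(graph);
    dead_phi.Run();

    SsaLivenessAnalysis liveness(*graph, codegen);
    liveness.Analyze();                         // Assumed entry point wrapping the linearization,
                                                // instruction numbering, and live range computation
                                                // consumed by the register allocator.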