26 files changed, 843 insertions(+), 227 deletions(-)
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index f8be21a06e..b60eebf1ba 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -521,7 +521,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, move_resolver_(graph->GetArena(), this), isa_features_(isa_features), method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Use a fake return address register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -5669,6 +5670,51 @@ void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) } } +void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + + // Constant area pointer. + locations->SetInAt(1, Location::RequiresRegister()); + + // And the temporary we need. + locations->AddTemp(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + int32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // Optimizing has a jump area. + Register temp_reg = locations->GetTemp(0).AsRegister<Register>(); + Register constant_area = locations->InAt(1).AsRegister<Register>(); + + // Remove the bias, if needed. + if (lower_bound != 0) { + __ leal(temp_reg, Address(value_reg, -lower_bound)); + value_reg = temp_reg; + } + + // Is the value in range? + DCHECK_GE(num_entries, 1); + __ cmpl(value_reg, Immediate(num_entries - 1)); + __ j(kAbove, codegen_->GetLabelOf(default_block)); + + // We are in the range of the table. + // Load (target-constant_area) from the jump table, indexing by the value. + __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg)); + + // Compute the actual target address by adding in constant_area. + __ addl(temp_reg, constant_area); + + // And jump. + __ jmp(temp_reg); +} + void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress( HX86ComputeBaseMethodAddress* insn) { LocationSummary* locations = @@ -5752,28 +5798,18 @@ void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromCons } } -void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { - // Generate the constant area if needed. - X86Assembler* assembler = GetAssembler(); - if (!assembler->IsConstantAreaEmpty()) { - // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 - // byte values. - assembler->Align(4, 0); - constant_area_start_ = assembler->CodeSize(); - assembler->AddConstantArea(); - } - - // And finish up. - CodeGenerator::Finalize(allocator); -} - /** * Class to handle late fixup of offsets into constant area. 
*/ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { public: - RIPFixup(const CodeGeneratorX86& codegen, int offset) - : codegen_(codegen), offset_into_constant_area_(offset) {} + RIPFixup(CodeGeneratorX86& codegen, size_t offset) + : codegen_(&codegen), offset_into_constant_area_(offset) {} + + protected: + void SetOffset(size_t offset) { offset_into_constant_area_ = offset; } + + CodeGeneratorX86* codegen_; private: void Process(const MemoryRegion& region, int pos) OVERRIDE { @@ -5781,19 +5817,77 @@ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenera // last 4 bytes of the instruction. // The value to patch is the distance from the offset in the constant area // from the address computed by the HX86ComputeBaseMethodAddress instruction. - int32_t constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_; - int32_t relative_position = constant_offset - codegen_.GetMethodAddressOffset();; + int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_; + int32_t relative_position = constant_offset - codegen_->GetMethodAddressOffset();; // Patch in the right value. region.StoreUnaligned<int32_t>(pos - 4, relative_position); } - const CodeGeneratorX86& codegen_; - // Location in constant area that the fixup refers to. - int offset_into_constant_area_; + int32_t offset_into_constant_area_; }; +/** + * Class to handle late fixup of offsets to a jump table that will be created in the + * constant area. + */ +class JumpTableRIPFixup : public RIPFixup { + public: + JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr) + : RIPFixup(codegen, static_cast<size_t>(-1)), switch_instr_(switch_instr) {} + + void CreateJumpTable() { + X86Assembler* assembler = codegen_->GetAssembler(); + + // Ensure that the reference to the jump table has the correct offset. + const int32_t offset_in_constant_table = assembler->ConstantAreaSize(); + SetOffset(offset_in_constant_table); + + // The label values in the jump table are computed relative to the + // instruction addressing the constant area. + const int32_t relative_offset = codegen_->GetMethodAddressOffset(); + + // Populate the jump table with the correct values for the jump table. + int32_t num_entries = switch_instr_->GetNumEntries(); + HBasicBlock* block = switch_instr_->GetBlock(); + const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors(); + // The value that we want is the target offset - the position of the table. + for (int32_t i = 0; i < num_entries; i++) { + HBasicBlock* b = successors[i]; + Label* l = codegen_->GetLabelOf(b); + DCHECK(l->IsBound()); + int32_t offset_to_block = l->Position() - relative_offset; + assembler->AppendInt32(offset_to_block); + } + } + + private: + const HX86PackedSwitch* switch_instr_; +}; + +void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { + // Generate the constant area if needed. + X86Assembler* assembler = GetAssembler(); + if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) { + // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 + // byte values. + assembler->Align(4, 0); + constant_area_start_ = assembler->CodeSize(); + + // Populate any jump tables. + for (auto jump_table : fixups_to_jump_tables_) { + jump_table->CreateJumpTable(); + } + + // And now add the constant area to the generated code. + assembler->AddConstantArea(); + } + + // And finish up. 
+ CodeGenerator::Finalize(allocator); +} + Address CodeGeneratorX86::LiteralDoubleAddress(double v, Register reg) { AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v)); return Address(reg, kDummy32BitOffset, fixup); @@ -5814,6 +5908,20 @@ Address CodeGeneratorX86::LiteralInt64Address(int64_t v, Register reg) { return Address(reg, kDummy32BitOffset, fixup); } +Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, + Register reg, + Register value) { + // Create a fixup to be used to create and address the jump table. + JumpTableRIPFixup* table_fixup = + new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr); + + // We have to populate the jump tables. + fixups_to_jump_tables_.push_back(table_fixup); + + // We want a scaled address, as we are extracting the correct offset from the table. + return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup); +} + /** * Finds instructions that need the constant area base as an input. */ @@ -5864,6 +5972,21 @@ class ConstantHandlerVisitor : public HGraphVisitor { } } + void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { + // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to + // address the constant area. + InitializeConstantAreaPointer(switch_insn); + HGraph* graph = GetGraph(); + HBasicBlock* block = switch_insn->GetBlock(); + HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch( + switch_insn->GetStartValue(), + switch_insn->GetNumEntries(), + switch_insn->InputAt(0), + base_, + switch_insn->GetDexPc()); + block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch); + } + void InitializeConstantAreaPointer(HInstruction* user) { // Ensure we only initialize the pointer once. if (base_ != nullptr) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index ae2d84f945..fdfc5ab69b 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -245,6 +245,8 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86); }; +class JumpTableRIPFixup; + class CodeGeneratorX86 : public CodeGenerator { public: CodeGeneratorX86(HGraph* graph, @@ -385,6 +387,8 @@ class CodeGeneratorX86 : public CodeGenerator { Address LiteralInt32Address(int32_t v, Register reg); Address LiteralInt64Address(int64_t v, Register reg); + Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value); + void Finalize(CodeAllocator* allocator) OVERRIDE; private: @@ -405,6 +409,9 @@ class CodeGeneratorX86 : public CodeGenerator { // Used for fixups to the constant area. int32_t constant_area_start_; + // Fixups for jump tables that need to be patched after the constant table is generated. 
+ ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; + // If there is a HX86ComputeBaseMethodAddress instruction in the graph // (which shall be the sole instruction of this kind), subtracting this offset // from the value contained in the out register of this HX86ComputeBaseMethodAddress diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 21120a0c80..f0d9420f87 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -670,7 +670,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, constant_area_start_(0), method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -5322,31 +5323,43 @@ void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); } void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); int32_t num_entries = switch_instr->GetNumEntries(); LocationSummary* locations = switch_instr->GetLocations(); - CpuRegister value_reg = locations->InAt(0).AsRegister<CpuRegister>(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); - // Create a series of compare/jumps. - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - if (case_value == 0) { - __ testl(value_reg, value_reg); - } else { - __ cmpl(value_reg, Immediate(case_value)); - } - __ j(kEqual, codegen_->GetLabelOf(successors[i])); + // Remove the bias, if needed. + Register value_reg_out = value_reg_in.AsRegister(); + if (lower_bound != 0) { + __ leal(temp_reg, Address(value_reg_in, -lower_bound)); + value_reg_out = temp_reg.AsRegister(); } + CpuRegister value_reg(value_reg_out); - // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ jmp(codegen_->GetLabelOf(default_block)); - } + // Is the value in range? + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + __ cmpl(value_reg, Immediate(num_entries - 1)); + __ j(kAbove, codegen_->GetLabelOf(default_block)); + + // We are in the range of the table. + // Load the address of the jump table in the constant area. + __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr)); + + // Load the (signed) offset from the jump table. + __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0)); + + // Add the offset to the address of the table base. + __ addq(temp_reg, base_reg); + + // And jump. 
+ __ jmp(temp_reg); } void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { @@ -5372,15 +5385,85 @@ void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) { } } +/** + * Class to handle late fixup of offsets into constant area. + */ +class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { + public: + RIPFixup(CodeGeneratorX86_64& codegen, size_t offset) + : codegen_(&codegen), offset_into_constant_area_(offset) {} + + protected: + void SetOffset(size_t offset) { offset_into_constant_area_ = offset; } + + CodeGeneratorX86_64* codegen_; + + private: + void Process(const MemoryRegion& region, int pos) OVERRIDE { + // Patch the correct offset for the instruction. We use the address of the + // 'next' instruction, which is 'pos' (patch the 4 bytes before). + int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_; + int32_t relative_position = constant_offset - pos; + + // Patch in the right value. + region.StoreUnaligned<int32_t>(pos - 4, relative_position); + } + + // Location in constant area that the fixup refers to. + size_t offset_into_constant_area_; +}; + +/** + t * Class to handle late fixup of offsets to a jump table that will be created in the + * constant area. + */ +class JumpTableRIPFixup : public RIPFixup { + public: + JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr) + : RIPFixup(codegen, -1), switch_instr_(switch_instr) {} + + void CreateJumpTable() { + X86_64Assembler* assembler = codegen_->GetAssembler(); + + // Ensure that the reference to the jump table has the correct offset. + const int32_t offset_in_constant_table = assembler->ConstantAreaSize(); + SetOffset(offset_in_constant_table); + + // Compute the offset from the start of the function to this jump table. + const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table; + + // Populate the jump table with the correct values for the jump table. + int32_t num_entries = switch_instr_->GetNumEntries(); + HBasicBlock* block = switch_instr_->GetBlock(); + const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors(); + // The value that we want is the target offset - the position of the table. + for (int32_t i = 0; i < num_entries; i++) { + HBasicBlock* b = successors[i]; + Label* l = codegen_->GetLabelOf(b); + DCHECK(l->IsBound()); + int32_t offset_to_block = l->Position() - current_table_offset; + assembler->AppendInt32(offset_to_block); + } + } + + private: + const HPackedSwitch* switch_instr_; +}; + void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { // Generate the constant area if needed. X86_64Assembler* assembler = GetAssembler(); - if (!assembler->IsConstantAreaEmpty()) { - // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 - // byte values. If used for vectors at a later time, this will need to be - // updated to 16 bytes with the appropriate offset. + if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) { + // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values. assembler->Align(4, 0); constant_area_start_ = assembler->CodeSize(); + + // Populate any jump tables. + for (auto jump_table : fixups_to_jump_tables_) { + jump_table->CreateJumpTable(); + } + + // And now add the constant area to the generated code. 
assembler->AddConstantArea(); } @@ -5388,31 +5471,6 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { CodeGenerator::Finalize(allocator); } -/** - * Class to handle late fixup of offsets into constant area. - */ -class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { - public: - RIPFixup(const CodeGeneratorX86_64& codegen, int offset) - : codegen_(codegen), offset_into_constant_area_(offset) {} - - private: - void Process(const MemoryRegion& region, int pos) OVERRIDE { - // Patch the correct offset for the instruction. We use the address of the - // 'next' instruction, which is 'pos' (patch the 4 bytes before). - int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_; - int relative_position = constant_offset - pos; - - // Patch in the right value. - region.StoreUnaligned<int32_t>(pos - 4, relative_position); - } - - const CodeGeneratorX86_64& codegen_; - - // Location in constant area that the fixup refers to. - int offset_into_constant_area_; -}; - Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v)); return Address::RIP(fixup); @@ -5453,6 +5511,16 @@ void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type t GetMoveResolver()->EmitNativeCode(¶llel_move); } +Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) { + // Create a fixup to be used to create and address the jump table. + JumpTableRIPFixup* table_fixup = + new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr); + + // We have to populate the jump tables. + fixups_to_jump_tables_.push_back(table_fixup); + return Address::RIP(table_fixup); +} + #undef __ } // namespace x86_64 diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index d6a6a7e760..dc86a48ce7 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -234,6 +234,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64); }; +// Class for fixups to jump tables. +class JumpTableRIPFixup; + class CodeGeneratorX86_64 : public CodeGenerator { public: CodeGeneratorX86_64(HGraph* graph, @@ -354,6 +357,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Load a 64 bit value into a register in the most efficient manner. void Load64BitValue(CpuRegister dest, int64_t value); + Address LiteralCaseTable(HPackedSwitch* switch_instr); // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. void Store64BitValueToStack(Location dest, int64_t value); @@ -391,6 +395,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { // We will fix this up in the linker later to have the right value. static constexpr int32_t kDummy32BitOffset = 256; + // Fixups for jump tables need to be handled specially. 
+ ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 24a89bca4e..ed401b67c5 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -606,8 +606,23 @@ static void UpdateInputsUsers(HInstruction* instruction) { void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement) { DCHECK(initial->GetBlock() == this); - InsertInstructionBefore(replacement, initial); - initial->ReplaceWith(replacement); + if (initial->IsControlFlow()) { + // We can only replace a control flow instruction with another control flow instruction. + DCHECK(replacement->IsControlFlow()); + DCHECK_EQ(replacement->GetId(), -1); + DCHECK_EQ(replacement->GetType(), Primitive::kPrimVoid); + DCHECK_EQ(initial->GetBlock(), this); + DCHECK_EQ(initial->GetType(), Primitive::kPrimVoid); + DCHECK(initial->GetUses().IsEmpty()); + DCHECK(initial->GetEnvUses().IsEmpty()); + replacement->SetBlock(this); + replacement->SetId(GetGraph()->GetNextInstructionId()); + instructions_.InsertInstructionBefore(replacement, initial); + UpdateInputsUsers(replacement); + } else { + InsertInstructionBefore(replacement, initial); + initial->ReplaceWith(replacement); + } RemoveInstruction(initial); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 82909c41b6..0d668e8cf7 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1074,7 +1074,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ M(X86ComputeBaseMethodAddress, Instruction) \ - M(X86LoadFromConstantTable, Instruction) + M(X86LoadFromConstantTable, Instruction) \ + M(X86PackedSwitch, Instruction) #define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index f7cc872419..556217bf74 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -62,6 +62,45 @@ class HX86LoadFromConstantTable : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HX86LoadFromConstantTable); }; +// X86 version of HPackedSwitch that holds a pointer to the base method address. +class HX86PackedSwitch : public HTemplateInstruction<2> { + public: + HX86PackedSwitch(int32_t start_value, + int32_t num_entries, + HInstruction* input, + HX86ComputeBaseMethodAddress* method_base, + uint32_t dex_pc) + : HTemplateInstruction(SideEffects::None(), dex_pc), + start_value_(start_value), + num_entries_(num_entries) { + SetRawInputAt(0, input); + SetRawInputAt(1, method_base); + } + + bool IsControlFlow() const OVERRIDE { return true; } + + int32_t GetStartValue() const { return start_value_; } + + int32_t GetNumEntries() const { return num_entries_; } + + HX86ComputeBaseMethodAddress* GetBaseMethodAddress() const { + return InputAt(1)->AsX86ComputeBaseMethodAddress(); + } + + HBasicBlock* GetDefaultBlock() const { + // Last entry is the default block. 
+ return GetBlock()->GetSuccessors()[num_entries_]; + } + + DECLARE_INSTRUCTION(X86PackedSwitch); + + private: + const int32_t start_value_; + const int32_t num_entries_; + + DISALLOW_COPY_AND_ASSIGN(HX86PackedSwitch); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_X86_H_ diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 04e815aa1d..5347bf0302 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -2369,44 +2369,48 @@ void X86Assembler::AddConstantArea() { } } -int ConstantArea::AddInt32(int32_t v) { +size_t ConstantArea::AppendInt32(int32_t v) { + size_t result = buffer_.size() * elem_size_; + buffer_.push_back(v); + return result; +} + +size_t ConstantArea::AddInt32(int32_t v) { for (size_t i = 0, e = buffer_.size(); i < e; i++) { if (v == buffer_[i]) { - return i * kEntrySize; + return i * elem_size_; } } // Didn't match anything. - int result = buffer_.size() * kEntrySize; - buffer_.push_back(v); - return result; + return AppendInt32(v); } -int ConstantArea::AddInt64(int64_t v) { +size_t ConstantArea::AddInt64(int64_t v) { int32_t v_low = Low32Bits(v); int32_t v_high = High32Bits(v); if (buffer_.size() > 1) { // Ensure we don't pass the end of the buffer. for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) { if (v_low == buffer_[i] && v_high == buffer_[i + 1]) { - return i * kEntrySize; + return i * elem_size_; } } } // Didn't match anything. - int result = buffer_.size() * kEntrySize; + size_t result = buffer_.size() * elem_size_; buffer_.push_back(v_low); buffer_.push_back(v_high); return result; } -int ConstantArea::AddDouble(double v) { +size_t ConstantArea::AddDouble(double v) { // Treat the value as a 64-bit integer value. return AddInt64(bit_cast<int64_t, double>(v)); } -int ConstantArea::AddFloat(float v) { +size_t ConstantArea::AddFloat(float v) { // Treat the value as a 32-bit integer value. return AddInt32(bit_cast<int32_t, float>(v)); } diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 93ecdf52fe..b50fda907a 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -166,6 +166,39 @@ class Address : public Operand { Init(base_in, disp.Int32Value()); } + Address(Register index_in, ScaleFactor scale_in, int32_t disp) { + CHECK_NE(index_in, ESP); // Illegal addressing mode. + SetModRM(0, ESP); + SetSIB(scale_in, index_in, EBP); + SetDisp32(disp); + } + + Address(Register base_in, Register index_in, ScaleFactor scale_in, int32_t disp) { + Init(base_in, index_in, scale_in, disp); + } + + Address(Register base_in, + Register index_in, + ScaleFactor scale_in, + int32_t disp, AssemblerFixup *fixup) { + Init(base_in, index_in, scale_in, disp); + SetFixup(fixup); + } + + static Address Absolute(uintptr_t addr) { + Address result; + result.SetModRM(0, EBP); + result.SetDisp32(addr); + return result; + } + + static Address Absolute(ThreadOffset<4> addr) { + return Absolute(addr.Int32Value()); + } + + private: + Address() {} + void Init(Register base_in, int32_t disp) { if (disp == 0 && base_in != EBP) { SetModRM(0, base_in); @@ -181,14 +214,7 @@ class Address : public Operand { } } - Address(Register index_in, ScaleFactor scale_in, int32_t disp) { - CHECK_NE(index_in, ESP); // Illegal addressing mode. 
- SetModRM(0, ESP); - SetSIB(scale_in, index_in, EBP); - SetDisp32(disp); - } - - Address(Register base_in, Register index_in, ScaleFactor scale_in, int32_t disp) { + void Init(Register base_in, Register index_in, ScaleFactor scale_in, int32_t disp) { CHECK_NE(index_in, ESP); // Illegal addressing mode. if (disp == 0 && base_in != EBP) { SetModRM(0, ESP); @@ -203,20 +229,6 @@ class Address : public Operand { SetDisp32(disp); } } - - static Address Absolute(uintptr_t addr) { - Address result; - result.SetModRM(0, EBP); - result.SetDisp32(addr); - return result; - } - - static Address Absolute(ThreadOffset<4> addr) { - return Absolute(addr.Int32Value()); - } - - private: - Address() {} }; @@ -252,40 +264,39 @@ class ConstantArea { // Add a double to the constant area, returning the offset into // the constant area where the literal resides. - int AddDouble(double v); + size_t AddDouble(double v); // Add a float to the constant area, returning the offset into // the constant area where the literal resides. - int AddFloat(float v); + size_t AddFloat(float v); // Add an int32_t to the constant area, returning the offset into // the constant area where the literal resides. - int AddInt32(int32_t v); + size_t AddInt32(int32_t v); + + // Add an int32_t to the end of the constant area, returning the offset into + // the constant area where the literal resides. + size_t AppendInt32(int32_t v); // Add an int64_t to the constant area, returning the offset into // the constant area where the literal resides. - int AddInt64(int64_t v); + size_t AddInt64(int64_t v); bool IsEmpty() const { return buffer_.size() == 0; } - const std::vector<int32_t>& GetBuffer() const { - return buffer_; - } - - void AddFixup(AssemblerFixup* fixup) { - fixups_.push_back(fixup); + size_t GetSize() const { + return buffer_.size() * elem_size_; } - const std::vector<AssemblerFixup*>& GetFixups() const { - return fixups_; + const std::vector<int32_t>& GetBuffer() const { + return buffer_; } private: - static constexpr size_t kEntrySize = sizeof(int32_t); + static constexpr size_t elem_size_ = sizeof(int32_t); std::vector<int32_t> buffer_; - std::vector<AssemblerFixup*> fixups_; }; class X86Assembler FINAL : public Assembler { @@ -740,26 +751,36 @@ class X86Assembler FINAL : public Assembler { // Add a double to the constant area, returning the offset into // the constant area where the literal resides. - int AddDouble(double v) { return constant_area_.AddDouble(v); } + size_t AddDouble(double v) { return constant_area_.AddDouble(v); } // Add a float to the constant area, returning the offset into // the constant area where the literal resides. - int AddFloat(float v) { return constant_area_.AddFloat(v); } + size_t AddFloat(float v) { return constant_area_.AddFloat(v); } // Add an int32_t to the constant area, returning the offset into // the constant area where the literal resides. - int AddInt32(int32_t v) { return constant_area_.AddInt32(v); } + size_t AddInt32(int32_t v) { + return constant_area_.AddInt32(v); + } + + // Add an int32_t to the end of the constant area, returning the offset into + // the constant area where the literal resides. + size_t AppendInt32(int32_t v) { + return constant_area_.AppendInt32(v); + } // Add an int64_t to the constant area, returning the offset into // the constant area where the literal resides. - int AddInt64(int64_t v) { return constant_area_.AddInt64(v); } + size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); } // Add the contents of the constant area to the assembler buffer. 
void AddConstantArea(); // Is the constant area empty? Return true if there are no literals in the constant area. bool IsConstantAreaEmpty() const { return constant_area_.IsEmpty(); } - void AddConstantAreaFixup(AssemblerFixup* fixup) { constant_area_.AddFixup(fixup); } + + // Return the current size of the constant area. + size_t ConstantAreaSize() const { return constant_area_.GetSize(); } private: inline void EmitUint8(uint8_t value); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 6e7d74d528..9eb5e67041 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -3122,7 +3122,14 @@ void X86_64Assembler::AddConstantArea() { } } -int ConstantArea::AddInt32(int32_t v) { +size_t ConstantArea::AppendInt32(int32_t v) { + size_t result = buffer_.size() * elem_size_; + buffer_.push_back(v); + return result; +} + +size_t ConstantArea::AddInt32(int32_t v) { + // Look for an existing match. for (size_t i = 0, e = buffer_.size(); i < e; i++) { if (v == buffer_[i]) { return i * elem_size_; @@ -3130,12 +3137,10 @@ int ConstantArea::AddInt32(int32_t v) { } // Didn't match anything. - int result = buffer_.size() * elem_size_; - buffer_.push_back(v); - return result; + return AppendInt32(v); } -int ConstantArea::AddInt64(int64_t v) { +size_t ConstantArea::AddInt64(int64_t v) { int32_t v_low = v; int32_t v_high = v >> 32; if (buffer_.size() > 1) { @@ -3148,18 +3153,18 @@ int ConstantArea::AddInt64(int64_t v) { } // Didn't match anything. - int result = buffer_.size() * elem_size_; + size_t result = buffer_.size() * elem_size_; buffer_.push_back(v_low); buffer_.push_back(v_high); return result; } -int ConstantArea::AddDouble(double v) { +size_t ConstantArea::AddDouble(double v) { // Treat the value as a 64-bit integer value. return AddInt64(bit_cast<int64_t, double>(v)); } -int ConstantArea::AddFloat(float v) { +size_t ConstantArea::AddFloat(float v) { // Treat the value as a 32-bit integer value. return AddInt32(bit_cast<int32_t, float>(v)); } diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 255f551675..01d28e305d 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -269,36 +269,40 @@ class Address : public Operand { * Class to handle constant area values. */ class ConstantArea { - public: - ConstantArea() {} + public: + ConstantArea() {} - // Add a double to the constant area, returning the offset into - // the constant area where the literal resides. - int AddDouble(double v); + // Add a double to the constant area, returning the offset into + // the constant area where the literal resides. + size_t AddDouble(double v); - // Add a float to the constant area, returning the offset into - // the constant area where the literal resides. - int AddFloat(float v); + // Add a float to the constant area, returning the offset into + // the constant area where the literal resides. + size_t AddFloat(float v); - // Add an int32_t to the constant area, returning the offset into - // the constant area where the literal resides. - int AddInt32(int32_t v); + // Add an int32_t to the constant area, returning the offset into + // the constant area where the literal resides. + size_t AddInt32(int32_t v); - // Add an int64_t to the constant area, returning the offset into - // the constant area where the literal resides. 
- int AddInt64(int64_t v); + // Add an int32_t to the end of the constant area, returning the offset into + // the constant area where the literal resides. + size_t AppendInt32(int32_t v); - int GetSize() const { - return buffer_.size() * elem_size_; - } + // Add an int64_t to the constant area, returning the offset into + // the constant area where the literal resides. + size_t AddInt64(int64_t v); - const std::vector<int32_t>& GetBuffer() const { - return buffer_; - } + size_t GetSize() const { + return buffer_.size() * elem_size_; + } - private: - static constexpr size_t elem_size_ = sizeof(int32_t); - std::vector<int32_t> buffer_; + const std::vector<int32_t>& GetBuffer() const { + return buffer_; + } + + private: + static constexpr size_t elem_size_ = sizeof(int32_t); + std::vector<int32_t> buffer_; }; @@ -806,19 +810,27 @@ class X86_64Assembler FINAL : public Assembler { // Add a double to the constant area, returning the offset into // the constant area where the literal resides. - int AddDouble(double v) { return constant_area_.AddDouble(v); } + size_t AddDouble(double v) { return constant_area_.AddDouble(v); } // Add a float to the constant area, returning the offset into // the constant area where the literal resides. - int AddFloat(float v) { return constant_area_.AddFloat(v); } + size_t AddFloat(float v) { return constant_area_.AddFloat(v); } // Add an int32_t to the constant area, returning the offset into // the constant area where the literal resides. - int AddInt32(int32_t v) { return constant_area_.AddInt32(v); } + size_t AddInt32(int32_t v) { + return constant_area_.AddInt32(v); + } + + // Add an int32_t to the end of the constant area, returning the offset into + // the constant area where the literal resides. + size_t AppendInt32(int32_t v) { + return constant_area_.AppendInt32(v); + } // Add an int64_t to the constant area, returning the offset into // the constant area where the literal resides. - int AddInt64(int64_t v) { return constant_area_.AddInt64(v); } + size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); } // Add the contents of the constant area to the assembler buffer. void AddConstantArea(); @@ -826,6 +838,9 @@ class X86_64Assembler FINAL : public Assembler { // Is the constant area empty? Return true if there are no literals in the constant area. bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; } + // Return the current size of the constant area. + size_t ConstantAreaSize() const { return constant_area_.GetSize(); } + // // Heap poisoning. // diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index d09631bc71..930bb2c9a7 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -891,7 +891,109 @@ END art_quick_set64_instance ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Generate the allocation entrypoints for each allocator. -GENERATE_ALL_ALLOC_ENTRYPOINTS +GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). +ENTRY art_quick_alloc_object_rosalloc + // Fast path rosalloc allocation. + // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current + // r2, r3, r12: free. 
+ ldr r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32] // Load dex cache resolved types array + // Load the class (r2) + ldr r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] + cbz r2, .Lart_quick_alloc_object_rosalloc_slow_path // Check null class + // Check class status. + ldr r3, [r2, #MIRROR_CLASS_STATUS_OFFSET] + cmp r3, #MIRROR_CLASS_STATUS_INITIALIZED + bne .Lart_quick_alloc_object_rosalloc_slow_path + // Add a fake dependence from the + // following access flag and size + // loads to the status load. + // This is to prevent those loads + // from being reordered above the + // status load and reading wrong + // values (an alternative is to use + // a load-acquire for the status). + eor r3, r3, r3 + add r2, r2, r3 + // Check access flags has + // kAccClassIsFinalizable + ldr r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET] + tst r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE + bne .Lart_quick_alloc_object_rosalloc_slow_path + + ldr r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] // Check if the thread local + // allocation stack has room. + // TODO: consider using ldrd. + ldr r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET] + cmp r3, r12 + bhs .Lart_quick_alloc_object_rosalloc_slow_path + + ldr r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET] // Load the object size (r3) + cmp r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread + // local allocation + bhs .Lart_quick_alloc_object_rosalloc_slow_path + // Compute the rosalloc bracket index + // from the size. + // Align up the size by the rosalloc + // bracket quantum size and divide + // by the quantum size and subtract + // by 1. This code is a shorter but + // equivalent version. + sub r3, r3, #1 + lsr r3, r3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT + // Load the rosalloc run (r12) + add r12, r9, r3, lsl #POINTER_SIZE_SHIFT + ldr r12, [r12, #THREAD_ROSALLOC_RUNS_OFFSET] + // Load the free list head (r3). This + // will be the return val. + ldr r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)] + cbz r3, .Lart_quick_alloc_object_rosalloc_slow_path + // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1. + ldr r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET] // Load the next pointer of the head + // and update the list head with the + // next pointer. + str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)] + // Store the class pointer in the + // header. This also overwrites the + // next pointer. The offsets are + // asserted to match. +#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET +#error "Class pointer needs to overwrite next pointer." +#endif + str r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET] + // Push the new object onto the thread + // local allocation stack and + // increment the thread local + // allocation stack top. + ldr r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] + str r3, [r1], #COMPRESSED_REFERENCE_SIZE // (Increment r1 as a side effect.) + str r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] + // Decrement the size of the free list + ldr r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)] + sub r1, #1 + // TODO: consider combining this store + // and the list head store above using + // strd. + str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)] + // Fence. This is "ish" not "ishst" so + // that the code after this allocation + // site will see the right values in + // the fields of the class. 
+ // Alternatively we could use "ishst" + // if we use load-acquire for the + // class status load.) + dmb ish + mov r0, r3 // Set the return value and return. + bx lr + +.Lart_quick_alloc_object_rosalloc_slow_path: + SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3 @ save callee saves in case of GC + mov r2, r9 @ pass Thread::Current + bl artAllocObjectFromCodeRosAlloc @ (uint32_t type_idx, Method* method, Thread*) + RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME + RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +END art_quick_alloc_object_rosalloc /* * Called by managed code when the value in rSUSPEND has been decremented to 0. diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S index ef5edbb227..fbacdbc930 100644 --- a/runtime/arch/quick_alloc_entrypoints.S +++ b/runtime/arch/quick_alloc_entrypoints.S @@ -113,7 +113,8 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMal GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc) +// This is to be separately defined for each architecture to allow a hand-written assembly fast path. +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc) diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 4a106e44c6..2f485ae644 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -788,6 +788,7 @@ END_MACRO // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 5c413d20f8..95f0ccb419 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -809,6 +809,7 @@ END_MACRO // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc) // A handle-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). DEFINE_FUNCTION art_quick_alloc_object_tlab // Fast path tlab allocation. diff --git a/runtime/asm_support.h b/runtime/asm_support.h index d98fc5179f..69f6fe96ff 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -19,6 +19,7 @@ #if defined(__cplusplus) #include "art_method.h" +#include "gc/allocator/rosalloc.h" #include "lock_word.h" #include "mirror/class.h" #include "mirror/string.h" @@ -53,6 +54,14 @@ static inline void CheckAsmSupportOffsetsAndSizes() { #define ADD_TEST_EQ(x, y) #endif +#if defined(__LP64__) +#define POINTER_SIZE_SHIFT 3 +#else +#define POINTER_SIZE_SHIFT 2 +#endif +ADD_TEST_EQ(static_cast<size_t>(1U << POINTER_SIZE_SHIFT), + static_cast<size_t>(__SIZEOF_POINTER__)) + // Size of references to the heap on the stack. 
#define STACK_REFERENCE_SIZE 4 ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReference<art::mirror::Object>)) @@ -62,6 +71,10 @@ ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReferenc ADD_TEST_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE), sizeof(art::mirror::CompressedReference<art::mirror::Object>)) +#define COMPRESSED_REFERENCE_SIZE_SHIFT 2 +ADD_TEST_EQ(static_cast<size_t>(1U << COMPRESSED_REFERENCE_SIZE_SHIFT), + static_cast<size_t>(COMPRESSED_REFERENCE_SIZE)) + // Note: these callee save methods loads require read barriers. // Offset of field Runtime::callee_save_methods_[kSaveAll] #define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0 @@ -120,6 +133,18 @@ ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET, #define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__) ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET, art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value()) +// Offset of field Thread::tlsPtr_.rosalloc_runs. +#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__) +ADD_TEST_EQ(THREAD_ROSALLOC_RUNS_OFFSET, + art::Thread::RosAllocRunsOffset<__SIZEOF_POINTER__>().Int32Value()) +// Offset of field Thread::tlsPtr_.thread_local_alloc_stack_top. +#define THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 34 * __SIZEOF_POINTER__) +ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET, + art::Thread::ThreadLocalAllocStackTopOffset<__SIZEOF_POINTER__>().Int32Value()) +// Offset of field Thread::tlsPtr_.thread_local_alloc_stack_end. +#define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 35 * __SIZEOF_POINTER__) +ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET, + art::Thread::ThreadLocalAllocStackEndOffset<__SIZEOF_POINTER__>().Int32Value()) // Offsets within java.lang.Object. 
#define MIRROR_OBJECT_CLASS_OFFSET 0 @@ -236,6 +261,44 @@ ADD_TEST_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), art::kObjectAlignment - ADD_TEST_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED), ~static_cast<uint32_t>(art::kObjectAlignment - 1)) +#define ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE 128 +ADD_TEST_EQ(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE, + static_cast<int32_t>(art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize)) + +#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 4 +ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT, + static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSizeShift)) + +#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 15 +ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK, + static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1)) + +#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff0 +ADD_TEST_EQ(static_cast<uint32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32), + ~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1)) + +#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff0 +ADD_TEST_EQ(static_cast<uint64_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64), + ~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1)) + +#define ROSALLOC_RUN_FREE_LIST_OFFSET 8 +ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_OFFSET, + static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListOffset())) + +#define ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET 0 +ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET, + static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListHeadOffset())) + +#define ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET 16 +ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET, + static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListSizeOffset())) + +#define ROSALLOC_SLOT_NEXT_OFFSET 0 +ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET, + static_cast<int32_t>(art::gc::allocator::RosAlloc::RunSlotNextOffset())) +// Assert this so that we can avoid zeroing the next field by installing the class pointer. +ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET, MIRROR_OBJECT_CLASS_OFFSET) + #if defined(__cplusplus) } // End of CheckAsmSupportOffsets. #endif diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 02f2e0b207..b569f5010e 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -3486,28 +3486,31 @@ bool ClassLinker::InitializeClass(Thread* self, Handle<mirror::Class> klass, if (!klass->IsInterface()) { // Initialize interfaces with default methods for the JLS. size_t num_direct_interfaces = klass->NumDirectInterfaces(); - for (size_t i = 0; i < num_direct_interfaces; i++) { + // Only setup the (expensive) handle scope if we actually need to. + if (UNLIKELY(num_direct_interfaces > 0)) { StackHandleScope<1> hs_iface(self); - Handle<mirror::Class> handle_scope_iface( - hs_iface.NewHandle(mirror::Class::GetDirectInterface(self, klass, i))); - CHECK(handle_scope_iface.Get() != nullptr); - CHECK(handle_scope_iface->IsInterface()); - if (handle_scope_iface->HasBeenRecursivelyInitialized()) { - // We have already done this once for this interface. Skip it. - continue; - } - // We cannot just call initialize class directly because we need to ensure that ALL interfaces - // with default methods are initialized. Non-default interface initialization will not affect - // other non-default super-interfaces. 
- bool iface_initialized = InitializeDefaultInterfaceRecursive(self, - handle_scope_iface, - can_init_statics, - can_init_parents); - if (!iface_initialized) { - ObjectLock<mirror::Class> lock(self, klass); - // Initialization failed because one of our interfaces with default methods is erroneous. - mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self); - return false; + MutableHandle<mirror::Class> handle_scope_iface(hs_iface.NewHandle<mirror::Class>(nullptr)); + for (size_t i = 0; i < num_direct_interfaces; i++) { + handle_scope_iface.Assign(mirror::Class::GetDirectInterface(self, klass, i)); + CHECK(handle_scope_iface.Get() != nullptr); + CHECK(handle_scope_iface->IsInterface()); + if (handle_scope_iface->HasBeenRecursivelyInitialized()) { + // We have already done this for this interface. Skip it. + continue; + } + // We cannot just call initialize class directly because we need to ensure that ALL + // interfaces with default methods are initialized. Non-default interface initialization + // will not affect other non-default super-interfaces. + bool iface_initialized = InitializeDefaultInterfaceRecursive(self, + handle_scope_iface, + can_init_statics, + can_init_parents); + if (!iface_initialized) { + ObjectLock<mirror::Class> lock(self, klass); + // Initialization failed because one of our interfaces with default methods is erroneous. + mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self); + return false; + } } } } @@ -3609,18 +3612,22 @@ bool ClassLinker::InitializeDefaultInterfaceRecursive(Thread* self, bool can_init_parents) { CHECK(iface->IsInterface()); size_t num_direct_ifaces = iface->NumDirectInterfaces(); - // First we initialize all of iface's super-interfaces recursively. - for (size_t i = 0; i < num_direct_ifaces; i++) { - mirror::Class* super_iface = mirror::Class::GetDirectInterface(self, iface, i); - if (!super_iface->HasBeenRecursivelyInitialized()) { - // Recursive step - StackHandleScope<1> hs(self); - Handle<mirror::Class> handle_super_iface(hs.NewHandle(super_iface)); - if (!InitializeDefaultInterfaceRecursive(self, - handle_super_iface, - can_init_statics, - can_init_parents)) { - return false; + // Only create the (expensive) handle scope if we need it. + if (UNLIKELY(num_direct_ifaces > 0)) { + StackHandleScope<1> hs(self); + MutableHandle<mirror::Class> handle_super_iface(hs.NewHandle<mirror::Class>(nullptr)); + // First we initialize all of iface's super-interfaces recursively. + for (size_t i = 0; i < num_direct_ifaces; i++) { + mirror::Class* super_iface = mirror::Class::GetDirectInterface(self, iface, i); + if (!super_iface->HasBeenRecursivelyInitialized()) { + // Recursive step + handle_super_iface.Assign(super_iface); + if (!InitializeDefaultInterfaceRecursive(self, + handle_super_iface, + can_init_statics, + can_init_parents)) { + return false; + } } } } diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h index 87f1392920..3ce3d634f5 100644 --- a/runtime/gc/allocator/rosalloc.h +++ b/runtime/gc/allocator/rosalloc.h @@ -131,6 +131,7 @@ class RosAlloc { private: Slot* next_; // Next slot in the list. + friend class RosAlloc; }; // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to @@ -302,6 +303,7 @@ class RosAlloc { // free without traversing the whole free list. uint32_t size_; uint32_t padding_ ATTRIBUTE_UNUSED; + friend class RosAlloc; }; // Represents a run of memory slots of the same size. 
@@ -482,7 +484,7 @@ class RosAlloc { static constexpr uint8_t kMagicNumFree = 43; // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_. static constexpr size_t kNumOfSizeBrackets = kNumRosAllocThreadLocalSizeBrackets; - // The number of smaller size brackets that are 16 bytes apart. + // The number of smaller size brackets that are the quantum size apart. static constexpr size_t kNumOfQuantumSizeBrackets = 32; // The sizes (the slot sizes, in bytes) of the size brackets. static size_t bracketSizes[kNumOfSizeBrackets]; @@ -520,9 +522,7 @@ class RosAlloc { } // Returns true if the given allocation size is for a thread local allocation. static bool IsSizeForThreadLocal(size_t size) { - DCHECK_GT(kNumThreadLocalSizeBrackets, 0U); - size_t max_thread_local_bracket_idx = kNumThreadLocalSizeBrackets - 1; - bool is_size_for_thread_local = size <= bracketSizes[max_thread_local_bracket_idx]; + bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize; DCHECK(size > kLargeSizeThreshold || (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets))); return is_size_for_thread_local; @@ -634,6 +634,16 @@ class RosAlloc { // are less than this index. We use shared (current) runs for the rest. static const size_t kNumThreadLocalSizeBrackets = 8; + // The size of the largest bracket we use thread-local runs for. + // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1]. + static const size_t kMaxThreadLocalBracketSize = 128; + + // The bracket size increment for the brackets of size <= 512 bytes. + static constexpr size_t kBracketQuantumSize = 16; + + // Equal to Log2(kQuantumBracketSizeIncrement). + static constexpr size_t kBracketQuantumSizeShift = 4; + private: // The base address of the memory region that's managed by this allocator. uint8_t* base_; @@ -770,6 +780,19 @@ class RosAlloc { size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold); ~RosAlloc(); + static size_t RunFreeListOffset() { + return OFFSETOF_MEMBER(Run, free_list_); + } + static size_t RunFreeListHeadOffset() { + return OFFSETOF_MEMBER(SlotFreeList<false>, head_); + } + static size_t RunFreeListSizeOffset() { + return OFFSETOF_MEMBER(SlotFreeList<false>, size_); + } + static size_t RunSlotNextOffset() { + return OFFSETOF_MEMBER(Slot, next_); + } + // If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization. // If used, this may cause race conditions if multiple threads are allocating at the same time. template<bool kThreadSafe = true> diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index 1a6beadd37..8e6eae921a 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -74,7 +74,7 @@ static jlongArray ConvertDexFilesToJavaArray(JNIEnv* env, const OatFile* oat_file, std::vector<std::unique_ptr<const DexFile>>& vec) { // Add one for the oat file. - jlongArray long_array = env->NewLongArray(static_cast<jsize>(1u + vec.size())); + jlongArray long_array = env->NewLongArray(static_cast<jsize>(kDexFileIndexStart + vec.size())); if (env->ExceptionCheck() == JNI_TRUE) { return nullptr; } @@ -230,7 +230,8 @@ static jboolean DexFile_closeDexFile(JNIEnv* env, jclass, jobject cookie) { } } - if (all_deleted) { + // oat_file can be null if we are running without dex2oat. + if (all_deleted && oat_file != nullptr) { // If all of the dex files are no longer in use we can unmap the corresponding oat file. 
VLOG(class_linker) << "Unregistering " << oat_file; runtime->GetOatFileManager().UnRegisterAndDeleteOatFile(oat_file); diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc index 3371a3955e..9eee156bb0 100644 --- a/runtime/oat_file_manager.cc +++ b/runtime/oat_file_manager.cc @@ -22,7 +22,7 @@ #include "base/logging.h" #include "base/stl_util.h" -#include "dex_file.h" +#include "dex_file-inl.h" #include "gc/space/image_space.h" #include "oat_file_assistant.h" #include "thread-inl.h" @@ -30,7 +30,9 @@ namespace art { // For b/21333911. -static constexpr bool kDuplicateClassesCheck = false; +// Only enabled for debug builds to prevent bit rot. There are too many performance regressions for +// normal builds. +static constexpr bool kDuplicateClassesCheck = kIsDebugBuild; const OatFile* OatFileManager::RegisterOatFile(std::unique_ptr<const OatFile> oat_file) { WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_); @@ -115,9 +117,9 @@ class DexFileAndClassPair : ValueObject { current_class_index_(current_class_index), from_loaded_oat_(from_loaded_oat) {} - DexFileAndClassPair(DexFileAndClassPair&& rhs) = default; + DexFileAndClassPair(const DexFileAndClassPair& rhs) = default; - DexFileAndClassPair& operator=(DexFileAndClassPair&& rhs) = default; + DexFileAndClassPair& operator=(const DexFileAndClassPair& rhs) = default; const char* GetCachedDescriptor() const { return cached_descriptor_; @@ -139,7 +141,7 @@ class DexFileAndClassPair : ValueObject { void Next() { ++current_class_index_; - cached_descriptor_ = nullptr; + cached_descriptor_ = GetClassDescriptor(dex_file_.get(), current_class_index_); } size_t GetCurrentClassIndex() const { @@ -162,7 +164,7 @@ class DexFileAndClassPair : ValueObject { } const char* cached_descriptor_; - std::unique_ptr<const DexFile> dex_file_; + std::shared_ptr<const DexFile> dex_file_; size_t current_class_index_; bool from_loaded_oat_; // We only need to compare mismatches between what we load now // and what was loaded before. Any old duplicates must have been @@ -215,8 +217,17 @@ bool OatFileManager::HasCollisions(const OatFile* oat_file, // Add dex files from already loaded oat files, but skip boot. const OatFile* boot_oat = GetBootOatFile(); + // The same OatFile can be loaded multiple times at different addresses. In this case, we don't + // need to check both against each other since they would have resolved the same way at compile + // time. + std::unordered_set<std::string> unique_locations; for (const std::unique_ptr<const OatFile>& loaded_oat_file : oat_files_) { - if (loaded_oat_file.get() != boot_oat) { + DCHECK_NE(loaded_oat_file.get(), oat_file); + const std::string& location = loaded_oat_file->GetLocation(); + if (loaded_oat_file.get() != boot_oat && + location != oat_file->GetLocation() && + unique_locations.find(location) == unique_locations.end()) { + unique_locations.insert(location); AddDexFilesFromOat(loaded_oat_file.get(), /*already_loaded*/true, &queue); } } @@ -232,12 +243,12 @@ bool OatFileManager::HasCollisions(const OatFile* oat_file, // Now drain the queue. while (!queue.empty()) { // Modifying the top element is only safe if we pop right after. - DexFileAndClassPair compare_pop(std::move(const_cast<DexFileAndClassPair&>(queue.top()))); + DexFileAndClassPair compare_pop(queue.top()); queue.pop(); // Compare against the following elements. 
diff --git a/runtime/thread.h b/runtime/thread.h
index 8cea10c844..8f3461acdf 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -626,6 +626,24 @@ class Thread {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
                                                                 thread_local_objects));
   }
 
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> RosAllocRunsOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                rosalloc_runs));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalAllocStackTopOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_alloc_stack_top));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalAllocStackEndOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_alloc_stack_end));
+  }
+
   // Size of stack less any space reserved for stack overflow
   size_t GetStackSize() const {
     return tlsPtr_.stack_size - (tlsPtr_.stack_end - tlsPtr_.stack_begin);
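The Thread offsets added above exist so generated code can reach the thread-local rosalloc runs and the thread-local allocation stack without calling into the runtime. The arithmetic below is only a sketch of how such offsets are typically combined; the helper names and the layout math are assumptions for illustration, not the code the compiler backends emit.

// Illustrative sketch, not ART code: combining a "field offset inside Thread"
// with an index to compute the address a fast-path allocation would load from.
#include <cstddef>
#include <cstdint>
#include <iostream>

// Address of rosalloc_runs_[bracket_index] inside the Thread object, given the
// offset of the rosalloc_runs array (e.g. what Thread::RosAllocRunsOffset<8>()
// would report on a 64-bit target).
inline uintptr_t ThreadLocalRunSlot(uintptr_t thread_self,
                                    size_t rosalloc_runs_offset,
                                    size_t bracket_index,
                                    size_t pointer_size) {
  return thread_self + rosalloc_runs_offset + bracket_index * pointer_size;
}

// Address of the free-list head inside a run, given the offsets exposed by
// RosAlloc::RunFreeListOffset() and RosAlloc::RunFreeListHeadOffset().
inline uintptr_t RunFreeListHead(uintptr_t run,
                                 size_t run_free_list_offset,
                                 size_t free_list_head_offset) {
  return run + run_free_list_offset + free_list_head_offset;
}

int main() {
  // Purely arithmetic demo with made-up numbers: Thread at 0x1000, runs array
  // at offset 0x120, bracket 3, 64-bit pointers.
  std::cout << std::hex << ThreadLocalRunSlot(0x1000, 0x120, 3, 8) << "\n";  // 1138
  return 0;
}

A backend would emit dependent loads along these addresses and branch to the slow path when the free-list head turns out to be null.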
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 33c90e3000..02c93cf864 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -391,6 +391,34 @@ void RegisterLine::PopMonitor(MethodVerifier* verifier, uint32_t reg_idx) {
   }
 }
 
+// Check whether there is another register in the search map that is locked the same way as the
+// register in the src map. This establishes an alias.
+static bool FindLockAliasedRegister(
+    uint32_t src,
+    const AllocationTrackingSafeMap<uint32_t, uint32_t, kAllocatorTagVerifier>& src_map,
+    const AllocationTrackingSafeMap<uint32_t, uint32_t, kAllocatorTagVerifier>& search_map) {
+  auto it = src_map.find(src);
+  if (it == src_map.end()) {
+    // "Not locked" is trivially aliased.
+    return true;
+  }
+  uint32_t src_lock_levels = it->second;
+  if (src_lock_levels == 0) {
+    // "Not locked" is trivially aliased.
+    return true;
+  }
+
+  // Scan the map for the same value.
+  for (const std::pair<uint32_t, uint32_t>& pair : search_map) {
+    if (pair.first != src && pair.second == src_lock_levels) {
+      return true;
+    }
+  }
+
+  // Nothing found, no alias.
+  return false;
+}
+
 bool RegisterLine::MergeRegisters(MethodVerifier* verifier, const RegisterLine* incoming_line) {
   bool changed = false;
   DCHECK(incoming_line != nullptr);
@@ -417,9 +445,29 @@ bool RegisterLine::MergeRegisters(MethodVerifier* verifier, const RegisterLine*
       size_t depths = reg_to_lock_depths_.count(idx);
       size_t incoming_depths = incoming_line->reg_to_lock_depths_.count(idx);
       if (depths != incoming_depths) {
-        if (depths == 0 || incoming_depths == 0) {
-          reg_to_lock_depths_.erase(idx);
-        } else {
+        // Stack levels aren't matching. This is potentially bad, as we don't do a
+        // flow-sensitive analysis.
+        // However, this could be an alias of something locked in one path, and the alias was
+        // destroyed in another path. It is fine to drop this as long as there's another alias
+        // for the lock around. The last vanishing alias will then report that things would be
+        // left unlocked. We need to check for aliases for both lock levels.
+        //
+        // Example (lock status in curly braces as pair of register and lock levels):
+        //
+        // lock v1 {v1=1}
+        //  |    |
+        //  v0 = v1 {v0=1, v1=1}  v0 = v2 {v1=1}
+        //  |    |
+        //  {v1=1}
+        //  // Dropping v0, as the status can't be merged
+        //  // but the lock info ("locked at depth 1" and
+        //  // "not locked at all") is available.
+        if (!FindLockAliasedRegister(idx,
+                                     reg_to_lock_depths_,
+                                     reg_to_lock_depths_) ||
+            !FindLockAliasedRegister(idx,
+                                     incoming_line->reg_to_lock_depths_,
+                                     reg_to_lock_depths_)) {
           verifier->Fail(VERIFY_ERROR_LOCKING);
           if (kDumpLockFailures) {
             LOG(WARNING) << "mismatched stack depths for register v" << idx
@@ -429,20 +477,51 @@ bool RegisterLine::MergeRegisters(MethodVerifier* verifier, const RegisterLine*
           }
           break;
         }
+        // We found aliases, set this to zero.
+        reg_to_lock_depths_.erase(idx);
       } else if (depths > 0) {
         // Check whether they're actually the same levels.
         uint32_t locked_levels = reg_to_lock_depths_.find(idx)->second;
         uint32_t incoming_locked_levels = incoming_line->reg_to_lock_depths_.find(idx)->second;
         if (locked_levels != incoming_locked_levels) {
-          verifier->Fail(VERIFY_ERROR_LOCKING);
-          if (kDumpLockFailures) {
-            LOG(WARNING) << "mismatched lock levels for register v" << idx << ": "
-                << std::hex << locked_levels << std::dec << " != "
-                << std::hex << incoming_locked_levels << std::dec << " in "
-                << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                *verifier->GetMethodReference().dex_file);
+          // Lock levels aren't matching. This is potentially bad, as we don't do a
+          // flow-sensitive analysis.
+          // However, this could be an alias of something locked in one path, and the alias was
+          // destroyed in another path. It is fine to drop this as long as there's another alias
+          // for the lock around. The last vanishing alias will then report that things would be
+          // left unlocked. We need to check for aliases for both lock levels.
+          //
+          // Example (lock status in curly braces as pair of register and lock levels):
+          //
+          // lock v1 {v1=1}
+          // lock v2 {v1=1, v2=2}
+          //  |    |
+          //  v0 = v1 {v0=1, v1=1, v2=2}  v0 = v2 {v0=2, v1=1, v2=2}
+          //  |    |
+          //  {v1=1, v2=2}
+          //  // Dropping v0, as the status can't be
+          //  // merged but the lock info ("locked at
+          //  // depth 1" and "locked at depth 2") is
+          //  // available.
+          if (!FindLockAliasedRegister(idx,
+                                       reg_to_lock_depths_,
+                                       reg_to_lock_depths_) ||
+              !FindLockAliasedRegister(idx,
+                                       incoming_line->reg_to_lock_depths_,
+                                       reg_to_lock_depths_)) {
+            // No aliases for both current and incoming, we'll lose information.
+            verifier->Fail(VERIFY_ERROR_LOCKING);
+            if (kDumpLockFailures) {
+              LOG(WARNING) << "mismatched lock levels for register v" << idx << ": "
+                  << std::hex << locked_levels << std::dec << " != "
+                  << std::hex << incoming_locked_levels << std::dec << " in "
+                  << PrettyMethod(verifier->GetMethodReference().dex_method_index,
+                                  *verifier->GetMethodReference().dex_file);
+            }
+            break;
           }
-          break;
+          // We found aliases, set this to zero.
+          reg_to_lock_depths_.erase(idx);
         }
       }
     }
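The verifier change above tolerates mismatched lock information during a merge as long as some other register still aliases the same lock. The toy program below re-implements the alias scan over plain std::map (the real code uses ART's AllocationTrackingSafeMap) and replays the first commented example: v0's lock info may be dropped after the merge because v1 still carries "locked at depth 1".

// Illustrative re-implementation of the alias check, using std::map instead of
// ART's AllocationTrackingSafeMap. Keys are register numbers, values are lock
// depth bitmaps, as in reg_to_lock_depths_.
#include <cstdint>
#include <iostream>
#include <map>

static bool FindLockAliasedRegister(uint32_t src,
                                    const std::map<uint32_t, uint32_t>& src_map,
                                    const std::map<uint32_t, uint32_t>& search_map) {
  auto it = src_map.find(src);
  if (it == src_map.end() || it->second == 0) {
    return true;  // "Not locked" is trivially aliased.
  }
  for (const auto& pair : search_map) {
    if (pair.first != src && pair.second == it->second) {
      return true;  // Another register holds the same lock levels.
    }
  }
  return false;
}

int main() {
  // Path A: lock v1; v0 = v1  ->  {v0=1, v1=1}.   Path B: v0 = v2  ->  {v1=1}.
  std::map<uint32_t, uint32_t> current  = {{0, 1}, {1, 1}};
  std::map<uint32_t, uint32_t> incoming = {{1, 1}};
  uint32_t idx = 0;  // v0 has mismatched lock info between the two paths.
  bool safe_to_drop = FindLockAliasedRegister(idx, current, current) &&
                      FindLockAliasedRegister(idx, incoming, current);
  std::cout << (safe_to_drop ? "drop v0, v1 still aliases the lock\n"
                             : "locking failure\n");
  return 0;
}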
diff --git a/test/088-monitor-verification/src/TwoPath.java b/test/088-monitor-verification/src/TwoPath.java
index 2542de7f27..bdc15ad82e 100644
--- a/test/088-monitor-verification/src/TwoPath.java
+++ b/test/088-monitor-verification/src/TwoPath.java
@@ -31,6 +31,8 @@ public class TwoPath {
      * Conditionally uses one of the synchronized objects.
      */
     public static void twoPath(Object obj1, Object obj2, int x) {
+        Main.assertIsManaged();
+
         Object localObj;
 
         synchronized (obj1) {
diff --git a/test/131-structural-change/expected.txt b/test/131-structural-change/expected.txt
index cc7713d252..1d19278f1e 100644
--- a/test/131-structural-change/expected.txt
+++ b/test/131-structural-change/expected.txt
@@ -1,2 +1,3 @@
+JNI_OnLoad called
 Should really reach here.
 Done.
diff --git a/test/131-structural-change/src/Main.java b/test/131-structural-change/src/Main.java
index 6cbbd12387..c7488992df 100644
--- a/test/131-structural-change/src/Main.java
+++ b/test/131-structural-change/src/Main.java
@@ -35,7 +35,7 @@ public class Main {
             e.printStackTrace(System.out);
         }
 
-        boolean haveOatFile = hasOat();
+        boolean haveOatFile = hasOatFile();
         boolean gotError = false;
         try {
             Class<?> bClass = getClass().getClassLoader().loadClass("B");
@@ -45,10 +45,10 @@ public class Main {
             e.printStackTrace(System.out);
         }
         if (haveOatFile ^ gotError) {
-            System.out.println("Did not get expected error.");
+            System.out.println("Did not get expected error. " + haveOatFile + " " + gotError);
         }
         System.out.println("Done.");
     }
 
-    private native static boolean hasOat();
+    private native static boolean hasOatFile();
 }
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index ce8a0cd17a..e114a2e9f3 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -329,13 +329,15 @@ ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUIL
     $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),130-hprof,$(ALL_ADDRESS_SIZES))
 
 # 131 is an old test. The functionality has been implemented at an earlier stage and is checked
-# in tests 138.
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+# in tests 138. Blacklisted for debug builds since these builds have duplicate classes checks which
+# punt to interpreter.
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),debug,$(PREBUILD_TYPES), \
     $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
     $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),131-structural-change,$(ALL_ADDRESS_SIZES))
 
-# 138-duplicate-classes-check. Turned off temporarily, b/21333911.
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+# 138-duplicate-classes-check. Turned on for debug builds since debug builds have duplicate classes
+# checks enabled, b/21333911.
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),ndebug,$(PREBUILD_TYPES), \
     $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
     $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),138-duplicate-classes-check,$(ALL_ADDRESS_SIZES))