diff options
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc          97
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc            92
-rw-r--r--  compiler/optimizing/register_allocator.cc        51
-rw-r--r--  compiler/utils/x86/assembler_x86.cc               8
-rw-r--r--  compiler/utils/x86/assembler_x86.h                2
-rw-r--r--  compiler/utils/x86/assembler_x86_test.cc         12
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc         8
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h          2
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc   12
-rw-r--r--  dexdump/dexdump.cc                               51
-rw-r--r--  dexdump/dexdump.h                                 1
-rw-r--r--  dexdump/dexdump_main.cc                           6
-rw-r--r--  disassembler/disassembler_x86.cc                  3
-rw-r--r--  runtime/art_method-inl.h                          9
-rw-r--r--  runtime/art_method.h                              3
-rw-r--r--  runtime/gc/collector/semi_space.cc                6
-rw-r--r--  runtime/thread.cc                                20
-rw-r--r--  runtime/utils.cc                                369
-rw-r--r--  runtime/utils.h                                   3
-rw-r--r--  test/526-long-regalloc/expected.txt               0
-rw-r--r--  test/526-long-regalloc/info.txt                   2
-rw-r--r--  test/526-long-regalloc/src/Main.java             72
22 files changed, 801 insertions, 28 deletions
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 56313730dc..a5332ea794 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1055,6 +1055,102 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Temporary registers to store lengths of strings and for calculations. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { + vixl::MacroAssembler* masm = GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register str = WRegisterFrom(locations->InAt(0)); + Register arg = WRegisterFrom(locations->InAt(1)); + Register out = XRegisterFrom(locations->Out()); + + UseScratchRegisterScope scratch_scope(masm); + Register temp = scratch_scope.AcquireW(); + Register temp1 = WRegisterFrom(locations->GetTemp(0)); + Register temp2 = WRegisterFrom(locations->GetTemp(1)); + + vixl::Label loop; + vixl::Label end; + vixl::Label return_true; + vixl::Label return_false; + + // Get offsets of count, value, and class fields within a string object. + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + const int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check if input is null, return false if it is. + __ Cbz(arg, &return_false); + + // Reference equality check, return true if same reference. + __ Cmp(str, arg); + __ B(&return_true, eq); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Ldr(temp, MemOperand(str.X(), class_offset)); + __ Ldr(temp1, MemOperand(arg.X(), class_offset)); + __ Cmp(temp, temp1); + __ B(&return_false, ne); + + // Load lengths of this and argument strings. + __ Ldr(temp, MemOperand(str.X(), count_offset)); + __ Ldr(temp1, MemOperand(arg.X(), count_offset)); + // Check if lengths are equal, return false if they're not. + __ Cmp(temp, temp1); + __ B(&return_false, ne); + // Store offset of string value in preparation for comparison loop + __ Mov(temp1, value_offset); + // Return true if both strings are empty. + __ Cbz(temp, &return_true); + + // Assertions that must hold in order to compare strings 4 characters at a time. + DCHECK_ALIGNED(value_offset, 8); + static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); + + temp1 = temp1.X(); + temp2 = temp2.X(); + + // Loop to compare strings 4 characters at a time starting at the beginning of the string. + // Ok to do this because strings are zero-padded to be 8-byte aligned. 
+ __ Bind(&loop); + __ Ldr(out, MemOperand(str.X(), temp1)); + __ Ldr(temp2, MemOperand(arg.X(), temp1)); + __ Add(temp1, temp1, Operand(sizeof(uint64_t))); + __ Cmp(out, temp2); + __ B(&return_false, ne); + __ Sub(temp, temp, Operand(4), SetFlags); + __ B(&loop, gt); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ Mov(out, 1); + __ B(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ Mov(out, 0); + __ Bind(&end); +} + static void GenerateVisitStringIndexOf(HInvoke* invoke, vixl::MacroAssembler* masm, CodeGeneratorARM64* codegen, @@ -1229,7 +1325,6 @@ void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) -UNIMPLEMENTED_INTRINSIC(StringEquals) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 3c8be27fb9..4471d713e2 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -945,6 +945,97 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + + // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction. + locations->AddTemp(Location::RegisterLocation(ECX)); + locations->AddTemp(Location::RegisterLocation(EDI)); + + // Set output, ESI needed for repe_cmpsl instruction anyways. + locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) { + X86Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register str = locations->InAt(0).AsRegister<Register>(); + Register arg = locations->InAt(1).AsRegister<Register>(); + Register ecx = locations->GetTemp(0).AsRegister<Register>(); + Register edi = locations->GetTemp(1).AsRegister<Register>(); + Register esi = locations->Out().AsRegister<Register>(); + + Label end; + Label return_true; + Label return_false; + + // Get offsets of count, value, and class fields within a string object. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check if input is null, return false if it is. + __ testl(arg, arg); + __ j(kEqual, &return_false); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. 
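Aside: both StringEquals intrinsics (the ARM64 one above and the x86 one here) follow the same shape: reject null, accept reference equality, reject on class or length mismatch, then compare the backing char arrays one machine word at a time, relying on zero padding to make over-reads of odd-length strings harmless. A minimal portable C++ sketch of that core loop (an illustration of the idea, not ART code; it assumes the arrays are zero-padded to a multiple of four chars, which ART's 8-byte object alignment guarantees):

    #include <cstdint>
    #include <cstring>

    // Compare two UTF-16 arrays four chars (one 64-bit word) at a time.
    static bool ValueRegionsEqual(const uint16_t* a, const uint16_t* b, int32_t char_count) {
      int32_t word_count = (char_count + 3) / 4;  // round up, like the x86 (len + 1) >> 1 dword trick
      for (int32_t i = 0; i < word_count; ++i) {
        uint64_t wa, wb;
        std::memcpy(&wa, a + 4 * i, sizeof(wa));  // stands in for the Ldr / repe cmpsl loads
        std::memcpy(&wb, b + 4 * i, sizeof(wb));
        if (wa != wb) {
          return false;
        }
      }
      return true;
    }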
+ __ movl(ecx, Address(str, class_offset)); + __ cmpl(ecx, Address(arg, class_offset)); + __ j(kNotEqual, &return_false); + + // Reference equality check, return true if same reference. + __ cmpl(str, arg); + __ j(kEqual, &return_true); + + // Load length of receiver string. + __ movl(ecx, Address(str, count_offset)); + // Check if lengths are equal, return false if they're not. + __ cmpl(ecx, Address(arg, count_offset)); + __ j(kNotEqual, &return_false); + // Return true if both strings are empty. + __ testl(ecx, ecx); + __ j(kEqual, &return_true); + + // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction. + __ leal(esi, Address(str, value_offset)); + __ leal(edi, Address(arg, value_offset)); + + // Divide string length by 2 to compare characters 2 at a time and adjust for odd lengths. + __ addl(ecx, Immediate(1)); + __ shrl(ecx, Immediate(1)); + + // Assertions that must hold in order to compare strings 2 characters at a time. + DCHECK_ALIGNED(value_offset, 4); + static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded"); + + // Loop to compare strings two characters at a time starting at the beginning of the string. + __ repe_cmpsl(); + // If strings are not equal, zero flag will be cleared. + __ j(kNotEqual, &return_false); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ movl(esi, Immediate(1)); + __ jmp(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ xorl(esi, esi); + __ Bind(&end); +} + static void CreateStringIndexOfLocations(HInvoke* invoke, ArenaAllocator* allocator, bool start_at_zero) { @@ -1758,7 +1849,6 @@ UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros) UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros) -UNIMPLEMENTED_INTRINSIC(StringEquals) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 60f5ab24da..9f32a9eaf8 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -952,7 +952,16 @@ bool RegisterAllocator::PotentiallyRemoveOtherHalf(LiveInterval* interval, // we spill `current` instead. bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { size_t first_register_use = current->FirstRegisterUse(); - if (first_register_use == kNoLifetime) { + if (current->HasRegister()) { + DCHECK(current->IsHighInterval()); + // The low interval has allocated the register for the high interval. In + // case the low interval had to split both intervals, we may end up in a + // situation where the high interval does not have a register use anymore. + // We must still proceed in order to split currently active and inactive + // uses of the high interval's register, and put the high interval in the + // active set. + DCHECK(first_register_use != kNoLifetime || (current->GetNextSibling() != nullptr)); + } else if (first_register_use == kNoLifetime) { AllocateSpillSlotFor(current); return false; } @@ -1019,7 +1028,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // When allocating the low part, we made sure the high register was available. 
DCHECK_LT(first_use, next_use[reg]); } else if (current->IsLowInterval()) { - reg = FindAvailableRegisterPair(next_use, first_register_use); + reg = FindAvailableRegisterPair(next_use, first_use); // We should spill if both registers are not available. should_spill = (first_use >= next_use[reg]) || (first_use >= next_use[GetHighForLowRegister(reg)]); @@ -1033,16 +1042,28 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { if (should_spill) { DCHECK(!current->IsHighInterval()); bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1)); - if (current->IsLowInterval() - && is_allocation_at_use_site - && TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(), - first_register_use, - next_use)) { + if (is_allocation_at_use_site) { + if (!current->IsLowInterval()) { + DumpInterval(std::cerr, current); + DumpAllIntervals(std::cerr); + // This situation has the potential to infinite loop, so we make it a non-debug CHECK. + HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2); + CHECK(false) << "There is not enough registers available for " + << current->GetParent()->GetDefinedBy()->DebugName() << " " + << current->GetParent()->GetDefinedBy()->GetId() + << " at " << first_register_use - 1 << " " + << (at == nullptr ? "" : at->DebugName()); + } + // If we're allocating a register for `current` because the instruction at // that position requires it, but we think we should spill, then there are // non-pair intervals or unaligned pair intervals blocking the allocation. // We split the first interval found, and put ourselves first in the // `unhandled_` list. + bool success = TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(), + first_register_use, + next_use); + DCHECK(success); LiveInterval* existing = unhandled_->Peek(); DCHECK(existing->IsHighInterval()); DCHECK_EQ(existing->GetLowInterval(), current); @@ -1052,17 +1073,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // register, we split this interval just before its first register use. AllocateSpillSlotFor(current); LiveInterval* split = SplitBetween(current, current->GetStart(), first_register_use - 1); - if (current == split) { - DumpInterval(std::cerr, current); - DumpAllIntervals(std::cerr); - // This situation has the potential to infinite loop, so we make it a non-debug CHECK. - HInstruction* at = liveness_.GetInstructionFromPosition(first_register_use / 2); - CHECK(false) << "There is not enough registers available for " - << split->GetParent()->GetDefinedBy()->DebugName() << " " - << split->GetParent()->GetDefinedBy()->GetId() - << " at " << first_register_use - 1 << " " - << (at == nullptr ? "" : at->DebugName()); - } + DCHECK(current != split); AddSorted(unhandled_, split); } return false; @@ -1243,7 +1254,9 @@ LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { if (interval->IsHighInterval()) { - // The low interval will contain the spill slot. + // The low interval already took care of allocating the spill slot. 
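Aside: the invariants behind the DCHECKs that follow are easier to see in miniature. On a 32-bit target a long or double live range is a linked low/high interval pair; the low half drives allocation and owns the spill slot, while the high half only mirrors its decisions. A toy model (types and names illustrative, not ART's):

    #include <cassert>

    struct ToyInterval {
      ToyInterval* low = nullptr;          // set on the high half, points to its pair
      bool has_register = false;
      bool parent_has_spill_slot = false;
    };

    // Only the low half of a pair ever allocates the spill slot, so when the
    // high half arrives here the work must already be done.
    void ToyAllocateSpillSlotFor(ToyInterval* interval) {
      if (interval->low != nullptr) {      // this is the high half
        assert(!interval->low->has_register);
        assert(interval->low->parent_has_spill_slot);
        return;
      }
      interval->parent_has_spill_slot = true;  // stand-in for real slot assignment
    }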
+ DCHECK(!interval->GetLowInterval()->HasRegister()); + DCHECK(interval->GetLowInterval()->GetParent()->HasSpillSlot()); return; } diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 8c2a3ed637..914bd56683 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1552,6 +1552,14 @@ void X86Assembler::repe_cmpsl() { } +void X86Assembler::rep_movsw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0xF3); + EmitUint8(0xA5); +} + + X86Assembler* X86Assembler::lock() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF0); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 37c69fee9f..850e1dab32 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -470,6 +470,7 @@ class X86Assembler FINAL : public Assembler { void repne_scasw(); void repe_cmpsw(); void repe_cmpsl(); + void rep_movsw(); X86Assembler* lock(); void cmpxchgl(const Address& address, Register reg); @@ -649,7 +650,6 @@ class X86Assembler FINAL : public Assembler { void EmitComplex(int rm, const Operand& operand, const Immediate& immediate); void EmitLabel(Label* label, int instruction_size); void EmitLabelLink(Label* label); - void EmitNearLabelLink(Label* label); void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm); void EmitGenericShift(int rm, const Operand& operand, Register shifter); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index b664d2342f..c257b789be 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -218,4 +218,16 @@ TEST_F(AssemblerX86Test, Repecmpsl) { DriverStr(expected, "Repecmpsl"); } +TEST_F(AssemblerX86Test, RepneScasw) { + GetAssembler()->repne_scasw(); + const char* expected = "repne scasw\n"; + DriverStr(expected, "repne_scasw"); +} + +TEST_F(AssemblerX86Test, RepMovsw) { + GetAssembler()->rep_movsw(); + const char* expected = "rep movsw\n"; + DriverStr(expected, "rep_movsw"); +} + } // namespace art diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 22e7b9b120..38a9e72dae 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -2006,6 +2006,14 @@ void X86_64Assembler::jmp(Label* label) { } +void X86_64Assembler::rep_movsw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0xF3); + EmitUint8(0xA5); +} + + X86_64Assembler* X86_64Assembler::lock() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF0); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index beca0372ae..67807850c6 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -610,6 +610,7 @@ class X86_64Assembler FINAL : public Assembler { void repe_cmpsw(); void repe_cmpsl(); void repe_cmpsq(); + void rep_movsw(); // // Macros for High-level operations. 
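Aside: the three bytes emitted by the new rep_movsw() (the same on x86 and x86-64) are standard IA-32 prefixes around the string-move opcode, which is also what the disassembler change further down keys on:

    #include <cstdint>

    // 0x66 - operand-size override: selects the 16-bit form, MOVSW
    // 0xF3 - REP prefix: repeat, decrementing ECX/RCX each step
    // 0xA5 - MOVS: copy from [ESI/RSI] to [EDI/RDI], advancing both pointers
    static const uint8_t kRepMovsw[] = {0x66, 0xF3, 0xA5};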
@@ -803,7 +804,6 @@ class X86_64Assembler FINAL : public Assembler { void EmitComplex(uint8_t rm, const Operand& operand, const Immediate& immediate); void EmitLabel(Label* label, int instruction_size); void EmitLabelLink(Label* label); - void EmitNearLabelLink(Label* label); void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm); void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 296487e798..7e879e9ce1 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -836,6 +836,18 @@ TEST_F(AssemblerX86_64Test, Xorq) { DriverStr(expected, "xorq"); } +TEST_F(AssemblerX86_64Test, RepneScasw) { + GetAssembler()->repne_scasw(); + const char* expected = "repne scasw\n"; + DriverStr(expected, "repne_scasw"); +} + +TEST_F(AssemblerX86_64Test, RepMovsw) { + GetAssembler()->rep_movsw(); + const char* expected = "rep movsw\n"; + DriverStr(expected, "rep_movsw"); +} + TEST_F(AssemblerX86_64Test, Movsxd) { DriverStr(RepeatRr(&x86_64::X86_64Assembler::movsxd, "movsxd %{reg2}, %{reg1}"), "movsxd"); } diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc index 84c465fb8a..282db5de83 100644 --- a/dexdump/dexdump.cc +++ b/dexdump/dexdump.cc @@ -38,11 +38,14 @@ #include <inttypes.h> #include <stdio.h> +#include <iostream> #include <memory> +#include <sstream> #include <vector> #include "dex_file-inl.h" #include "dex_instruction-inl.h" +#include "utils.h" namespace art { @@ -1046,6 +1049,49 @@ static void dumpIField(const DexFile* pDexFile, u4 idx, u4 flags, int i) { } /* + * Dumping a CFG. Note that this will do duplicate work. utils.h doesn't expose the code-item + * version, so the DumpMethodCFG code will have to iterate again to find it. But dexdump is a + * tool, so this is not performance-critical. + */ + +static void dumpCfg(const DexFile* dex_file, + uint32_t dex_method_idx, + const DexFile::CodeItem* code_item) { + if (code_item != nullptr) { + std::ostringstream oss; + DumpMethodCFG(dex_file, dex_method_idx, oss); + fprintf(gOutFile, "%s", oss.str().c_str()); + } +} + +static void dumpCfg(const DexFile* dex_file, int idx) { + const DexFile::ClassDef& class_def = dex_file->GetClassDef(idx); + const uint8_t* class_data = dex_file->GetClassData(class_def); + if (class_data == nullptr) { // empty class such as a marker interface? + return; + } + ClassDataItemIterator it(*dex_file, class_data); + while (it.HasNextStaticField()) { + it.Next(); + } + while (it.HasNextInstanceField()) { + it.Next(); + } + while (it.HasNextDirectMethod()) { + dumpCfg(dex_file, + it.GetMemberIndex(), + it.GetMethodCodeItem()); + it.Next(); + } + while (it.HasNextVirtualMethod()) { + dumpCfg(dex_file, + it.GetMemberIndex(), + it.GetMethodCodeItem()); + it.Next(); + } +} + +/* * Dumps the class. * * Note "idx" is a DexClassDef index, not a DexTypeId index. @@ -1061,6 +1107,11 @@ static void dumpClass(const DexFile* pDexFile, int idx, char** pLastPackage) { return; } + if (gOptions.cfg) { + dumpCfg(pDexFile, idx); + return; + } + // For the XML output, show the package name. 
Ideally we'd gather // up the classes, sort them, and dump them alphabetically so the // package name wouldn't jump around, but that's not a great plan diff --git a/dexdump/dexdump.h b/dexdump/dexdump.h index f2cd16a6d7..50280a9f28 100644 --- a/dexdump/dexdump.h +++ b/dexdump/dexdump.h @@ -45,6 +45,7 @@ struct Options { bool showFileHeaders; bool showSectionHeaders; bool verbose; + bool cfg; OutputFormat outputFormat; const char* outputFileName; const char* tempFileName; diff --git a/dexdump/dexdump_main.cc b/dexdump/dexdump_main.cc index 9be0922877..2466f33d1e 100644 --- a/dexdump/dexdump_main.cc +++ b/dexdump/dexdump_main.cc @@ -46,6 +46,7 @@ static void usage(void) { fprintf(stderr, " -c : verify checksum and exit\n"); fprintf(stderr, " -d : disassemble code sections\n"); fprintf(stderr, " -f : display summary information from file header\n"); + fprintf(stderr, " -g : dump CFG for dex\n"); fprintf(stderr, " -h : display file header details\n"); fprintf(stderr, " -i : ignore checksum failures\n"); fprintf(stderr, " -l : output layout, either 'plain' or 'xml'\n"); @@ -68,7 +69,7 @@ int dexdumpDriver(int argc, char** argv) { // Parse all arguments. while (1) { - const int ic = getopt(argc, argv, "cdfhil:t:o:"); + const int ic = getopt(argc, argv, "cdfghil:t:o:"); if (ic < 0) { break; // done } @@ -82,6 +83,9 @@ int dexdumpDriver(int argc, char** argv) { case 'f': // dump outer file header gOptions.showFileHeaders = true; break; + case 'g': // dump cfg + gOptions.cfg = true; + break; case 'h': // dump section headers, i.e. all meta-data gOptions.showSectionHeaders = true; break; diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index 44787a7ac8..9bee1040ba 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -1117,6 +1117,9 @@ DISASSEMBLER_ENTRY(cmp, opcode1 = opcode_tmp.c_str(); } break; + case 0xA5: + opcode1 = (prefix[2] == 0x66 ? "movsw" : "movsl"); + break; case 0xA7: opcode1 = (prefix[2] == 0x66 ? 
"cmpsw" : "cmpsl"); break; diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h index bb3c72c433..51398cad35 100644 --- a/runtime/art_method-inl.h +++ b/runtime/art_method-inl.h @@ -63,6 +63,15 @@ inline void ArtMethod::SetDeclaringClass(mirror::Class* new_declaring_class) { declaring_class_ = GcRoot<mirror::Class>(new_declaring_class); } +inline bool ArtMethod::CASDeclaringClass(mirror::Class* expected_class, + mirror::Class* desired_class) { + GcRoot<mirror::Class> expected_root(expected_class); + GcRoot<mirror::Class> desired_root(desired_class); + return reinterpret_cast<Atomic<GcRoot<mirror::Class>>*>(&declaring_class_)-> + CompareExchangeStrongSequentiallyConsistent( + expected_root, desired_root); +} + inline uint32_t ArtMethod::GetAccessFlags() { DCHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() || GetDeclaringClass()->IsErroneous()); diff --git a/runtime/art_method.h b/runtime/art_method.h index 90352b7c08..85c03ed5e7 100644 --- a/runtime/art_method.h +++ b/runtime/art_method.h @@ -67,6 +67,9 @@ class ArtMethod FINAL { void SetDeclaringClass(mirror::Class *new_declaring_class) SHARED_REQUIRES(Locks::mutator_lock_); + bool CASDeclaringClass(mirror::Class* expected_class, mirror::Class* desired_class) + SHARED_REQUIRES(Locks::mutator_lock_); + static MemberOffset DeclaringClassOffset() { return MemberOffset(OFFSETOF_MEMBER(ArtMethod, declaring_class_)); } diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index c11c134326..fc2a801b7f 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -614,7 +614,9 @@ void SemiSpace::VisitRoots(mirror::Object*** roots, size_t count, for (size_t i = 0; i < count; ++i) { auto* root = roots[i]; auto ref = StackReference<mirror::Object>::FromMirrorPtr(*root); - MarkObject(&ref); + // The root can be in the to-space since we may visit the declaring class of an ArtMethod + // multiple times if it is on the call stack. + MarkObjectIfNotInToSpace(&ref); if (*root != ref.AsMirrorPtr()) { *root = ref.AsMirrorPtr(); } @@ -624,7 +626,7 @@ void SemiSpace::VisitRoots(mirror::Object*** roots, size_t count, void SemiSpace::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) { for (size_t i = 0; i < count; ++i) { - MarkObject(roots[i]); + MarkObjectIfNotInToSpace(roots[i]); } } diff --git a/runtime/thread.cc b/runtime/thread.cc index d54a7a6aa8..a33e150b93 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -2419,6 +2419,7 @@ class ReferenceMapVisitor : public StackVisitor { void VisitShadowFrame(ShadowFrame* shadow_frame) SHARED_REQUIRES(Locks::mutator_lock_) { ArtMethod* m = shadow_frame->GetMethod(); + VisitDeclaringClass(m); DCHECK(m != nullptr); size_t num_regs = shadow_frame->NumberOfVRegs(); if (m->IsNative() || shadow_frame->HasReferenceArray()) { @@ -2459,10 +2460,25 @@ class ReferenceMapVisitor : public StackVisitor { } private: + // Visiting the declaring class is necessary so that we don't unload the class of a method that + // is executing. We need to ensure that the code stays mapped. + void VisitDeclaringClass(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) { + mirror::Class* klass = method->GetDeclaringClassNoBarrier(); + // klass can be null for runtime methods. 
+ if (klass != nullptr) { + mirror::Object* new_ref = klass; + visitor_(&new_ref, -1, this); + if (new_ref != klass) { + method->CASDeclaringClass(klass, new_ref->AsClass()); + } + } + } + void VisitQuickFrame() SHARED_REQUIRES(Locks::mutator_lock_) { - auto* cur_quick_frame = GetCurrentQuickFrame(); + ArtMethod** cur_quick_frame = GetCurrentQuickFrame(); DCHECK(cur_quick_frame != nullptr); - auto* m = *cur_quick_frame; + ArtMethod* m = *cur_quick_frame; + VisitDeclaringClass(m); // Process register map (which native and runtime methods don't have) if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) { diff --git a/runtime/utils.cc b/runtime/utils.cc index 20512f9765..db3f2fecb8 100644 --- a/runtime/utils.cc +++ b/runtime/utils.cc @@ -30,6 +30,7 @@ #include "base/stl_util.h" #include "base/unix_file/fd_file.h" #include "dex_file-inl.h" +#include "dex_instruction.h" #include "mirror/class-inl.h" #include "mirror/class_loader.h" #include "mirror/object-inl.h" @@ -1452,4 +1453,372 @@ std::string PrettyDescriptor(Primitive::Type type) { return PrettyDescriptor(Primitive::Descriptor(type)); } +static void DumpMethodCFGImpl(const DexFile* dex_file, + uint32_t dex_method_idx, + const DexFile::CodeItem* code_item, + std::ostream& os) { + os << "digraph {\n"; + os << " # /* " << PrettyMethod(dex_method_idx, *dex_file, true) << " */\n"; + + std::set<uint32_t> dex_pc_is_branch_target; + { + // Go and populate. + const Instruction* inst = Instruction::At(code_item->insns_); + for (uint32_t dex_pc = 0; + dex_pc < code_item->insns_size_in_code_units_; + dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { + if (inst->IsBranch()) { + dex_pc_is_branch_target.insert(dex_pc + inst->GetTargetOffset()); + } else if (inst->IsSwitch()) { + const uint16_t* insns = code_item->insns_ + dex_pc; + int32_t switch_offset = insns[1] | ((int32_t) insns[2]) << 16; + const uint16_t* switch_insns = insns + switch_offset; + uint32_t switch_count = switch_insns[1]; + int32_t targets_offset; + if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { + /* 0=sig, 1=count, 2/3=firstKey */ + targets_offset = 4; + } else { + /* 0=sig, 1=count, 2..count*2 = keys */ + targets_offset = 2 + 2 * switch_count; + } + for (uint32_t targ = 0; targ < switch_count; targ++) { + int32_t offset = (int32_t) switch_insns[targets_offset + targ * 2] | + (int32_t) (switch_insns[targets_offset + targ * 2 + 1] << 16); + dex_pc_is_branch_target.insert(dex_pc + offset); + } + } + } + } + + // Create nodes for "basic blocks." + std::map<uint32_t, uint32_t> dex_pc_to_node_id; // This only has entries for block starts. + std::map<uint32_t, uint32_t> dex_pc_to_incl_id; // This has entries for all dex pcs. + + { + const Instruction* inst = Instruction::At(code_item->insns_); + bool first_in_block = true; + bool force_new_block = false; + for (uint32_t dex_pc = 0; dex_pc < code_item->insns_size_in_code_units_; + dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { + if (dex_pc == 0 || + (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) || + force_new_block) { + uint32_t id = dex_pc_to_node_id.size(); + if (id > 0) { + // End last node. + os << "}\"];\n"; + } + // Start next node. + os << " node" << id << " [shape=record,label=\"{"; + dex_pc_to_node_id.insert(std::make_pair(dex_pc, id)); + first_in_block = true; + force_new_block = false; + } + + // Register instruction. + dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1)); + + // Print instruction. 
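Aside: the switch-payload arithmetic above (and repeated in the edge pass below) comes straight from the dex bytecode format, where a packed-switch payload is { ushort ident; ushort size; int first_key; int targets[size]; } and a sparse-switch payload is { ushort ident; ushort size; int keys[size]; int targets[size]; }. As a standalone helper (hypothetical, for illustration):

    #include <cstdint>

    // Offset, in 16-bit code units, of the target table inside a switch payload.
    static int32_t SwitchTargetsOffset(const uint16_t* payload, bool is_packed) {
      const uint16_t size = payload[1];
      return is_packed ? 4              // ident, size, first_key (2 units)
                       : 2 + 2 * size;  // ident, size, keys[size] (2 units each)
    }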
+ if (!first_in_block) { + os << " | "; + } else { + first_in_block = false; + } + + // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'. + os << "<" << "p" << dex_pc << ">"; + os << " 0x" << std::hex << dex_pc << std::dec << ": "; + std::string inst_str = inst->DumpString(dex_file); + size_t cur_start = 0; // It's OK to start at zero, instruction dumps don't start with chars + // we need to escape. + while (cur_start != std::string::npos) { + size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1); + if (next_escape == std::string::npos) { + os << inst_str.substr(cur_start, inst_str.size() - cur_start); + break; + } else { + os << inst_str.substr(cur_start, next_escape - cur_start); + // Escape all necessary characters. + while (next_escape < inst_str.size()) { + char c = inst_str.at(next_escape); + if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') { + os << '\\' << c; + } else { + break; + } + next_escape++; + } + if (next_escape >= inst_str.size()) { + next_escape = std::string::npos; + } + cur_start = next_escape; + } + } + + // Force a new block for some fall-throughs and some instructions that terminate the "local" + // control flow. + force_new_block = inst->IsSwitch() || inst->IsBasicBlockEnd(); + } + // Close last node. + if (dex_pc_to_node_id.size() > 0) { + os << "}\"];\n"; + } + } + + // Create edges between them. + { + std::ostringstream regular_edges; + std::ostringstream taken_edges; + std::ostringstream exception_edges; + + // Common set of exception edges. + std::set<uint32_t> exception_targets; + + // These blocks (given by the first dex pc) need exception per dex-pc handling in a second + // pass. In the first pass we try and see whether we can use a common set of edges. + std::set<uint32_t> blocks_with_detailed_exceptions; + + { + uint32_t last_node_id = std::numeric_limits<uint32_t>::max(); + uint32_t old_dex_pc = 0; + uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max(); + const Instruction* inst = Instruction::At(code_item->insns_); + for (uint32_t dex_pc = 0; + dex_pc < code_item->insns_size_in_code_units_; + old_dex_pc = dex_pc, dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { + { + auto it = dex_pc_to_node_id.find(dex_pc); + if (it != dex_pc_to_node_id.end()) { + if (!exception_targets.empty()) { + // It seems the last block had common exception handlers. Add the exception edges now. + uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second; + for (uint32_t handler_pc : exception_targets) { + auto node_id_it = dex_pc_to_incl_id.find(handler_pc); + if (node_id_it != dex_pc_to_incl_id.end()) { + exception_edges << " node" << node_id + << " -> node" << node_id_it->second << ":p" << handler_pc + << ";\n"; + } + } + exception_targets.clear(); + } + + block_start_dex_pc = dex_pc; + + // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things + // like switch data. + uint32_t old_last = last_node_id; + last_node_id = it->second; + if (old_last != std::numeric_limits<uint32_t>::max()) { + regular_edges << " node" << old_last << ":p" << old_dex_pc + << " -> node" << last_node_id << ":p" << dex_pc + << ";\n"; + } + } + + // Look at the exceptions of the first entry. + CatchHandlerIterator catch_it(*code_item, dex_pc); + for (; catch_it.HasNext(); catch_it.Next()) { + exception_targets.insert(catch_it.GetHandlerAddress()); + } + } + + // Handle instruction. + + // Branch: something with at most two targets. 
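Aside: the escape loop above is needed because Graphviz record labels give '"', '{', '}', '<' and '>' structural meaning. The same transformation, restated as a standalone function:

    #include <string>

    // Backslash-escape the five characters that are structural inside a
    // Graphviz record label; everything else passes through unchanged.
    static std::string EscapeForDotRecordLabel(const std::string& in) {
      std::string out;
      for (char c : in) {
        if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') {
          out += '\\';
        }
        out += c;
      }
      return out;
    }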
+ if (inst->IsBranch()) { + const int32_t offset = inst->GetTargetOffset(); + const bool conditional = !inst->IsUnconditional(); + + auto target_it = dex_pc_to_node_id.find(dex_pc + offset); + if (target_it != dex_pc_to_node_id.end()) { + taken_edges << " node" << last_node_id << ":p" << dex_pc + << " -> node" << target_it->second << ":p" << (dex_pc + offset) + << ";\n"; + } + if (!conditional) { + // No fall-through. + last_node_id = std::numeric_limits<uint32_t>::max(); + } + } else if (inst->IsSwitch()) { + // TODO: Iterate through all switch targets. + const uint16_t* insns = code_item->insns_ + dex_pc; + /* make sure the start of the switch is in range */ + int32_t switch_offset = insns[1] | ((int32_t) insns[2]) << 16; + /* offset to switch table is a relative branch-style offset */ + const uint16_t* switch_insns = insns + switch_offset; + uint32_t switch_count = switch_insns[1]; + int32_t targets_offset; + if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { + /* 0=sig, 1=count, 2/3=firstKey */ + targets_offset = 4; + } else { + /* 0=sig, 1=count, 2..count*2 = keys */ + targets_offset = 2 + 2 * switch_count; + } + /* make sure the end of the switch is in range */ + /* verify each switch target */ + for (uint32_t targ = 0; targ < switch_count; targ++) { + int32_t offset = (int32_t) switch_insns[targets_offset + targ * 2] | + (int32_t) (switch_insns[targets_offset + targ * 2 + 1] << 16); + int32_t abs_offset = dex_pc + offset; + auto target_it = dex_pc_to_node_id.find(abs_offset); + if (target_it != dex_pc_to_node_id.end()) { + // TODO: value label. + taken_edges << " node" << last_node_id << ":p" << dex_pc + << " -> node" << target_it->second << ":p" << (abs_offset) + << ";\n"; + } + } + } + + // Exception edges. If this is not the first instruction in the block + if (block_start_dex_pc != dex_pc) { + std::set<uint32_t> current_handler_pcs; + CatchHandlerIterator catch_it(*code_item, dex_pc); + for (; catch_it.HasNext(); catch_it.Next()) { + current_handler_pcs.insert(catch_it.GetHandlerAddress()); + } + if (current_handler_pcs != exception_targets) { + exception_targets.clear(); // Clear so we don't do something at the end. + blocks_with_detailed_exceptions.insert(block_start_dex_pc); + } + } + + if (inst->IsReturn() || + (inst->Opcode() == Instruction::THROW) || + (inst->IsBranch() && inst->IsUnconditional())) { + // No fall-through. + last_node_id = std::numeric_limits<uint32_t>::max(); + } + } + // Finish up the last block, if it had common exceptions. + if (!exception_targets.empty()) { + // It seems the last block had common exception handlers. Add the exception edges now. + uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second; + for (uint32_t handler_pc : exception_targets) { + auto node_id_it = dex_pc_to_incl_id.find(handler_pc); + if (node_id_it != dex_pc_to_incl_id.end()) { + exception_edges << " node" << node_id + << " -> node" << node_id_it->second << ":p" << handler_pc + << ";\n"; + } + } + exception_targets.clear(); + } + } + + // Second pass for detailed exception blocks. + // TODO + // Exception edges. 
If this is not the first instruction in the block + for (uint32_t dex_pc : blocks_with_detailed_exceptions) { + const Instruction* inst = Instruction::At(&code_item->insns_[dex_pc]); + uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second; + for (;;) { + CatchHandlerIterator catch_it(*code_item, dex_pc); + if (catch_it.HasNext()) { + std::set<uint32_t> handled_targets; + for (; catch_it.HasNext(); catch_it.Next()) { + uint32_t handler_pc = catch_it.GetHandlerAddress(); + auto it = handled_targets.find(handler_pc); + if (it == handled_targets.end()) { + auto node_id_it = dex_pc_to_incl_id.find(handler_pc); + if (node_id_it != dex_pc_to_incl_id.end()) { + exception_edges << " node" << this_node_id << ":p" << dex_pc + << " -> node" << node_id_it->second << ":p" << handler_pc + << ";\n"; + } + + // Mark as done. + handled_targets.insert(handler_pc); + } + } + } + if (inst->IsBasicBlockEnd()) { + break; + } + + // Loop update. In the body to have a late break-out if the next instruction is a branch + // target and thus in another block. + dex_pc += inst->SizeInCodeUnits(); + if (dex_pc >= code_item->insns_size_in_code_units_) { + break; + } + if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) { + break; + } + inst = inst->Next(); + } + } + + // Write out the sub-graphs to make edges styled. + os << "\n"; + os << " subgraph regular_edges {\n"; + os << " edge [color=\"#000000\",weight=.3,len=3];\n\n"; + os << " " << regular_edges.str() << "\n"; + os << " }\n\n"; + + os << " subgraph taken_edges {\n"; + os << " edge [color=\"#00FF00\",weight=.3,len=3];\n\n"; + os << " " << taken_edges.str() << "\n"; + os << " }\n\n"; + + os << " subgraph exception_edges {\n"; + os << " edge [color=\"#FF0000\",weight=.3,len=3];\n\n"; + os << " " << exception_edges.str() << "\n"; + os << " }\n\n"; + } + + os << "}\n"; +} + +void DumpMethodCFG(ArtMethod* method, std::ostream& os) { + const DexFile* dex_file = method->GetDexFile(); + const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); + + DumpMethodCFGImpl(dex_file, method->GetDexMethodIndex(), code_item, os); +} + +void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) { + // This is painful, we need to find the code item. That means finding the class, and then + // iterating the table. + if (dex_method_idx >= dex_file->NumMethodIds()) { + os << "Could not find method-idx."; + return; + } + const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx); + + const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_); + if (class_def == nullptr) { + os << "Could not find class-def."; + return; + } + + const uint8_t* class_data = dex_file->GetClassData(*class_def); + if (class_data == nullptr) { + os << "No class data."; + return; + } + + ClassDataItemIterator it(*dex_file, class_data); + // Skip fields + while (it.HasNextStaticField() || it.HasNextInstanceField()) { + it.Next(); + } + + // Find method, and dump it. + while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) { + uint32_t method_idx = it.GetMemberIndex(); + if (method_idx == dex_method_idx) { + DumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os); + return; + } + it.Next(); + } + + // Otherwise complain. 
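Aside: the emitted file is ordinary Graphviz dot: one record node per basic block, one port per dex pc, and three styled edge subgraphs. A hand-written miniature of the shape (illustrative, not actual tool output):

    digraph {
      # /* void Example.method() */
      node0 [shape=record,label="{<p0> 0x0: if-eqz v0, +3}"];
      node1 [shape=record,label="{<p2> 0x2: const/4 v0, #int 1}"];
      node2 [shape=record,label="{<p3> 0x3: return-void}"];

      subgraph regular_edges {
        edge [color="#000000",weight=.3,len=3];
        node0:p0 -> node1:p2;
        node1:p2 -> node2:p3;
      }
      subgraph taken_edges {
        edge [color="#00FF00",weight=.3,len=3];
        node0:p0 -> node2:p3;
      }
    }

To view a real graph, run the new dexdump -g over a dex file and feed the output to Graphviz, e.g. dot -Tpng (assuming Graphviz is installed).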
+ os << "Something went wrong, didn't find the method in the class data."; +} + } // namespace art diff --git a/runtime/utils.h b/runtime/utils.h index 4fa5f5a539..d1be51aff7 100644 --- a/runtime/utils.h +++ b/runtime/utils.h @@ -324,6 +324,9 @@ static inline constexpr bool ValidPointerSize(size_t pointer_size) { return pointer_size == 4 || pointer_size == 8; } +void DumpMethodCFG(ArtMethod* method, std::ostream& os) SHARED_REQUIRES(Locks::mutator_lock_); +void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os); + } // namespace art #endif // ART_RUNTIME_UTILS_H_ diff --git a/test/526-long-regalloc/expected.txt b/test/526-long-regalloc/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/526-long-regalloc/expected.txt diff --git a/test/526-long-regalloc/info.txt b/test/526-long-regalloc/info.txt new file mode 100644 index 0000000000..a5ce1bc011 --- /dev/null +++ b/test/526-long-regalloc/info.txt @@ -0,0 +1,2 @@ +Regression test for optimizing that used to trip when allocating a register +pair under certain circumstances. diff --git a/test/526-long-regalloc/src/Main.java b/test/526-long-regalloc/src/Main.java new file mode 100644 index 0000000000..e8b3096d06 --- /dev/null +++ b/test/526-long-regalloc/src/Main.java @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +class Main { + public static void main(String[] args) { + foo(); + } + + public static void foo() { + int a = myField1; // esi + int b = myField2; // edi + $noinline$bar(); // makes allocation of a and b to be callee-save registers + int c = myField3; // ecx + int e = myField4; // ebx + int f = myField5; // edx + long d = a == 42 ? myLongField1 : 42L; // Will call AllocateBlockedReg -> edx/ebx + + // At this point, the register allocator used to be in a bogus state, where the low + // part of the interval was in the active set, but not the high part. + + long i = myLongField1; // Will call TrySplitNonPairOrUnalignedPairIntervalAt -> Failing DCHECK + + // Use esi and edi first to not have d allocated to them. + myField2 = a; + myField3 = b; + + // The following sequence of instructions are making the AllocateBlockedReg call + // for allocating the d variable misbehave: allocation of the low interval would split + // both low and high interval at the fixed use; therefore the allocation of the high interval + // would not see the register use, and think the interval can just be spilled and not be + // put in the active set, even though it is holding a register. + myField1 = (int)d; // stack use + myLongField3 = (long) myField2; // edx fixed use + myLongField2 = d; // register use + + // Ensure the HInstruction mapping to i, c, e, and f have a live range. 
+ myLongField1 = i; + myField4 = c; + myField5 = e; + myField6 = f; + } + + public static long $noinline$bar() { + if (doThrow) throw new Error(); + return 42; + } + + public static boolean doThrow = false; + + public static int myField1 = 0; + public static int myField2 = 0; + public static int myField3 = 0; + public static int myField4 = 0; + public static int myField5 = 0; + public static int myField6 = 0; + public static long myLongField1 = 0L; + public static long myLongField2 = 0L; + public static long myLongField3 = 0L; +}