Diffstat (limited to 'compiler'): 48 files changed, 3652 insertions, 1010 deletions
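The bulk of this change introduces `ReadBarrierMarkAndUpdateFieldSlowPathARM`/`ARM64`, which (for the `UnsafeCASObject` intrinsic) mark an object reference and then, via an LDREX/STREX (LDXR/STXR) loop, atomically swing the holder's field from the old reference to the marked one unless another thread has already stored a different reference there. As rough orientation before reading the diff, here is a minimal standalone C++ sketch of that "mark, then strong compare-and-set with relaxed ordering" pattern. It is not ART code and does not mirror the generated assembly; `MarkReference` and `MarkAndUpdateField` are hypothetical stand-ins for the `ReadBarrierMarkRegX` entry point and the new slow path.

```cpp
// Hypothetical, standalone illustration -- not ART code. It mirrors the
// semantics of the new mark-and-update slow paths: mark a reference, then
// try once (strong CAS, relaxed ordering) to swing the holder's field from
// the old reference to the marked one, giving up if another thread has
// already installed a different reference.
#include <atomic>
#include <cstdint>
#include <cstdio>

using HeapReference = uint32_t;  // Compressed 32-bit heap reference, as in ART.

// Stand-in for the ReadBarrierMarkRegX entry point: returns the to-space
// address of `ref` (modeled here as a fixed offset, i.e. "the object moved").
static HeapReference MarkReference(HeapReference ref) {
  return ref + 0x1000;
}

// After marking, update `*field` only if it still holds the old reference.
// A strong compare-and-set with relaxed ordering is enough: losing the race
// is fine, because whoever won necessarily installed a valid reference.
static void MarkAndUpdateField(std::atomic<HeapReference>* field, HeapReference old_ref) {
  HeapReference new_ref = MarkReference(old_ref);
  if (new_ref == old_ref) {
    return;  // Nothing moved; the field is already up-to-date.
  }
  HeapReference expected = old_ref;
  field->compare_exchange_strong(expected, new_ref, std::memory_order_relaxed);
  // On failure, `expected` holds whatever another thread stored; leave it as-is.
}

int main() {
  std::atomic<HeapReference> field{0x2000};
  MarkAndUpdateField(&field, 0x2000);
  std::printf("field = 0x%x\n", static_cast<unsigned>(field.load()));  // 0x3000 here.
  return 0;
}
```

In the patch below, the same shape appears twice: once hand-rolled with LDREX/SUBS/STREX on ARM and once with LDXR/CMP/STXR on AArch64, each guarded by the "new reference differs from old reference" check before entering the CAS loop.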
diff --git a/compiler/compiler.h b/compiler/compiler.h index 9a69456b5a..2ca0b77a73 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -18,6 +18,7 @@ #define ART_COMPILER_COMPILER_H_ #include "dex_file.h" +#include "base/mutex.h" #include "os.h" namespace art { @@ -34,6 +35,7 @@ class CompilerDriver; class CompiledMethod; template<class T> class Handle; class OatWriter; +class Thread; class Compiler { public: diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 56b4ebd608..1b87725230 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -24,6 +24,8 @@ #include <malloc.h> // For mallinfo #endif +#include "android-base/strings.h" + #include "art_field-inl.h" #include "art_method-inl.h" #include "base/array_ref.h" @@ -2011,12 +2013,18 @@ class VerifyClassVisitor : public CompilationVisitor { CHECK(klass->IsCompileTimeVerified() || klass->IsErroneous()) << klass->PrettyDescriptor() << ": state=" << klass->GetStatus(); - // It is *very* problematic if there are verification errors in the boot classpath. - // For example, we rely on things working OK without verification when the - // decryption dialog is brought up. So abort in a debug build if we find this violated. - DCHECK(!manager_->GetCompiler()->GetCompilerOptions().IsBootImage() || klass->IsVerified()) - << "Boot classpath class " << klass->PrettyClass() - << " failed to fully verify."; + // It is *very* problematic if there are verification errors in the boot classpath. For example, + // we rely on things working OK without verification when the decryption dialog is brought up. + // So abort in a debug build if we find this violated. + if (kIsDebugBuild) { + // TODO(narayan): Remove this special case for signature polymorphic + // invokes once verifier support is fully implemented. + if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage() && + !android::base::StartsWith(descriptor, "Ljava/lang/invoke/")) { + DCHECK(klass->IsVerified()) << "Boot classpath class " << klass->PrettyClass() + << " failed to fully verify: state= " << klass->GetStatus(); + } + } } else { // Make the skip a soft failure, essentially being considered as verify at runtime. failure_kind = verifier::MethodVerifier::kSoftFailure; diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index d1ac139912..59f339a9a2 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -963,21 +963,21 @@ void ImageWriter::DumpImageClasses() { mirror::String* ImageWriter::FindInternedString(mirror::String* string) { Thread* const self = Thread::Current(); for (const ImageInfo& image_info : image_infos_) { - mirror::String* const found = image_info.intern_table_->LookupStrong(self, string); + ObjPtr<mirror::String> const found = image_info.intern_table_->LookupStrong(self, string); DCHECK(image_info.intern_table_->LookupWeak(self, string) == nullptr) << string->ToModifiedUtf8(); if (found != nullptr) { - return found; + return found.Ptr(); } } if (compile_app_image_) { Runtime* const runtime = Runtime::Current(); - mirror::String* found = runtime->GetInternTable()->LookupStrong(self, string); + ObjPtr<mirror::String> found = runtime->GetInternTable()->LookupStrong(self, string); // If we found it in the runtime intern table it could either be in the boot image or interned // during app image compilation. If it was in the boot image return that, otherwise return null // since it belongs to another image space. 
- if (found != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(found)) { - return found; + if (found != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(found.Ptr())) { + return found.Ptr(); } DCHECK(runtime->GetInternTable()->LookupWeak(self, string) == nullptr) << string->ToModifiedUtf8(); @@ -1088,7 +1088,8 @@ mirror::Object* ImageWriter::TryAssignBinSlot(WorkStack& work_stack, mirror::String* interned = FindInternedString(obj->AsString()); if (interned == nullptr) { // Not in another image space, insert to our table. - interned = GetImageInfo(oat_index).intern_table_->InternStrongImageString(obj->AsString()); + interned = + GetImageInfo(oat_index).intern_table_->InternStrongImageString(obj->AsString()).Ptr(); DCHECK_EQ(interned, obj); } } else if (obj->IsDexCache()) { @@ -1448,7 +1449,7 @@ void ImageWriter::CalculateNewObjectOffsets() { for (size_t i = 0, count = dex_file->NumStringIds(); i < count; ++i) { uint32_t utf16_length; const char* utf8_data = dex_file->StringDataAndUtf16LengthByIdx(i, &utf16_length); - mirror::String* string = intern_table->LookupStrong(self, utf16_length, utf8_data); + mirror::String* string = intern_table->LookupStrong(self, utf16_length, utf8_data).Ptr(); TryAssignBinSlot(work_stack, string, oat_index); } } diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index ca1dc693eb..4960a7343e 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -532,6 +532,25 @@ struct ScopedCheckHandleScope { BaseHandleScope* const handle_scope_; }; +// Number of references allocated in JNI ShadowFrames on the given thread. +static size_t NumJniShadowFrameReferences(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { + return self->GetManagedStack()->NumJniShadowFrameReferences(); +} + +// Number of references in handle scope on the given thread. +static size_t NumHandleReferences(Thread* self) { + size_t count = 0; + for (BaseHandleScope* cur = self->GetTopHandleScope(); cur != nullptr; cur = cur->GetLink()) { + count += cur->NumberOfReferences(); + } + return count; +} + +// Number of references allocated in handle scopes & JNI shadow frames on this thread. +static size_t NumStackReferences(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { + return NumHandleReferences(self) + NumJniShadowFrameReferences(self); +} + static void expectNumStackReferences(size_t val1, size_t val2) { // In rare cases when JNI functions call themselves recursively, // disable this test because it will have a false negative. @@ -539,7 +558,7 @@ static void expectNumStackReferences(size_t val1, size_t val2) { /* @CriticalNative doesn't build a HandleScope, so this test is meaningless then. */ ScopedObjectAccess soa(Thread::Current()); - size_t actual_num = Thread::Current()->NumStackReferences(); + size_t actual_num = NumStackReferences(Thread::Current()); // XX: Not too sure what's going on. // Sometimes null references get placed and sometimes they don't? EXPECT_TRUE(val1 == actual_num || val2 == actual_num) diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 6cbca7a6dc..f9173f50a6 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1759,7 +1759,7 @@ bool OatWriter::WriteVerifierDeps(OutputStream* vdex_out, verifier::VerifierDeps } std::vector<uint8_t> buffer; - verifier_deps->Encode(&buffer); + verifier_deps->Encode(*dex_files_, &buffer); if (!vdex_out->WriteFully(buffer.data(), buffer.size())) { PLOG(ERROR) << "Failed to write verifier deps." 
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 529fc9e261..d2357a5d05 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1845,8 +1845,8 @@ void BoundsCheckElimination::Run() { // that value dominated by that instruction fits in that range. Range of that // value can be narrowed further down in the dominator tree. BCEVisitor visitor(graph_, side_effects_, induction_analysis_); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); + for (size_t i = 0, size = graph_->GetReversePostOrder().size(); i != size; ++i) { + HBasicBlock* current = graph_->GetReversePostOrder()[i]; if (visitor.IsAddedBlock(current)) { // Skip added blocks. Their effects are already taken care of. continue; @@ -1855,8 +1855,11 @@ void BoundsCheckElimination::Run() { // Skip forward to the current block in case new basic blocks were inserted // (which always appear earlier in reverse post order) to avoid visiting the // same basic block twice. - for ( ; !it.Done() && it.Current() != current; it.Advance()) { - } + size_t new_size = graph_->GetReversePostOrder().size(); + DCHECK_GE(new_size, size); + i += new_size - size; + DCHECK_EQ(current, graph_->GetReversePostOrder()[i]); + size = new_size; } // Perform cleanup. diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 0f8cdbb19b..8b450e11dc 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -671,9 +671,9 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, return; } ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc)); - for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) { - if (it.Current()->IsLoopHeader()) { - HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck(); + for (HBasicBlock* block : graph.GetReversePostOrder()) { + if (block->IsLoopHeader()) { + HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { loop_headers.push_back(suspend_check); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 9f92b20929..be65f89ef1 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -601,11 +601,21 @@ class ArraySetSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { public: - ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj) - : SlowPathCodeARM(instruction), obj_(obj) { + ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location ref) + : SlowPathCodeARM(instruction), ref_(ref) { DCHECK(kEmitCompilerReadBarrier); } @@ -613,9 +623,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Register reg = obj_.AsRegister<Register>(); + Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -634,40 +644,213 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - DCHECK_NE(reg, SP); - DCHECK_NE(reg, LR); - DCHECK_NE(reg, PC); + DCHECK_NE(ref_reg, SP); + DCHECK_NE(ref_reg, LR); + DCHECK_NE(ref_reg, PC); // IP is used internally by the ReadBarrierMarkRegX entry point // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(reg, IP); - DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg; + DCHECK_NE(ref_reg, IP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in R0): // - // R0 <- obj + // R0 <- ref // R0 <- ReadBarrierMark(R0) - // obj <- R0 + // ref <- R0 // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); // This runtime call does not require a stack map. arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ b(GetExitLabel()); } private: - const Location obj_; + // The location (register) of the marked object reference. + const Location ref_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathARM above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). 
+class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { + public: + ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp1, + Register temp2) + : SlowPathCodeARM(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp1_(temp1), + temp2_(temp2) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegisterPair()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use IP to save the old reference, as IP is + // used internally by the ReadBarrierMarkRegX entry point, and we + // need the old reference after the call to that entry point. + DCHECK_NE(temp1_, IP); + __ Mov(temp1_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + DCHECK_NE(ref_reg, SP); + DCHECK_NE(ref_reg, LR); + DCHECK_NE(ref_reg, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_reg, IP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in R0): + // + // R0 <- ref + // R0 <- ReadBarrierMark(R0) + // ref <- R0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set + // (CAS) operation below would abort the CAS, leaving the field + // as-is. + Label done; + __ cmp(temp1_, ShifterOperand(ref_reg)); + __ b(&done, EQ); + + // Update the the holder's field atomically. This may fail if + // mutator updates before us, but it's OK. 
This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + // Convenience aliases. + Register base = obj_; + // The UnsafeCASObject intrinsic uses a register pair as field + // offset ("long offset"), of which only the low part contains + // data. + Register offset = field_offset_.AsRegisterPairLow<Register>(); + Register expected = temp1_; + Register value = ref_reg; + Register tmp_ptr = IP; // Pointer to actual memory. + Register tmp = temp2_; // Value in memory. + + __ add(tmp_ptr, base, ShifterOperand(offset)); + + if (kPoisonHeapReferences) { + __ PoisonHeapReference(expected); + if (value == expected) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. + } else { + __ PoisonHeapReference(value); + } + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + + Label loop_head, exit_loop; + __ Bind(&loop_head); + + __ ldrex(tmp, tmp_ptr); + + __ subs(tmp, tmp, ShifterOperand(expected)); + + __ it(NE); + __ clrex(NE); + + __ b(&exit_loop, NE); + + __ strex(tmp, value, tmp_ptr); + __ cmp(tmp, ShifterOperand(1)); + __ b(&loop_head, EQ); + + __ Bind(&exit_loop); + + if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(expected); + if (value == expected) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. + } else { + __ UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ b(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const Register temp1_; + const Register temp2_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM { public: @@ -5773,7 +5956,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { __ movt(temp, /* placeholder */ 0u); __ BindTrackedLabel(&labels->add_pc_label); __ add(temp, temp, ShifterOperand(PC)); - GenerateGcRootFieldLoad(load, out_loc, temp, 0); + GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kEmitCompilerReadBarrier); SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); codegen_->AddSlowPath(slow_path); __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); @@ -6644,7 +6827,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + Register* temp2) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6689,8 +6874,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The actual reference load. if (index.IsValid()) { - // Load types involving an "index": ArrayGet and - // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. + // Load types involving an "index": ArrayGet, + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics. 
// /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) if (index.IsConstant()) { size_t computed_offset = @@ -6698,9 +6884,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); } else { // Handle the special case of the - // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use - // a register pair as index ("long offset"), of which only the low - // part contains data. + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics, which use a register pair as index ("long + // offset"), of which only the low part contains data. Register index_reg = index.IsRegisterPair() ? index.AsRegisterPairLow<Register>() : index.AsRegister<Register>(); @@ -6716,8 +6902,21 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i __ MaybeUnpoisonHeapReference(ref_reg); // Slow path marking the object `ref` when it is gray. - SlowPathCodeARM* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); + SlowPathCodeARM* slow_path; + if (always_update_field) { + DCHECK(temp2 != nullptr); + // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address + // of the form `obj + field_offset`, where `obj` is a register and + // `field_offset` is a register pair (of which only the lower half + // is used). Thus `offset` and `scale_factor` above are expected + // to be null in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM( + instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); + } AddSlowPath(slow_path); // if (rb_state == ReadBarrier::gray_ptr_) diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 4d59b47861..3d46aab31f 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -283,12 +283,12 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { // // root <- *(obj + offset) // - // while honoring read barriers if requires_read_barrier is true. + // while honoring read barriers if `requires_read_barrier` is true. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, Register obj, uint32_t offset, - bool requires_read_barrier = kEmitCompilerReadBarrier); + bool requires_read_barrier); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Label* true_target, @@ -508,6 +508,18 @@ class CodeGeneratorARM : public CodeGenerator { bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. + + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires an extra temporary register, which must be provided as a + // non-null pointer (`temp2`). 
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, @@ -515,7 +527,9 @@ class CodeGeneratorARM : public CodeGenerator { Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + Register* temp2 = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 9e59d8cc38..b53750966d 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -589,11 +589,21 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { public: - ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj) - : SlowPathCodeARM64(instruction), obj_(obj) { + ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location ref) + : SlowPathCodeARM64(instruction), ref_(ref) { DCHECK(kEmitCompilerReadBarrier); } @@ -602,7 +612,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg())); + DCHECK(ref_.IsRegister()) << ref_; + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -621,40 +632,207 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - DCHECK_NE(obj_.reg(), LR); - DCHECK_NE(obj_.reg(), WSP); - DCHECK_NE(obj_.reg(), WZR); + DCHECK_NE(ref_.reg(), LR); + DCHECK_NE(ref_.reg(), WSP); + DCHECK_NE(ref_.reg(), WZR); // IP0 is used internally by the ReadBarrierMarkRegX entry point // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(obj_.reg(), IP0); - DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg(); + DCHECK_NE(ref_.reg(), IP0); + DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); // "Compact" slow path, saving two moves. 
// // Instead of using the standard runtime calling convention (input // and output in W0): // - // W0 <- obj + // W0 <- ref // W0 <- ReadBarrierMark(W0) - // obj <- W0 + // ref <- W0 // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(obj_.reg()); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); // This runtime call does not require a stack map. arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ B(GetExitLabel()); } private: - const Location obj_; + // The location (register) of the marked object reference. + const Location ref_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathARM64 above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). +class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp) + : SlowPathCodeARM64(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp_(temp) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathARM64"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = WRegisterFrom(ref_); + DCHECK(locations->CanCall()); + DCHECK(ref_.IsRegister()) << ref_; + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegister()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use IP to save the old reference, as IP is + // used internally by the ReadBarrierMarkRegX entry point, and we + // need the old reference after the call to that entry point. + DCHECK_NE(LocationFrom(temp_).reg(), IP0); + __ Mov(temp_.W(), ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. 
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + DCHECK_NE(ref_.reg(), LR); + DCHECK_NE(ref_.reg(), WSP); + DCHECK_NE(ref_.reg(), WZR); + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_.reg(), IP0); + DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in W0): + // + // W0 <- ref + // W0 <- ReadBarrierMark(W0) + // ref <- W0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); + // This runtime call does not require a stack map. + arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LDXR/CMP/BNE sequence of instructions in the compare-and-set + // (CAS) operation below would abort the CAS, leaving the field + // as-is. + vixl::aarch64::Label done; + __ Cmp(temp_.W(), ref_reg); + __ B(eq, &done); + + // Update the the holder's field atomically. This may fail if + // mutator updates before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + MacroAssembler* masm = arm64_codegen->GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + + // Convenience aliases. + Register base = obj_.W(); + Register offset = XRegisterFrom(field_offset_); + Register expected = temp_.W(); + Register value = ref_reg; + Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. + Register tmp_value = temps.AcquireW(); // Value in memory. + + __ Add(tmp_ptr, base.X(), Operand(offset)); + + if (kPoisonHeapReferences) { + arm64_codegen->GetAssembler()->PoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. + } else { + arm64_codegen->GetAssembler()->PoisonHeapReference(value); + } + } + + // do { + // tmp_value = [tmp_ptr] - expected; + // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); + + vixl::aarch64::Label loop_head, comparison_failed, exit_loop; + __ Bind(&loop_head); + __ Ldxr(tmp_value, MemOperand(tmp_ptr)); + __ Cmp(tmp_value, expected); + __ B(&comparison_failed, ne); + __ Stxr(tmp_value, value, MemOperand(tmp_ptr)); + __ Cbnz(tmp_value, &loop_head); + __ B(&exit_loop); + __ Bind(&comparison_failed); + __ Clrex(); + __ Bind(&exit_loop); + + if (kPoisonHeapReferences) { + arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. + } else { + arm64_codegen->GetAssembler()->UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ B(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. 
+ const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const Register temp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { public: @@ -768,7 +946,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) << instruction_->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset_, 0U); + DCHECK_EQ(offset_, 0u); DCHECK(index_.IsRegister()); } } @@ -4098,7 +4276,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value(), - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); break; } @@ -4143,7 +4321,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { out_loc, out.X(), offset, - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; @@ -4180,7 +4358,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()), - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; @@ -4319,8 +4497,9 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { GenerateGcRootFieldLoad(load, load->GetLocations()->Out(), temp, - /* placeholder */ 0u, - ldr_label); + /* offset placeholder */ 0u, + ldr_label, + kEmitCompilerReadBarrier); SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label); codegen_->AddSlowPath(slow_path); @@ -5174,7 +5353,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); - size_t no_scale_factor = 0U; + size_t no_scale_factor = 0u; GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -5225,7 +5404,8 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* size_t scale_factor, Register temp, bool needs_null_check, - bool use_load_acquire) { + bool use_load_acquire, + bool always_update_field) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // If we are emitting an array load, we should not be using a @@ -5278,7 +5458,9 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // The actual reference load. if (index.IsValid()) { - // Load types involving an "index". + // Load types involving an "index": ArrayGet, + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics. if (use_load_acquire) { // UnsafeGetObjectVolatile intrinsic case. 
// Register `index` is not an index in an object array, but an @@ -5287,9 +5469,9 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* DCHECK(instruction->GetLocations()->Intrinsified()); DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) << instruction->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset, 0U); - DCHECK_EQ(scale_factor, 0U); - DCHECK_EQ(needs_null_check, 0U); + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, 0u); + DCHECK_EQ(needs_null_check, 0u); // /* HeapReference<Object> */ ref = *(obj + index) MemOperand field = HeapOperand(obj, XRegisterFrom(index)); LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); @@ -5300,10 +5482,10 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor); Load(type, ref_reg, HeapOperand(obj, computed_offset)); } else { - Register temp2 = temps.AcquireW(); - __ Add(temp2, obj, offset); - Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor)); - temps.Release(temp2); + Register temp3 = temps.AcquireW(); + __ Add(temp3, obj, offset); + Load(type, ref_reg, HeapOperand(temp3, XRegisterFrom(index), LSL, scale_factor)); + temps.Release(temp3); } } } else { @@ -5320,8 +5502,19 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); // Slow path marking the object `ref` when it is gray. - SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref); + SlowPathCodeARM64* slow_path; + if (always_update_field) { + // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports + // address of the form `obj + field_offset`, where `obj` is a + // register and `field_offset` is a register. Thus `offset` and + // `scale_factor` above are expected to be null in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, 0u); /* "times 1" */ + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64( + instruction, ref, obj, /* field_offset */ index, temp); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref); + } AddSlowPath(slow_path); // if (rb_state == ReadBarrier::gray_ptr_) diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index eb28ecb427..7f54b4b6b2 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -289,13 +289,13 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { // // root <- *(obj + offset) // - // while honoring read barriers (if any). + // while honoring read barriers if `requires_read_barrier` is true. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, vixl::aarch64::Register obj, uint32_t offset, - vixl::aarch64::Label* fixup_label = nullptr, - bool requires_read_barrier = kEmitCompilerReadBarrier); + vixl::aarch64::Label* fixup_label, + bool requires_read_barrier); // Generate a floating-point comparison. void GenerateFcmp(HInstruction* instruction); @@ -594,6 +594,13 @@ class CodeGeneratorARM64 : public CodeGenerator { bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. 
+ // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch64::Register obj, @@ -602,7 +609,8 @@ class CodeGeneratorARM64 : public CodeGenerator { size_t scale_factor, vixl::aarch64::Register temp, bool needs_null_check, - bool use_load_acquire); + bool use_load_acquire, + bool always_update_field = false); // Generate a read barrier for a heap reference within `instruction` // using a slow path. diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index bfade3ccbe..cc40522731 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -37,11 +37,12 @@ namespace arm { namespace vixl32 = vixl::aarch32; using namespace vixl32; // NOLINT(build/namespaces) +using helpers::DRegisterFrom; using helpers::DWARFReg; -using helpers::FromLowSToD; using helpers::HighDRegisterFrom; using helpers::HighRegisterFrom; using helpers::InputOperandAt; +using helpers::InputRegister; using helpers::InputRegisterAt; using helpers::InputSRegisterAt; using helpers::InputVRegisterAt; @@ -339,6 +340,46 @@ class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL); }; +class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction) + : SlowPathCodeARMVIXL(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + + __ Bind(GetEntryLabel()); + if (instruction_->CanThrowIntoCatchBlock()) { + // Live registers will be restored in the catch block if caught. + SaveLiveRegisters(codegen, instruction_->GetLocations()); + } + // We're moving two locations to locations that could overlap, so we need a parallel + // move resolver. + InvokeRuntimeCallingConventionARMVIXL calling_convention; + codegen->EmitParallelMoves( + locations->InAt(0), + LocationFrom(calling_convention.GetRegisterAt(0)), + Primitive::kPrimInt, + locations->InAt(1), + LocationFrom(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt); + QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() + ? 
kQuickThrowStringBounds + : kQuickThrowArrayBounds; + arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); + } + + bool IsFatal() const OVERRIDE { return true; } + + const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARMVIXL"; } + + private: + DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL); +}; + class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit) @@ -393,6 +434,66 @@ class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL); }; +class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction) + : SlowPathCodeARMVIXL(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + __ Bind(GetEntryLabel()); + arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + } + + const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; } + + private: + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL); +}; + +class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConventionARMVIXL calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove( + locations->InAt(0), + LocationFrom(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove( + locations->InAt(1), + LocationFrom(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt, + nullptr); + parallel_move.AddMove( + locations->InAt(2), + LocationFrom(calling_convention.GetRegisterAt(2)), + Primitive::kPrimNot, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARMVIXL"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL); +}; + + inline vixl32::Condition ARMCondition(IfCondition cond) { switch (cond) { case kCondEQ: return eq; @@ -724,110 +825,6 @@ void CodeGeneratorARMVIXL::GenerateInvokeRuntime(int32_t entry_point_offset) { __ Blx(lr); } -void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); - locations->SetInAt(0, Location::RequiresRegister()); - if (check->HasUses()) { - locations->SetOut(Location::SameAsFirstInput()); - } -} - -void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* 
check) { - // We assume the class is not null. - LoadClassSlowPathARMVIXL* slow_path = - new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), - check, - check->GetDexPc(), - /* do_clinit */ true); - codegen_->AddSlowPath(slow_path); - GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); -} - -void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( - LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - GetAssembler()->LoadFromOffset(kLoadWord, - temp, - class_reg, - mirror::Class::StatusOffset().Int32Value()); - __ Cmp(temp, mirror::Class::kStatusInitialized); - __ B(lt, slow_path->GetEntryLabel()); - // Even if the initialized flag is set, we may be in a situation where caches are not synced - // properly. Therefore, we do a memory fence. - __ Dmb(ISH); - __ Bind(slow_path->GetExitLabel()); -} - -// Check if the desired_string_load_kind is supported. If it is, return it, -// otherwise return a fall-back kind that should be used instead. -HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) { - // TODO(VIXL): Implement optimized code paths. For now we always use the simpler fallback code. - return HLoadString::LoadKind::kDexCacheViaMethod; -} - -void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = load->NeedsEnvironment() - ? LocationSummary::kCallOnMainOnly - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - - // TODO(VIXL): Implement optimized code paths. - // See InstructionCodeGeneratorARMVIXL::VisitLoadString. - HLoadString::LoadKind load_kind = load->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { - locations->SetInAt(0, Location::RequiresRegister()); - // TODO(VIXL): Use InvokeRuntimeCallingConventionARMVIXL instead. - locations->SetOut(LocationFrom(r0)); - } else { - locations->SetOut(Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) { - // TODO(VIXL): Implement optimized code paths. - // We implemented the simplest solution to get first ART tests passing, we deferred the - // optimized path until later, we should implement it using ARM64 implementation as a - // reference. The same related to LocationsBuilderARMVIXL::VisitLoadString. - - // TODO: Re-add the compiler code to do string dex cache lookup again. - DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod); - InvokeRuntimeCallingConventionARMVIXL calling_convention; - __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex()); - codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); - CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); -} - -// Check if the desired_class_load_kind is supported. If it is, return it, -// otherwise return a fall-back kind that should be used instead. -HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind ATTRIBUTE_UNUSED) { - // TODO(VIXL): Implement optimized code paths. - return HLoadClass::LoadKind::kDexCacheViaMethod; -} - -// Check if the desired_dispatch_info is supported. If it is, return it, -// otherwise return a fall-back info that should be used instead. 
-HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info ATTRIBUTE_UNUSED, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { - // TODO(VIXL): Implement optimized code paths. - return { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u, - 0u - }; -} - -// Copy the result of a call into the given target. -void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - Primitive::Type type ATTRIBUTE_UNUSED) { - TODO_VIXL32(FATAL); -} - void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) { DCHECK(!successor->IsExitBlock()); HBasicBlock* block = got->GetBlock(); @@ -898,14 +895,14 @@ void InstructionCodeGeneratorARMVIXL::GenerateVcmp(HInstruction* instruction) { __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0); } else { DCHECK_EQ(type, Primitive::kPrimDouble); - __ Vcmp(F64, FromLowSToD(LowSRegisterFrom(lhs_loc)), 0.0); + __ Vcmp(F64, DRegisterFrom(lhs_loc), 0.0); } } else { if (type == Primitive::kPrimFloat) { __ Vcmp(InputSRegisterAt(instruction, 0), InputSRegisterAt(instruction, 1)); } else { DCHECK_EQ(type, Primitive::kPrimDouble); - __ Vcmp(FromLowSToD(LowSRegisterFrom(lhs_loc)), FromLowSToD(LowSRegisterFrom(rhs_loc))); + __ Vcmp(DRegisterFrom(lhs_loc), DRegisterFrom(rhs_loc)); } } } @@ -1131,6 +1128,24 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) { GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } +void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeARMVIXL* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); +} + void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); if (Primitive::IsFloatingPointType(select->GetType())) { @@ -1347,6 +1362,28 @@ void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant // Will be generated at use site. } +void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARMVIXL::VisitFloatConstant( + HFloatConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. +} + +void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant( + HDoubleConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. 
+} + void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -1419,6 +1456,65 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } +void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + switch (neg->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + break; + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) { + LocationSummary* locations = neg->GetLocations(); + Location out = locations->Out(); + Location in = locations->InAt(0); + switch (neg->GetResultType()) { + case Primitive::kPrimInt: + __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0); + break; + + case Primitive::kPrimLong: + // out.lo = 0 - in.lo (and update the carry/borrow (C) flag) + __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0); + // We cannot emit an RSC (Reverse Subtract with Carry) + // instruction here, as it does not exist in the Thumb-2 + // instruction set. We use the following approach + // using SBC and SUB instead. + // + // out.hi = -C + __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out)); + // out.hi = out.hi - in.hi + __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in)); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + __ Vneg(OutputVRegister(neg), InputVRegisterAt(neg, 0)); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); + } +} + void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); @@ -1717,7 +1813,7 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve case Primitive::kPrimDouble: { // Processing a Dex `double-to-int' instruction. vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0)); - __ Vcvt(I32, F64, temp_s, FromLowSToD(LowSRegisterFrom(in))); + __ Vcvt(I32, F64, temp_s, DRegisterFrom(in)); __ Vmov(OutputRegister(conversion), temp_s); break; } @@ -1805,7 +1901,7 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve case Primitive::kPrimDouble: // Processing a Dex `double-to-float' instruction. - __ Vcvt(F32, F64, OutputSRegister(conversion), FromLowSToD(LowSRegisterFrom(in))); + __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in)); break; default: @@ -1824,7 +1920,7 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve case Primitive::kPrimChar: { // Processing a Dex `int-to-double' instruction. 
__ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0)); - __ Vcvt(F64, I32, FromLowSToD(LowSRegisterFrom(out)), LowSRegisterFrom(out)); + __ Vcvt(F64, I32, DRegisterFrom(out), LowSRegisterFrom(out)); break; } @@ -1834,13 +1930,12 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve vixl32::Register high = HighRegisterFrom(in); vixl32::SRegister out_s = LowSRegisterFrom(out); - vixl32::DRegister out_d = FromLowSToD(out_s); + vixl32::DRegister out_d = DRegisterFrom(out); vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0)); - vixl32::DRegister temp_d = FromLowSToD(temp_s); + vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0)); - vixl32::SRegister constant_s = LowSRegisterFrom(locations->GetTemp(1)); - vixl32::DRegister constant_d = FromLowSToD(constant_s); + vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(0)); // temp_d = int-to-double(high) __ Vmov(temp_s, high); @@ -1857,7 +1952,7 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve case Primitive::kPrimFloat: // Processing a Dex `float-to-double' instruction. - __ Vcvt(F64, F32, FromLowSToD(LowSRegisterFrom(out)), InputSRegisterAt(conversion, 0)); + __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0)); break; default: @@ -2071,267 +2166,6 @@ void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) { } } -void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); - InvokeRuntimeCallingConventionARMVIXL calling_convention; - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetOut(LocationFrom(r0)); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2))); -} - -void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) { - InvokeRuntimeCallingConventionARMVIXL calling_convention; - __ Mov(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); -} - -void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); - if (instruction->IsStringAlloc()) { - locations->AddTemp(LocationFrom(kMethodRegister)); - } else { - InvokeRuntimeCallingConventionARMVIXL calling_convention; - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); - } - locations->SetOut(LocationFrom(r0)); -} - -void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. 
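
[Editorial aside, not part of this patch] The long-to-double sequence above computes int-to-double(high) * 2^32 + unsigned-to-double(low). A small sketch of why that matches a direct conversion, assuming the default round-to-nearest mode (all intermediates are exact, so the final add performs the single rounding):

#include <cassert>
#include <cstdint>

double LongToDoubleViaHalves(int64_t v) {
  int32_t hi = static_cast<int32_t>(v >> 32);   // signed high word (arithmetic shift)
  uint32_t lo = static_cast<uint32_t>(v);       // unsigned low word
  return static_cast<double>(lo) + static_cast<double>(hi) * 4294967296.0;  // lo + hi * 2^32
}

int main() {
  for (int64_t v : {int64_t{0}, int64_t{-1}, int64_t{1} << 40, int64_t{-123456789012345}}) {
    assert(LongToDoubleViaHalves(v) == static_cast<double>(v));
  }
}
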
- vixl32::Register temp = RegisterFrom(instruction->GetLocations()->GetTemp(0)); - MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize); - GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString)); - GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value()); - AssemblerAccurateScope aas(GetVIXLAssembler(), - kArmInstrMaxSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ blx(lr); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); - } -} - -void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); - if (location.IsStackSlot()) { - location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); - } else if (location.IsDoubleStackSlot()) { - location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); - } - locations->SetOut(location); -} - -void InstructionCodeGeneratorARMVIXL::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { - // Nothing to do, the parameter is already at its location. -} - -void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetOut(LocationFrom(kMethodRegister)); -} - -void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod( - HCurrentMethod* instruction ATTRIBUTE_UNUSED) { - // Nothing to do, the method is already at its location. -} - -void LocationsBuilderARMVIXL::VisitNot(HNot* not_) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) { - LocationSummary* locations = not_->GetLocations(); - Location out = locations->Out(); - Location in = locations->InAt(0); - switch (not_->GetResultType()) { - case Primitive::kPrimInt: - __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0)); - break; - - case Primitive::kPrimLong: - __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in)); - __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in)); - break; - - default: - LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType(); - } -} - -void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); - switch (compare->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: - case Primitive::kPrimLong: { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Output overlaps because it is written before doing the low comparison. 
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - break; - } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1))); - locations->SetOut(Location::RequiresRegister()); - break; - } - default: - LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); - } -} - -void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { - LocationSummary* locations = compare->GetLocations(); - vixl32::Register out = OutputRegister(compare); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - - vixl32::Label less, greater, done; - Primitive::Type type = compare->InputAt(0)->GetType(); - vixl32::Condition less_cond = vixl32::Condition(kNone); - switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: { - // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags. - __ Mov(out, 0); - __ Cmp(RegisterFrom(left), RegisterFrom(right)); // Signed compare. - less_cond = lt; - break; - } - case Primitive::kPrimLong: { - __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare. - __ B(lt, &less); - __ B(gt, &greater); - // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags. - __ Mov(out, 0); - __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare. - less_cond = lo; - break; - } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - __ Mov(out, 0); - GenerateVcmp(compare); - // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS). - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - less_cond = ARMFPCondition(kCondLT, compare->IsGtBias()); - break; - } - default: - LOG(FATAL) << "Unexpected compare type " << type; - UNREACHABLE(); - } - - __ B(eq, &done); - __ B(less_cond, &less); - - __ Bind(&greater); - __ Mov(out, 1); - __ B(&done); - - __ Bind(&less); - __ Mov(out, -1); - - __ Bind(&done); -} - -void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { - locations->SetInAt(i, Location::Any()); - } - locations->SetOut(Location::Any()); -} - -void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; -} - -void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) { - // TODO (ported from quick): revisit ARM barrier kinds. - DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings. 
- switch (kind) { - case MemBarrierKind::kAnyStore: - case MemBarrierKind::kLoadAny: - case MemBarrierKind::kAnyAny: { - flavor = DmbOptions::ISH; - break; - } - case MemBarrierKind::kStoreStore: { - flavor = DmbOptions::ISHST; - break; - } - default: - LOG(FATAL) << "Unexpected memory barrier " << kind; - } - __ Dmb(flavor); -} - -void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr, - uint32_t offset, - vixl32::Register out_lo, - vixl32::Register out_hi) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - if (offset != 0) { - vixl32::Register temp = temps.Acquire(); - __ Add(temp, addr, offset); - addr = temp; - } - __ Ldrexd(out_lo, out_hi, addr); -} - -void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr, - uint32_t offset, - vixl32::Register value_lo, - vixl32::Register value_hi, - vixl32::Register temp1, - vixl32::Register temp2, - HInstruction* instruction) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Label fail; - if (offset != 0) { - vixl32::Register temp = temps.Acquire(); - __ Add(temp, addr, offset); - addr = temp; - } - __ Bind(&fail); - // We need a load followed by store. (The address used in a STREX instruction must - // be the same as the address in the most recently executed LDREX instruction.) - __ Ldrexd(temp1, temp2, addr); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ Strexd(temp1, value_lo, value_hi, addr); - __ Cbnz(temp1, &fail); -} - void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); DCHECK(instruction->GetResultType() == Primitive::kPrimInt); @@ -2592,6 +2426,647 @@ void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instructi } } +void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HRor* ror) { + LocationSummary* locations = ror->GetLocations(); + vixl32::Register in = InputRegisterAt(ror, 0); + Location rhs = locations->InAt(1); + vixl32::Register out = OutputRegister(ror); + + if (rhs.IsConstant()) { + // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31], + // so map all rotations to a +ve. equivalent in that range. + // (e.g. left *or* right by -2 bits == 30 bits in the same direction.) + uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F; + if (rot) { + // Rotate, mapping left rotations to right equivalents if necessary. + // (e.g. left by 2 bits == right by 30.) + __ Ror(out, in, rot); + } else if (!out.Is(in)) { + __ Mov(out, in); + } + } else { + __ Ror(out, in, RegisterFrom(rhs)); + } +} + +// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer +// rotates by swapping input regs (effectively rotating by the first 32-bits of +// a larger rotation) or flipping direction (thus treating larger right/left +// rotations as sub-word sized rotations in the other direction) as appropriate. +void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { + LocationSummary* locations = ror->GetLocations(); + vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); + vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); + Location rhs = locations->InAt(1); + vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out()); + vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out()); + + if (rhs.IsConstant()) { + uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant()); + // Map all rotations to +ve. equivalents on the interval [0,63]. 
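
[Editorial aside, not part of this patch] HandleIntegerRotate above reduces the rotation amount modulo 32 and turns a zero rotation into a plain move, since ROR #0 is not an encodable rotation. The same normalization in portable C++ (a 32-bit shift would be undefined there too):

#include <cassert>
#include <cstdint>

uint32_t Rotr32(uint32_t x, uint32_t amount) {
  uint32_t rot = amount & 0x1Fu;
  return (rot == 0u) ? x : (x >> rot) | (x << (32u - rot));
}

int main() {
  assert(Rotr32(0x80000001u, 1) == 0xC0000000u);
  assert(Rotr32(0x12345678u, 36) == Rotr32(0x12345678u, 4));                         // amounts reduce mod 32
  assert(Rotr32(0x12345678u, static_cast<uint32_t>(-4)) == Rotr32(0x12345678u, 28)); // left by 4 == right by 28
}
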
+ rot &= kMaxLongShiftDistance; + // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate + // logic below to a simple pair of binary orr. + // (e.g. 34 bits == in_reg swap + 2 bits right.) + if (rot >= kArmBitsPerWord) { + rot -= kArmBitsPerWord; + std::swap(in_reg_hi, in_reg_lo); + } + // Rotate, or mov to out for zero or word size rotations. + if (rot != 0u) { + __ Lsr(out_reg_hi, in_reg_hi, rot); + __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot)); + __ Lsr(out_reg_lo, in_reg_lo, rot); + __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot)); + } else { + __ Mov(out_reg_lo, in_reg_lo); + __ Mov(out_reg_hi, in_reg_hi); + } + } else { + vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0)); + vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1)); + vixl32::Label end; + vixl32::Label shift_by_32_plus_shift_right; + + __ And(shift_right, RegisterFrom(rhs), 0x1F); + __ Lsrs(shift_left, RegisterFrom(rhs), 6); + // TODO(VIXL): Check that flags are kept after "vixl32::LeaveFlags" enabled. + __ Rsb(shift_left, shift_right, kArmBitsPerWord); + __ B(cc, &shift_by_32_plus_shift_right); + + // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right). + // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right). + __ Lsl(out_reg_hi, in_reg_hi, shift_left); + __ Lsr(out_reg_lo, in_reg_lo, shift_right); + __ Add(out_reg_hi, out_reg_hi, out_reg_lo); + __ Lsl(out_reg_lo, in_reg_lo, shift_left); + __ Lsr(shift_left, in_reg_hi, shift_right); + __ Add(out_reg_lo, out_reg_lo, shift_left); + __ B(&end); + + __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right. + // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). + // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left). 
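
[Editorial aside, not part of this patch] A sketch of the constant-rotation path of HandleLongRotate, with the >= 32 word swap and the LSR/ORR recombination spelled out and checked against a plain 64-bit rotate:

#include <cassert>
#include <cstdint>
#include <utility>

uint64_t Rotr64ViaHalves(uint64_t v, uint32_t amount) {
  uint32_t lo = static_cast<uint32_t>(v);
  uint32_t hi = static_cast<uint32_t>(v >> 32);
  uint32_t rot = amount & 63u;
  if (rot >= 32u) {        // e.g. rotating by 34 == swap the words, then rotate by 2.
    rot -= 32u;
    std::swap(lo, hi);
  }
  uint32_t out_lo = lo;
  uint32_t out_hi = hi;
  if (rot != 0u) {
    out_hi = (hi >> rot) | (lo << (32u - rot));
    out_lo = (lo >> rot) | (hi << (32u - rot));
  }
  return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
}

int main() {
  const uint64_t v = 0x0123456789ABCDEFull;
  for (uint32_t n : {0u, 1u, 31u, 32u, 34u, 63u, 95u}) {
    uint32_t r = n & 63u;
    uint64_t expected = (r == 0u) ? v : (v >> r) | (v << (64u - r));
    assert(Rotr64ViaHalves(v, n) == expected);
  }
}
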
+ __ Lsr(out_reg_hi, in_reg_hi, shift_right); + __ Lsl(out_reg_lo, in_reg_lo, shift_left); + __ Add(out_reg_hi, out_reg_hi, out_reg_lo); + __ Lsr(out_reg_lo, in_reg_lo, shift_right); + __ Lsl(shift_right, in_reg_hi, shift_left); + __ Add(out_reg_lo, out_reg_lo, shift_right); + + __ Bind(&end); + } +} + +void LocationsBuilderARMVIXL::VisitRor(HRor* ror) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall); + switch (ror->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + if (ror->InputAt(1)->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) { + Primitive::Type type = ror->GetResultType(); + switch (type) { + case Primitive::kPrimInt: { + HandleIntegerRotate(ror); + break; + } + case Primitive::kPrimLong: { + HandleLongRotate(ror); + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) { + DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); + + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall); + + switch (op->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + if (op->InputAt(1)->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + // Make the output overlap, as it will be used to hold the masked + // second input. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } + break; + } + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + if (op->InputAt(1)->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); + // For simplicity, use kOutputOverlap even though we only require that low registers + // don't clash with high registers which the register allocator currently guarantees. 
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) { + DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); + + LocationSummary* locations = op->GetLocations(); + Location out = locations->Out(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + + Primitive::Type type = op->GetResultType(); + switch (type) { + case Primitive::kPrimInt: { + vixl32::Register out_reg = OutputRegister(op); + vixl32::Register first_reg = InputRegisterAt(op, 0); + if (second.IsRegister()) { + vixl32::Register second_reg = RegisterFrom(second); + // ARM doesn't mask the shift count so we need to do it ourselves. + __ And(out_reg, second_reg, kMaxIntShiftDistance); + if (op->IsShl()) { + __ Lsl(out_reg, first_reg, out_reg); + } else if (op->IsShr()) { + __ Asr(out_reg, first_reg, out_reg); + } else { + __ Lsr(out_reg, first_reg, out_reg); + } + } else { + int32_t cst = second.GetConstant()->AsIntConstant()->GetValue(); + uint32_t shift_value = cst & kMaxIntShiftDistance; + if (shift_value == 0) { // ARM does not support shifting with 0 immediate. + __ Mov(out_reg, first_reg); + } else if (op->IsShl()) { + __ Lsl(out_reg, first_reg, shift_value); + } else if (op->IsShr()) { + __ Asr(out_reg, first_reg, shift_value); + } else { + __ Lsr(out_reg, first_reg, shift_value); + } + } + break; + } + case Primitive::kPrimLong: { + vixl32::Register o_h = HighRegisterFrom(out); + vixl32::Register o_l = LowRegisterFrom(out); + + vixl32::Register high = HighRegisterFrom(first); + vixl32::Register low = LowRegisterFrom(first); + + if (second.IsRegister()) { + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + + vixl32::Register second_reg = RegisterFrom(second); + + if (op->IsShl()) { + __ And(o_l, second_reg, kMaxLongShiftDistance); + // Shift the high part + __ Lsl(o_h, high, o_l); + // Shift the low part and `or` what overflew on the high part + __ Rsb(temp, o_l, kArmBitsPerWord); + __ Lsr(temp, low, temp); + __ Orr(o_h, o_h, temp); + // If the shift is > 32 bits, override the high part + __ Subs(temp, o_l, kArmBitsPerWord); + { + AssemblerAccurateScope guard(GetVIXLAssembler(), + 3 * kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(pl); + __ lsl(pl, o_h, low, temp); + } + // Shift the low part + __ Lsl(o_l, low, o_l); + } else if (op->IsShr()) { + __ And(o_h, second_reg, kMaxLongShiftDistance); + // Shift the low part + __ Lsr(o_l, low, o_h); + // Shift the high part and `or` what underflew on the low part + __ Rsb(temp, o_h, kArmBitsPerWord); + __ Lsl(temp, high, temp); + __ Orr(o_l, o_l, temp); + // If the shift is > 32 bits, override the low part + __ Subs(temp, o_h, kArmBitsPerWord); + { + AssemblerAccurateScope guard(GetVIXLAssembler(), + 3 * kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(pl); + __ asr(pl, o_l, high, temp); + } + // Shift the high part + __ Asr(o_h, high, o_h); + } else { + __ And(o_h, second_reg, kMaxLongShiftDistance); + // same as Shr except we use `Lsr`s and not `Asr`s + __ Lsr(o_l, low, o_h); + __ Rsb(temp, o_h, kArmBitsPerWord); + __ Lsl(temp, high, temp); + __ Orr(o_l, o_l, temp); + __ Subs(temp, o_h, 
kArmBitsPerWord); + { + AssemblerAccurateScope guard(GetVIXLAssembler(), + 3 * kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(pl); + __ lsr(pl, o_l, high, temp); + } + __ Lsr(o_h, high, o_h); + } + } else { + // Register allocator doesn't create partial overlap. + DCHECK(!o_l.Is(high)); + DCHECK(!o_h.Is(low)); + int32_t cst = second.GetConstant()->AsIntConstant()->GetValue(); + uint32_t shift_value = cst & kMaxLongShiftDistance; + if (shift_value > 32) { + if (op->IsShl()) { + __ Lsl(o_h, low, shift_value - 32); + __ Mov(o_l, 0); + } else if (op->IsShr()) { + __ Asr(o_l, high, shift_value - 32); + __ Asr(o_h, high, 31); + } else { + __ Lsr(o_l, high, shift_value - 32); + __ Mov(o_h, 0); + } + } else if (shift_value == 32) { + if (op->IsShl()) { + __ Mov(o_h, low); + __ Mov(o_l, 0); + } else if (op->IsShr()) { + __ Mov(o_l, high); + __ Asr(o_h, high, 31); + } else { + __ Mov(o_l, high); + __ Mov(o_h, 0); + } + } else if (shift_value == 1) { + if (op->IsShl()) { + __ Lsls(o_l, low, 1); + __ Adc(o_h, high, high); + } else if (op->IsShr()) { + __ Asrs(o_h, high, 1); + __ Rrx(o_l, low); + } else { + __ Lsrs(o_h, high, 1); + __ Rrx(o_l, low); + } + } else { + DCHECK(2 <= shift_value && shift_value < 32) << shift_value; + if (op->IsShl()) { + __ Lsl(o_h, high, shift_value); + __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value)); + __ Lsl(o_l, low, shift_value); + } else if (op->IsShr()) { + __ Lsr(o_l, low, shift_value); + __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value)); + __ Asr(o_h, high, shift_value); + } else { + __ Lsr(o_l, low, shift_value); + __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value)); + __ Lsr(o_h, high, shift_value); + } + } + } + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void LocationsBuilderARMVIXL::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + +void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + +void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + if (instruction->IsStringAlloc()) { + locations->AddTemp(LocationFrom(kMethodRegister)); + } else { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + } + locations->SetOut(LocationFrom(r0)); +} + +void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) { + // Note: if heap poisoning is enabled, the entry point takes cares + // of poisoning the reference. + if (instruction->IsStringAlloc()) { + // String is allocated through StringFactory. Call NewEmptyString entry point. 
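
[Editorial aside, not part of this patch] The constant long-shift cases above (> 32, == 32, 1..31), reduced to portable C++ for the SHL flavour and checked against a direct 64-bit shift. The generated code additionally special-cases a shift of exactly 1 with LSLS/ADC; the result is the same.

#include <cassert>
#include <cstdint>

uint64_t Shl64ViaHalves(uint32_t lo, uint32_t hi, uint32_t shift) {
  shift &= 63u;                                    // Dex semantics: only the low 6 bits count.
  uint32_t out_lo, out_hi;
  if (shift > 32u) {
    out_hi = lo << (shift - 32u);                  // Only the low word can still contribute.
    out_lo = 0u;
  } else if (shift == 32u) {
    out_hi = lo;
    out_lo = 0u;
  } else if (shift == 0u) {
    out_hi = hi;
    out_lo = lo;
  } else {                                         // 1..31: bits cross the word boundary.
    out_hi = (hi << shift) | (lo >> (32u - shift));
    out_lo = lo << shift;
  }
  return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
}

int main() {
  const uint64_t v = 0x0123456789ABCDEFull;
  for (uint32_t s : {0u, 1u, 5u, 31u, 32u, 33u, 63u, 64u}) {
    uint64_t expected = ((s & 63u) == 0u) ? v : v << (s & 63u);
    assert(Shl64ViaHalves(static_cast<uint32_t>(v), static_cast<uint32_t>(v >> 32), s) == expected);
  }
}
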
+ vixl32::Register temp = RegisterFrom(instruction->GetLocations()->GetTemp(0)); + MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize); + GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString)); + GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value()); + AssemblerAccurateScope aas(GetVIXLAssembler(), + kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ blx(lr); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + } else { + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + } +} + +void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetOut(LocationFrom(r0)); + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2))); +} + +void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + __ Mov(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); + // Note: if heap poisoning is enabled, the entry point takes cares + // of poisoning the reference. + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); +} + +void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); + if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } + locations->SetOut(location); +} + +void InstructionCodeGeneratorARMVIXL::VisitParameterValue( + HParameterValue* instruction ATTRIBUTE_UNUSED) { + // Nothing to do, the parameter is already at its location. +} + +void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(LocationFrom(kMethodRegister)); +} + +void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod( + HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + // Nothing to do, the method is already at its location. 
+} + +void LocationsBuilderARMVIXL::VisitNot(HNot* not_) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) { + LocationSummary* locations = not_->GetLocations(); + Location out = locations->Out(); + Location in = locations->InAt(0); + switch (not_->GetResultType()) { + case Primitive::kPrimInt: + __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0)); + break; + + case Primitive::kPrimLong: + __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in)); + __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in)); + break; + + default: + LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType(); + } +} + +void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) { + __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1); +} + +void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Output overlaps because it is written before doing the low comparison. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1))); + locations->SetOut(Location::RequiresRegister()); + break; + } + default: + LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { + LocationSummary* locations = compare->GetLocations(); + vixl32::Register out = OutputRegister(compare); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + + vixl32::Label less, greater, done; + Primitive::Type type = compare->InputAt(0)->GetType(); + vixl32::Condition less_cond = vixl32::Condition(kNone); + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: { + // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags. + __ Mov(out, 0); + __ Cmp(RegisterFrom(left), RegisterFrom(right)); // Signed compare. + less_cond = lt; + break; + } + case Primitive::kPrimLong: { + __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare. + __ B(lt, &less); + __ B(gt, &greater); + // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags. + __ Mov(out, 0); + __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare. 
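
[Editorial aside, not part of this patch] A sketch of the long three-way compare used above, showing why the high words are compared signed while the low words are compared unsigned (the low word carries magnitude but no sign):

#include <cassert>
#include <cstdint>

int CompareLongViaHalves(int64_t a, int64_t b) {
  int32_t a_hi = static_cast<int32_t>(a >> 32);
  int32_t b_hi = static_cast<int32_t>(b >> 32);
  if (a_hi != b_hi) return (a_hi < b_hi) ? -1 : 1;   // signed high-word compare
  uint32_t a_lo = static_cast<uint32_t>(a);
  uint32_t b_lo = static_cast<uint32_t>(b);
  if (a_lo != b_lo) return (a_lo < b_lo) ? -1 : 1;   // unsigned low-word compare
  return 0;
}

int main() {
  assert(CompareLongViaHalves(-1, 0) == -1);            // decided by the signed high words
  assert(CompareLongViaHalves(0xFFFFFFFFll, 1) == 1);   // same high word; unsigned low compare
  assert(CompareLongViaHalves(INT64_MIN, INT64_MAX) == -1);
  assert(CompareLongViaHalves(42, 42) == 0);
}
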
+ less_cond = lo; + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + __ Mov(out, 0); + GenerateVcmp(compare); + // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS). + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + less_cond = ARMFPCondition(kCondLT, compare->IsGtBias()); + break; + } + default: + LOG(FATAL) << "Unexpected compare type " << type; + UNREACHABLE(); + } + + __ B(eq, &done); + __ B(less_cond, &less); + + __ Bind(&greater); + __ Mov(out, 1); + __ B(&done); + + __ Bind(&less); + __ Mov(out, -1); + + __ Bind(&done); +} + +void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); +} + +void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + +void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) { + // TODO (ported from quick): revisit ARM barrier kinds. + DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings. + switch (kind) { + case MemBarrierKind::kAnyStore: + case MemBarrierKind::kLoadAny: + case MemBarrierKind::kAnyAny: { + flavor = DmbOptions::ISH; + break; + } + case MemBarrierKind::kStoreStore: { + flavor = DmbOptions::ISHST; + break; + } + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + } + __ Dmb(flavor); +} + +void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr, + uint32_t offset, + vixl32::Register out_lo, + vixl32::Register out_hi) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + if (offset != 0) { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, addr, offset); + addr = temp; + } + __ Ldrexd(out_lo, out_hi, addr); +} + +void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr, + uint32_t offset, + vixl32::Register value_lo, + vixl32::Register value_hi, + vixl32::Register temp1, + vixl32::Register temp2, + HInstruction* instruction) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Label fail; + if (offset != 0) { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, addr, offset); + addr = temp; + } + __ Bind(&fail); + // We need a load followed by store. (The address used in a STREX instruction must + // be the same as the address in the most recently executed LDREX instruction.) 
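
[Editorial aside, not part of this patch] The LDREXD/STREXD pair above forms a load-linked/store-conditional retry loop for an atomic 64-bit store. A rough portable analogue (not what the generated code uses) built on a weak compare-exchange; ordering is relaxed here because, as in the generated code, any required barriers are emitted separately via GenerateMemoryBarrier/DMB:

#include <atomic>
#include <cstdint>

void AtomicStore64(std::atomic<uint64_t>* addr, uint64_t value) {
  uint64_t observed = addr->load(std::memory_order_relaxed);         // plays the role of LDREXD
  while (!addr->compare_exchange_weak(observed, value,
                                      std::memory_order_relaxed)) {  // STREXD + CBNZ back to `fail`
    // `observed` is refreshed on failure; simply try again.
  }
}
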
+ __ Ldrexd(temp1, temp2, addr); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ Strexd(temp1, value_lo, value_hi, addr); + __ Cbnz(temp1, &fail); +} + void LocationsBuilderARMVIXL::HandleFieldSet( HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); @@ -2708,7 +3183,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, } case Primitive::kPrimDouble: { - vixl32::DRegister value_reg = FromLowSToD(LowSRegisterFrom(value)); + vixl32::DRegister value_reg = DRegisterFrom(value); if (is_volatile && !atomic_ldrd_strd) { vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0)); vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1)); @@ -2750,16 +3225,6 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, } } -Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) { - DCHECK(Primitive::IsFloatingPointType(input->GetType())) << input->GetType(); - if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) || - (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) { - return Location::ConstantLocation(input->AsConstant()); - } else { - return Location::RequiresFpuRegister(); - } -} - void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); @@ -2809,6 +3274,78 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, } } +Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) { + DCHECK(Primitive::IsFloatingPointType(input->GetType())) << input->GetType(); + if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) || + (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) { + return Location::ConstantLocation(input->AsConstant()); + } else { + return Location::RequiresFpuRegister(); + } +} + +Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant, + Opcode opcode) { + DCHECK(!Primitive::IsFloatingPointType(constant->GetType())); + if (constant->IsConstant() && + CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { + return Location::ConstantLocation(constant->AsConstant()); + } + return Location::RequiresRegister(); +} + +bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, + Opcode opcode) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst)); + if (Primitive::Is64BitType(input_cst->GetType())) { + Opcode high_opcode = opcode; + SetCc low_set_cc = kCcDontCare; + switch (opcode) { + case SUB: + // Flip the operation to an ADD. + value = -value; + opcode = ADD; + FALLTHROUGH_INTENDED; + case ADD: + if (Low32Bits(value) == 0u) { + return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare); + } + high_opcode = ADC; + low_set_cc = kCcSet; + break; + default: + break; + } + return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) && + CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare); + } else { + return CanEncodeConstantAsImmediate(Low32Bits(value), opcode); + } +} + +// TODO(VIXL): Replace art::arm::SetCc` with `vixl32::FlagsUpdate after flags set optimization +// enabled. 
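
[Editorial aside, not part of this patch] The 64-bit branch of CanEncodeConstantAsImmediate above relies on two facts: a long SUB of a constant can become an ADD of its two's complement, and the add itself splits into an ADDS/ADC pair across the words. A short check of both:

#include <cassert>
#include <cstdint>

uint64_t Add64ViaHalves(uint64_t a, uint64_t b) {
  uint32_t a_lo = static_cast<uint32_t>(a), a_hi = static_cast<uint32_t>(a >> 32);
  uint32_t b_lo = static_cast<uint32_t>(b), b_hi = static_cast<uint32_t>(b >> 32);
  uint32_t lo = a_lo + b_lo;                   // ADDS: may wrap ...
  uint32_t carry = (lo < a_lo) ? 1u : 0u;      // ... and the wrap is exactly the carry flag.
  uint32_t hi = a_hi + b_hi + carry;           // ADC consumes that carry.
  return (static_cast<uint64_t>(hi) << 32) | lo;
}

int main() {
  const uint64_t a = 0x00000001FFFFFFFFull;
  const uint64_t c = 5u;
  assert(Add64ViaHalves(a, c) == a + c);                 // carry out of the low word
  assert(Add64ViaHalves(a, uint64_t{0} - c) == a - c);   // SUB c == ADD of two's-complement -c
}
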
+bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(uint32_t value, + Opcode opcode, + SetCc set_cc) { + ArmVIXLAssembler* assembler = codegen_->GetAssembler(); + if (assembler->ShifterOperandCanHold(opcode, value, set_cc)) { + return true; + } + Opcode neg_opcode = kNoOperand; + switch (opcode) { + case AND: neg_opcode = BIC; value = ~value; break; + case ORR: neg_opcode = ORN; value = ~value; break; + case ADD: neg_opcode = SUB; value = -value; break; + case ADC: neg_opcode = SBC; value = ~value; break; + case SUB: neg_opcode = ADD; value = -value; break; + case SBC: neg_opcode = ADC; value = ~value; break; + default: + return false; + } + return assembler->ShifterOperandCanHold(neg_opcode, value, set_cc); +} + void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); @@ -2874,7 +3411,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, break; case Primitive::kPrimDouble: { - vixl32::DRegister out_dreg = FromLowSToD(LowSRegisterFrom(out)); + vixl32::DRegister out_dreg = DRegisterFrom(out); if (is_volatile && !atomic_ldrd_strd) { vixl32::Register lo = RegisterFrom(locations->GetTemp(0)); vixl32::Register hi = RegisterFrom(locations->GetTemp(1)); @@ -2938,6 +3475,14 @@ void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instr HandleFieldGet(instruction, instruction->GetFieldInfo()); } +void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); +} + void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) { // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() @@ -2974,6 +3519,554 @@ void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) { codegen_->GenerateNullCheck(instruction); } +static LoadOperandType GetLoadOperandType(Primitive::Type type) { + switch (type) { + case Primitive::kPrimNot: + return kLoadWord; + case Primitive::kPrimBoolean: + return kLoadUnsignedByte; + case Primitive::kPrimByte: + return kLoadSignedByte; + case Primitive::kPrimChar: + return kLoadUnsignedHalfword; + case Primitive::kPrimShort: + return kLoadSignedHalfword; + case Primitive::kPrimInt: + return kLoadWord; + case Primitive::kPrimLong: + return kLoadWordPair; + case Primitive::kPrimFloat: + return kLoadSWord; + case Primitive::kPrimDouble: + return kLoadDWord; + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +static StoreOperandType GetStoreOperandType(Primitive::Type type) { + switch (type) { + case Primitive::kPrimNot: + return kStoreWord; + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + return kStoreByte; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + return kStoreHalfword; + case Primitive::kPrimInt: + return kStoreWord; + case Primitive::kPrimLong: + return kStoreWordPair; + case Primitive::kPrimFloat: + return kStoreSWord; + case Primitive::kPrimDouble: + return kStoreDWord; + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(Primitive::Type type, + Location out_loc, + 
vixl32::Register base, + vixl32::Register reg_index, + vixl32::Condition cond) { + uint32_t shift_count = Primitive::ComponentSizeShift(type); + MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count); + + switch (type) { + case Primitive::kPrimByte: + __ Ldrsb(cond, RegisterFrom(out_loc), mem_address); + break; + case Primitive::kPrimBoolean: + __ Ldrb(cond, RegisterFrom(out_loc), mem_address); + break; + case Primitive::kPrimShort: + __ Ldrsh(cond, RegisterFrom(out_loc), mem_address); + break; + case Primitive::kPrimChar: + __ Ldrh(cond, RegisterFrom(out_loc), mem_address); + break; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + __ Ldr(cond, RegisterFrom(out_loc), mem_address); + break; + // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types. + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(Primitive::Type type, + Location loc, + vixl32::Register base, + vixl32::Register reg_index, + vixl32::Condition cond) { + uint32_t shift_count = Primitive::ComponentSizeShift(type); + MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count); + + switch (type) { + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + __ Strb(cond, RegisterFrom(loc), mem_address); + break; + case Primitive::kPrimShort: + case Primitive::kPrimChar: + __ Strh(cond, RegisterFrom(loc), mem_address); + break; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + __ Str(cond, RegisterFrom(loc), mem_address); + break; + // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types. + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + TODO_VIXL32(FATAL); + } + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(instruction->GetType())) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); + } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + // Also need for String compression feature. 
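
[Editorial aside, not part of this patch] The [base, index, LSL #shift] addressing used by LoadFromShiftedRegOffset / StoreToShiftedRegOffset above is plain element indexing: the shift is log2 of the component size, so base + (index << shift) is &array[index]. A minimal sketch; the helper and its name are invented for illustration:

#include <cassert>
#include <cstdint>

int32_t LoadIntAtShiftedIndex(const int32_t* base, uint32_t index) {
  const unsigned shift = 2;  // log2(sizeof(int32_t)), i.e. ComponentSizeShift(kPrimInt)
  uintptr_t addr = reinterpret_cast<uintptr_t>(base) + (static_cast<uintptr_t>(index) << shift);
  return *reinterpret_cast<const int32_t*>(addr);
}

int main() {
  int32_t data[] = {10, 20, 30, 40};
  assert(LoadIntAtShiftedIndex(data, 3) == data[3]);
}
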
+ if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) + || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { + TODO_VIXL32(FATAL); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { + UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); + vixl32::Register obj = InputRegisterAt(instruction, 0); + Location index = locations->InAt(1); + Location out_loc = locations->Out(); + uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + Primitive::Type type = instruction->GetType(); + const bool maybe_compressed_char_at = mirror::kUseStringCompression && + instruction->IsStringCharAt(); + HInstruction* array_instr = instruction->GetArray(); + bool has_intermediate_address = array_instr->IsIntermediateAddress(); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); + + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: { + if (index.IsConstant()) { + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + if (maybe_compressed_char_at) { + TODO_VIXL32(FATAL); + } else { + uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); + + LoadOperandType load_type = GetLoadOperandType(type); + GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset); + } + } else { + vixl32::Register temp = temps.Acquire(); + + if (has_intermediate_address) { + TODO_VIXL32(FATAL); + } else { + __ Add(temp, obj, data_offset); + } + if (maybe_compressed_char_at) { + TODO_VIXL32(FATAL); + } else { + codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index)); + } + } + break; + } + + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + TODO_VIXL32(FATAL); + } else { + vixl32::Register out = OutputRegister(instruction); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } else { + vixl32::Register temp = temps.Acquire(); + + if (has_intermediate_address) { + TODO_VIXL32(FATAL); + } else { + __ Add(temp, obj, data_offset); + } + codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index)); + + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). 
+ codegen_->MaybeGenerateReadBarrierSlow( + instruction, out_loc, out_loc, obj_loc, data_offset, index); + } + } + break; + } + + case Primitive::kPrimLong: { + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset); + } else { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8)); + GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset); + } + break; + } + + case Primitive::kPrimFloat: { + vixl32::SRegister out = SRegisterFrom(out_loc); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + GetAssembler()->LoadSFromOffset(out, obj, offset); + } else { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4)); + GetAssembler()->LoadSFromOffset(out, temp, data_offset); + } + break; + } + + case Primitive::kPrimDouble: { + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset); + } else { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8)); + GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset); + } + break; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } + + if (type == Primitive::kPrimNot) { + // Potential implicit null checks, in the case of reference + // arrays, are handled in the previous switch statement. + } else if (!maybe_compressed_char_at) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } +} + +void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) { + Primitive::Type value_type = instruction->GetComponentType(); + + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + instruction, + may_need_runtime_call_for_type_check ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(value_type)) { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresRegister()); + } + if (needs_write_barrier) { + // Temporary registers for the write barrier. + locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. 
+ locations->AddTemp(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { + UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); + LocationSummary* locations = instruction->GetLocations(); + vixl32::Register array = InputRegisterAt(instruction, 0); + Location index = locations->InAt(1); + Primitive::Type value_type = instruction->GetComponentType(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + uint32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); + Location value_loc = locations->InAt(2); + HInstruction* array_instr = instruction->GetArray(); + bool has_intermediate_address = array_instr->IsIntermediateAddress(); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); + + switch (value_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: { + if (index.IsConstant()) { + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + uint32_t full_offset = + data_offset + (const_index << Primitive::ComponentSizeShift(value_type)); + StoreOperandType store_type = GetStoreOperandType(value_type); + GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset); + } else { + vixl32::Register temp = temps.Acquire(); + + if (has_intermediate_address) { + TODO_VIXL32(FATAL); + } else { + __ Add(temp, array, data_offset); + } + codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index)); + } + break; + } + + case Primitive::kPrimNot: { + vixl32::Register value = RegisterFrom(value_loc); + // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet. + // See the comment in instruction_simplifier_shared.cc. + DCHECK(!has_intermediate_address); + + if (instruction->InputAt(2)->IsNullConstant()) { + // Just setting null. 
+ if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + GetAssembler()->StoreToOffset(kStoreWord, value, array, offset); + } else { + DCHECK(index.IsRegister()) << index; + vixl32::Register temp = temps.Acquire(); + __ Add(temp, array, data_offset); + codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index)); + } + codegen_->MaybeRecordImplicitNullCheck(instruction); + DCHECK(!needs_write_barrier); + DCHECK(!may_need_runtime_call_for_type_check); + break; + } + + DCHECK(needs_write_barrier); + Location temp1_loc = locations->GetTemp(0); + vixl32::Register temp1 = RegisterFrom(temp1_loc); + Location temp2_loc = locations->GetTemp(1); + vixl32::Register temp2 = RegisterFrom(temp2_loc); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + vixl32::Label done; + SlowPathCodeARMVIXL* slow_path = nullptr; + + if (may_need_runtime_call_for_type_check) { + slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARMVIXL(instruction); + codegen_->AddSlowPath(slow_path); + if (instruction->GetValueCanBeNull()) { + vixl32::Label non_zero; + __ Cbnz(value, &non_zero); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + GetAssembler()->StoreToOffset(kStoreWord, value, array, offset); + } else { + DCHECK(index.IsRegister()) << index; + vixl32::Register temp = temps.Acquire(); + __ Add(temp, array, data_offset); + codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index)); + } + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ B(&done); + __ Bind(&non_zero); + } + + // Note that when read barriers are enabled, the type checks + // are performed without read barriers. This is fine, even in + // the case where a class object is in the from-space after + // the flip, as a comparison involving such a type would not + // produce a false positive; it may of course produce a false + // negative, in which case we would take the ArraySet slow + // path. + + // /* HeapReference<Class> */ temp1 = array->klass_ + GetAssembler()->LoadFromOffset(kLoadWord, temp1, array, class_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + GetAssembler()->MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // /* HeapReference<Class> */ temp2 = value->klass_ + GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + __ Cmp(temp1, temp2); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + vixl32::Label do_put; + __ B(eq, &do_put); + // If heap poisoning is enabled, the `temp1` reference has + // not been unpoisoned yet; unpoison it now. + GetAssembler()->MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. 
+ __ Cbnz(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ B(ne, slow_path->GetEntryLabel()); + } + } + + vixl32::Register source = value; + if (kPoisonHeapReferences) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. + DCHECK_EQ(value_type, Primitive::kPrimNot); + __ Mov(temp1, value); + GetAssembler()->PoisonHeapReference(temp1); + source = temp1; + } + + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + GetAssembler()->StoreToOffset(kStoreWord, source, array, offset); + } else { + DCHECK(index.IsRegister()) << index; + + vixl32::Register temp = temps.Acquire(); + __ Add(temp, array, data_offset); + codegen_->StoreToShiftedRegOffset(value_type, + LocationFrom(source), + temp, + RegisterFrom(index)); + } + + if (!may_need_runtime_call_for_type_check) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + codegen_->MarkGCCard(temp1, temp2, array, value, instruction->GetValueCanBeNull()); + + if (done.IsReferenced()) { + __ Bind(&done); + } + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } + + break; + } + + case Primitive::kPrimLong: { + Location value = locations->InAt(2); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset); + } else { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8)); + GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset); + } + break; + } + + case Primitive::kPrimFloat: { + Location value = locations->InAt(2); + DCHECK(value.IsFpuRegister()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset); + } else { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4)); + GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset); + } + break; + } + + case Primitive::kPrimDouble: { + Location value = locations->InAt(2); + DCHECK(value.IsFpuRegisterPair()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset); + } else { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8)); + GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset); + } + break; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << value_type; + UNREACHABLE(); + } + + // Objects are handled in the switch. 
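
[Editorial aside, not part of this patch] A heavily simplified model of the array-store type check emitted above, using hypothetical Klass/Obj structs rather than ART's real object layout (read barriers and heap poisoning are ignored): it shows when the store can proceed directly and when it must defer to the ArraySet slow path.

#include <cassert>
#include <cstddef>

struct Klass { const Klass* super_class; const Klass* component_type; };
struct Obj { const Klass* klass; };

bool NeedsSlowPathStoreCheck(const Obj* array, const Obj* value,
                             bool static_type_is_object_array) {
  if (value == nullptr) return false;                        // storing null needs no type check
  const Klass* component = array->klass->component_type;
  if (value->klass == component) return false;               // exact match: store directly
  if (static_type_is_object_array && component->super_class == nullptr) {
    return false;                                            // component type is java.lang.Object
  }
  return true;                                               // let the runtime do the full check
}

int main() {
  Klass object_class{nullptr, nullptr};
  Klass string_class{&object_class, nullptr};
  Klass object_array_class{nullptr, &object_class};
  Obj array{&object_array_class};
  Obj value{&string_class};
  assert(!NeedsSlowPathStoreCheck(&array, &value, /*static_type_is_object_array=*/true));
}
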
+ if (value_type != Primitive::kPrimNot) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } +} + void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -2990,6 +4083,28 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction // TODO(VIXL): https://android-review.googlesource.com/#/c/272625/ } +void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) { + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); + caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) { + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction); + codegen_->AddSlowPath(slow_path); + + vixl32::Register index = InputRegisterAt(instruction, 0); + vixl32::Register length = InputRegisterAt(instruction, 1); + + __ Cmp(index, length); + __ B(hs, slow_path->GetEntryLabel()); +} + void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, vixl32::Register card, vixl32::Register object, @@ -3101,7 +4216,14 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) { GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); } } else if (source.IsFpuRegister()) { - TODO_VIXL32(FATAL); + if (destination.IsRegister()) { + __ Vmov(RegisterFrom(destination), SRegisterFrom(source)); + } else if (destination.IsFpuRegister()) { + __ Vmov(SRegisterFrom(destination), SRegisterFrom(source)); + } else { + DCHECK(destination.IsStackSlot()); + GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex()); + } } else if (source.IsDoubleStackSlot()) { if (destination.IsDoubleStackSlot()) { vixl32::DRegister temp = temps.AcquireD(); @@ -3112,16 +4234,15 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) { GetAssembler()->LoadFromOffset( kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex()); } else { - TODO_VIXL32(FATAL); + DCHECK(destination.IsFpuRegisterPair()) << destination; + GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex()); } } else if (source.IsRegisterPair()) { if (destination.IsRegisterPair()) { __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source)); __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source)); } else if (destination.IsFpuRegisterPair()) { - __ Vmov(FromLowSToD(LowSRegisterFrom(destination)), - LowRegisterFrom(source), - HighRegisterFrom(source)); + __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source)); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; DCHECK(ExpectedPairLayout(source)); @@ -3131,7 +4252,14 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) { destination.GetStackIndex()); } } else if (source.IsFpuRegisterPair()) { - TODO_VIXL32(FATAL); + if (destination.IsRegisterPair()) { + __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source)); + } else if (destination.IsFpuRegisterPair()) { + __ Vmov(DRegisterFrom(destination), 
DRegisterFrom(source)); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex()); + } } else { DCHECK(source.IsConstant()) << source; HConstant* constant = source.GetConstant(); @@ -3164,7 +4292,7 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) { } else if (constant->IsDoubleConstant()) { double value = constant->AsDoubleConstant()->GetValue(); if (destination.IsFpuRegisterPair()) { - __ Vmov(FromLowSToD(LowSRegisterFrom(destination)), value); + __ Vmov(DRegisterFrom(destination), value); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; uint64_t int_value = bit_cast<uint64_t, double>(value); @@ -3273,6 +4401,14 @@ void ParallelMoveResolverARMVIXL::RestoreScratch(int reg ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); } +// Check if the desired_class_load_kind is supported. If it is, return it, +// otherwise return a fall-back kind that should be used instead. +HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind ATTRIBUTE_UNUSED) { + // TODO(VIXL): Implement optimized code paths. + return HLoadClass::LoadKind::kDexCacheViaMethod; +} + void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { if (cls->NeedsAccessCheck()) { InvokeRuntimeCallingConventionARMVIXL calling_convention; @@ -3321,7 +4457,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { GenerateGcRootFieldLoad(cls, out_loc, current_method, - ArtMethod::DeclaringClassOffset().Int32Value()); + ArtMethod::DeclaringClassOffset().Int32Value(), + kEmitCompilerReadBarrier); break; } case HLoadClass::LoadKind::kDexCacheViaMethod: { @@ -3333,7 +4470,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset); // /* GcRoot<mirror::Class> */ out = out[type_index] size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - GenerateGcRootFieldLoad(cls, out_loc, out, offset); + GenerateGcRootFieldLoad(cls, out_loc, out, offset, kEmitCompilerReadBarrier); generate_null_check = !cls->IsInDexCache(); break; } @@ -3357,6 +4494,292 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { } } +void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); + locations->SetInAt(0, Location::RequiresRegister()); + if (check->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) { + // We assume the class is not null. 
+ LoadClassSlowPathARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), + check, + check->GetDexPc(), + /* do_clinit */ true); + codegen_->AddSlowPath(slow_path); + GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); +} + +void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( + LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + GetAssembler()->LoadFromOffset(kLoadWord, + temp, + class_reg, + mirror::Class::StatusOffset().Int32Value()); + __ Cmp(temp, mirror::Class::kStatusInitialized); + __ B(lt, slow_path->GetEntryLabel()); + // Even if the initialized flag is set, we may be in a situation where caches are not synced + // properly. Therefore, we do a memory fence. + __ Dmb(ISH); + __ Bind(slow_path->GetExitLabel()); +} + +// Check if the desired_string_load_kind is supported. If it is, return it, +// otherwise return a fall-back kind that should be used instead. +HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( + HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) { + // TODO(VIXL): Implement optimized code paths. For now we always use the simpler fallback code. + return HLoadString::LoadKind::kDexCacheViaMethod; +} + +void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { + LocationSummary::CallKind call_kind = load->NeedsEnvironment() + ? LocationSummary::kCallOnMainOnly + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + + // TODO(VIXL): Implement optimized code paths. + // See InstructionCodeGeneratorARMVIXL::VisitLoadString. + HLoadString::LoadKind load_kind = load->GetLoadKind(); + if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + locations->SetInAt(0, Location::RequiresRegister()); + // TODO(VIXL): Use InvokeRuntimeCallingConventionARMVIXL instead. + locations->SetOut(LocationFrom(r0)); + } else { + locations->SetOut(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) { + // TODO(VIXL): Implement optimized code paths. + // We implemented the simplest solution to get first ART tests passing, we deferred the + // optimized path until later, we should implement it using ARM64 implementation as a + // reference. The same related to LocationsBuilderARMVIXL::VisitLoadString. + + // TODO: Re-add the compiler code to do string dex cache lookup again. 
+ DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex()); + codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); +} + +static int32_t GetExceptionTlsOffset() { + return Thread::ExceptionOffset<kArmPointerSize>().Int32Value(); +} + +void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) { + vixl32::Register out = OutputRegister(load); + GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset()); +} + + +void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) { + new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); +} + +void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Mov(temp, 0); + GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset()); +} + +void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); +} + +void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) { + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); +} + +void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) { + HandleBitwiseOperation(instruction, AND); +} + +void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) { + HandleBitwiseOperation(instruction, ORR); +} + +void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) { + HandleBitwiseOperation(instruction, EOR); +} + +void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + DCHECK(instruction->GetResultType() == Primitive::kPrimInt + || instruction->GetResultType() == Primitive::kPrimLong); + // Note: GVN reorders commutative operations to have the constant on the right hand side. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode)); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) { + HandleBitwiseOperation(instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) { + HandleBitwiseOperation(instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) { + HandleBitwiseOperation(instruction); +} + +// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl. 
+void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out, + vixl32::Register first, + uint32_t value) { + // Optimize special cases for individual halfs of `and-long` (`and` is simplified earlier). + if (value == 0xffffffffu) { + if (!out.Is(first)) { + __ Mov(out, first); + } + return; + } + if (value == 0u) { + __ Mov(out, 0); + return; + } + if (GetAssembler()->ShifterOperandCanHold(AND, value)) { + __ And(out, first, value); + } else { + DCHECK(GetAssembler()->ShifterOperandCanHold(BIC, ~value)); + __ Bic(out, first, ~value); + } +} + +// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl. +void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out, + vixl32::Register first, + uint32_t value) { + // Optimize special cases for individual halfs of `or-long` (`or` is simplified earlier). + if (value == 0u) { + if (!out.Is(first)) { + __ Mov(out, first); + } + return; + } + if (value == 0xffffffffu) { + __ Mvn(out, 0); + return; + } + if (GetAssembler()->ShifterOperandCanHold(ORR, value)) { + __ Orr(out, first, value); + } else { + DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value)); + __ Orn(out, first, ~value); + } +} + +// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl. +void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out, + vixl32::Register first, + uint32_t value) { + // Optimize special case for individual halfs of `xor-long` (`xor` is simplified earlier). + if (value == 0u) { + if (!out.Is(first)) { + __ Mov(out, first); + } + return; + } + __ Eor(out, first, value); +} + +void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location out = locations->Out(); + + if (second.IsConstant()) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); + uint32_t value_low = Low32Bits(value); + if (instruction->GetResultType() == Primitive::kPrimInt) { + vixl32::Register first_reg = InputRegisterAt(instruction, 0); + vixl32::Register out_reg = OutputRegister(instruction); + if (instruction->IsAnd()) { + GenerateAndConst(out_reg, first_reg, value_low); + } else if (instruction->IsOr()) { + GenerateOrrConst(out_reg, first_reg, value_low); + } else { + DCHECK(instruction->IsXor()); + GenerateEorConst(out_reg, first_reg, value_low); + } + } else { + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + uint32_t value_high = High32Bits(value); + vixl32::Register first_low = LowRegisterFrom(first); + vixl32::Register first_high = HighRegisterFrom(first); + vixl32::Register out_low = LowRegisterFrom(out); + vixl32::Register out_high = HighRegisterFrom(out); + if (instruction->IsAnd()) { + GenerateAndConst(out_low, first_low, value_low); + GenerateAndConst(out_high, first_high, value_high); + } else if (instruction->IsOr()) { + GenerateOrrConst(out_low, first_low, value_low); + GenerateOrrConst(out_high, first_high, value_high); + } else { + DCHECK(instruction->IsXor()); + GenerateEorConst(out_low, first_low, value_low); + GenerateEorConst(out_high, first_high, value_high); + } + } + return; + } + + if (instruction->GetResultType() == Primitive::kPrimInt) { + vixl32::Register first_reg = InputRegisterAt(instruction, 0); + vixl32::Register second_reg = InputRegisterAt(instruction, 1); + vixl32::Register out_reg = OutputRegister(instruction); + if 
(instruction->IsAnd()) { + __ And(out_reg, first_reg, second_reg); + } else if (instruction->IsOr()) { + __ Orr(out_reg, first_reg, second_reg); + } else { + DCHECK(instruction->IsXor()); + __ Eor(out_reg, first_reg, second_reg); + } + } else { + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + vixl32::Register first_low = LowRegisterFrom(first); + vixl32::Register first_high = HighRegisterFrom(first); + vixl32::Register second_low = LowRegisterFrom(second); + vixl32::Register second_high = HighRegisterFrom(second); + vixl32::Register out_low = LowRegisterFrom(out); + vixl32::Register out_high = HighRegisterFrom(out); + if (instruction->IsAnd()) { + __ And(out_low, first_low, second_low); + __ And(out_high, first_high, second_high); + } else if (instruction->IsOr()) { + __ Orr(out_low, first_low, second_low); + __ Orr(out_high, first_high, second_high); + } else { + DCHECK(instruction->IsXor()); + __ Eor(out_low, first_low, second_low); + __ Eor(out_high, first_high, second_high); + } + } +} + void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( HInstruction* instruction ATTRIBUTE_UNUSED, Location root, @@ -3375,6 +4798,34 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( } } +void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED, + Location out, + Location ref ATTRIBUTE_UNUSED, + Location obj ATTRIBUTE_UNUSED, + uint32_t offset ATTRIBUTE_UNUSED, + Location index ATTRIBUTE_UNUSED) { + if (kEmitCompilerReadBarrier) { + DCHECK(!kUseBakerReadBarrier); + TODO_VIXL32(FATAL); + } else if (kPoisonHeapReferences) { + GetAssembler()->UnpoisonHeapReference(RegisterFrom(out)); + } +} + +// Check if the desired_dispatch_info is supported. If it is, return it, +// otherwise return a fall-back info that should be used instead. +HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch( + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info ATTRIBUTE_UNUSED, + HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + // TODO(VIXL): Implement optimized code paths. 
+ return { + HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, + HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, + 0u, + 0u + }; +} + vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter( HInvokeStaticOrDirect* invoke, vixl32::Register temp) { DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); @@ -3490,56 +4941,10 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location __ Blx(lr); } -static int32_t GetExceptionTlsOffset() { - return Thread::ExceptionOffset<kArmPointerSize>().Int32Value(); -} - -void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) { - vixl32::Register out = OutputRegister(load); - GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset()); -} - -void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) { - new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); -} - -void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - __ Mov(temp, 0); - GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset()); -} - -void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); - InvokeRuntimeCallingConventionARMVIXL calling_convention; - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); -} - -void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) { - codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); -} - -void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED, - Location out, - Location ref ATTRIBUTE_UNUSED, - Location obj ATTRIBUTE_UNUSED, - uint32_t offset ATTRIBUTE_UNUSED, - Location index ATTRIBUTE_UNUSED) { - if (kEmitCompilerReadBarrier) { - DCHECK(!kUseBakerReadBarrier); - TODO_VIXL32(FATAL); - } else if (kPoisonHeapReferences) { - GetAssembler()->UnpoisonHeapReference(RegisterFrom(out)); - } +// Copy the result of a call into the given target. 
+void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, + Primitive::Type type ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); } #undef __ diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 7815db2960..df7d46782d 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -105,17 +105,25 @@ class LoadClassSlowPathARMVIXL; M(Above) \ M(AboveOrEqual) \ M(Add) \ + M(And) \ + M(ArrayGet) \ M(ArrayLength) \ + M(ArraySet) \ M(Below) \ M(BelowOrEqual) \ + M(BooleanNot) \ + M(BoundsCheck) \ M(ClearException) \ M(ClinitCheck) \ M(Compare) \ M(CurrentMethod) \ + M(Deoptimize) \ M(Div) \ M(DivZeroCheck) \ + M(DoubleConstant) \ M(Equal) \ M(Exit) \ + M(FloatConstant) \ M(Goto) \ M(GreaterThan) \ M(GreaterThanOrEqual) \ @@ -133,57 +141,49 @@ class LoadClassSlowPathARMVIXL; M(LongConstant) \ M(MemoryBarrier) \ M(Mul) \ + M(Neg) \ M(NewArray) \ M(NewInstance) \ M(Not) \ M(NotEqual) \ M(NullCheck) \ M(NullConstant) \ + M(Or) \ M(ParallelMove) \ M(ParameterValue) \ M(Phi) \ M(Return) \ M(ReturnVoid) \ + M(Ror) \ M(Select) \ + M(Shl) \ + M(Shr) \ M(StaticFieldGet) \ + M(StaticFieldSet) \ M(Sub) \ M(SuspendCheck) \ M(Throw) \ M(TryBoundary) \ M(TypeConversion) \ + M(UShr) \ + M(Xor) \ // TODO: Remove once the VIXL32 backend is implemented completely. #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \ - M(And) \ - M(ArrayGet) \ - M(ArraySet) \ - M(BooleanNot) \ - M(BoundsCheck) \ M(BoundType) \ M(CheckCast) \ M(ClassTableGet) \ - M(Deoptimize) \ - M(DoubleConstant) \ - M(FloatConstant) \ M(InstanceOf) \ M(InvokeInterface) \ M(InvokeUnresolved) \ M(MonitorOperation) \ M(NativeDebugInfo) \ - M(Neg) \ - M(Or) \ M(PackedSwitch) \ M(Rem) \ - M(Ror) \ - M(Shl) \ - M(Shr) \ - M(StaticFieldSet) \ M(UnresolvedInstanceFieldGet) \ M(UnresolvedInstanceFieldSet) \ M(UnresolvedStaticFieldGet) \ M(UnresolvedStaticFieldSet) \ - M(UShr) \ - M(Xor) \ class CodeGeneratorARMVIXL; @@ -276,11 +276,18 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { } void HandleInvoke(HInvoke* invoke); + void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode); void HandleCondition(HCondition* condition); + void HandleIntegerRotate(LocationSummary* locations); + void HandleLongRotate(LocationSummary* locations); + void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); Location ArithmeticZeroOrFpuRegister(HInstruction* input); + Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode); + bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode); + bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare); CodeGeneratorARMVIXL* const codegen_; InvokeDexCallingConventionVisitorARM parameter_visitor_; @@ -311,7 +318,14 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg); void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); + void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); + void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); + void 
HandleBitwiseOperation(HBinaryOperation* operation); void HandleCondition(HCondition* condition); + void HandleIntegerRotate(HRor* ror); + void HandleLongRotate(HRor* ror); + void HandleShift(HBinaryOperation* operation); void GenerateWideAtomicStore(vixl::aarch32::Register addr, uint32_t offset, @@ -339,7 +353,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { Location root, vixl::aarch32::Register obj, uint32_t offset, - bool requires_read_barrier = kEmitCompilerReadBarrier); + bool requires_read_barrier); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, vixl::aarch32::Label* true_target, @@ -424,6 +438,17 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Helper method to move a 32-bit value between two locations. void Move32(Location destination, Location source); + void LoadFromShiftedRegOffset(Primitive::Type type, + Location out_loc, + vixl::aarch32::Register base, + vixl::aarch32::Register reg_index, + vixl::aarch32::Condition cond = vixl::aarch32::al); + void StoreToShiftedRegOffset(Primitive::Type type, + Location out_loc, + vixl::aarch32::Register base, + vixl::aarch32::Register reg_index, + vixl::aarch32::Condition cond = vixl::aarch32::al); + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 02c1c3b69f..efd33c7025 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -426,11 +426,25 @@ class ArraySetSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); }; -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
class ReadBarrierMarkSlowPathX86 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison) - : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) { + ReadBarrierMarkSlowPathX86(HInstruction* instruction, + Location ref, + bool unpoison_ref_before_marking) + : SlowPathCode(instruction), + ref_(ref), + unpoison_ref_before_marking_(unpoison_ref_before_marking) { DCHECK(kEmitCompilerReadBarrier); } @@ -438,9 +452,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Register reg = obj_.AsRegister<Register>(); + Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -455,44 +469,211 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - if (unpoison_) { + if (unpoison_ref_before_marking_) { // Object* ref = ref_addr->AsMirrorPtr() - __ MaybeUnpoisonHeapReference(reg); + __ MaybeUnpoisonHeapReference(ref_reg); } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); - DCHECK_NE(reg, ESP); - DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg; + DCHECK_NE(ref_reg, ESP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in EAX): // - // EAX <- obj + // EAX <- ref // EAX <- ReadBarrierMark(EAX) - // obj <- EAX + // ref <- EAX // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); // This runtime call does not require a stack map. x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); } private: - const Location obj_; - const bool unpoison_; + // The location (register) of the marked object reference. + const Location ref_; + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathX86 above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). 
+class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { + public: + ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction, + Location ref, + Register obj, + const Address& field_addr, + bool unpoison_ref_before_marking, + Register temp) + : SlowPathCode(instruction), + ref_(ref), + obj_(obj), + field_addr_(field_addr), + unpoison_ref_before_marking_(unpoison_ref_before_marking), + temp_(temp) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + + __ Bind(GetEntryLabel()); + if (unpoison_ref_before_marking_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + } + + // Save the old (unpoisoned) reference. + __ movl(temp_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + DCHECK_NE(ref_reg, ESP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in EAX): + // + // EAX <- ref + // EAX <- ReadBarrierMark(EAX) + // ref <- EAX + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); + // This runtime call does not require a stack map. + x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*field_addr`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LOCK CMPXCHGL instruction in the compare-and-set (CAS) + // operation below would abort the CAS, leaving the field as-is. + NearLabel done; + __ cmpl(temp_, ref_reg); + __ j(kEqual, &done); + + // Update the the holder's field atomically. This may fail if + // mutator updates before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + // This operation is implemented with a 32-bit LOCK CMPXLCHG + // instruction, which requires the expected value (the old + // reference) to be in EAX. Save EAX beforehand, and move the + // expected value (stored in `temp_`) into EAX. 
+ __ pushl(EAX); + __ movl(EAX, temp_); + + // Convenience aliases. + Register base = obj_; + Register expected = EAX; + Register value = ref_reg; + + bool base_equals_value = (base == value); + if (kPoisonHeapReferences) { + if (base_equals_value) { + // If `base` and `value` are the same register location, move + // `value` to a temporary register. This way, poisoning + // `value` won't invalidate `base`. + value = temp_; + __ movl(value, base); + } + + // Check that the register allocator did not assign the location + // of `expected` (EAX) to `value` nor to `base`, so that heap + // poisoning (when enabled) works as intended below. + // - If `value` were equal to `expected`, both references would + // be poisoned twice, meaning they would not be poisoned at + // all, as heap poisoning uses address negation. + // - If `base` were equal to `expected`, poisoning `expected` + // would invalidate `base`. + DCHECK_NE(value, expected); + DCHECK_NE(base, expected); + + __ PoisonHeapReference(expected); + __ PoisonHeapReference(value); + } + + __ LockCmpxchgl(field_addr_, value); + + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. + if (kPoisonHeapReferences) { + if (base_equals_value) { + // `value` has been moved to a temporary register, no need + // to unpoison it. + } else { + __ UnpoisonHeapReference(value); + } + // No need to unpoison `expected` (EAX), as it is be overwritten below. + } + + // Restore EAX. + __ popl(EAX); + + __ Bind(&done); + __ jmp(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The address of the marked reference field. The base of this address must be `obj_`. + const Address field_addr_; + + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; + + const Register temp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86); +}; + // Slow path generating a read barrier for a heap reference. 
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { public: @@ -5897,7 +6078,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); break; } @@ -5929,7 +6110,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { GenerateGcRootFieldLoad(cls, out_loc, Address::Absolute(address), - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; @@ -5957,7 +6138,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { GenerateGcRootFieldLoad(cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())), - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; @@ -6099,7 +6280,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier); SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); codegen_->AddSlowPath(slow_path); __ testl(out, out); @@ -6831,7 +7012,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( - instruction, root, /* unpoison */ false); + instruction, root, /* unpoison_ref_before_marking */ false); codegen_->AddSlowPath(slow_path); __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()), @@ -6896,7 +7077,9 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Location ref, Register obj, const Address& src, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + Register* temp) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6953,8 +7136,15 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. // Slow path marking the object `ref` when it is gray. - SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( - instruction, ref, /* unpoison */ true); + SlowPathCode* slow_path; + if (always_update_field) { + DCHECK(temp != nullptr); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86( + instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( + instruction, ref, /* unpoison_ref_before_marking */ true); + } AddSlowPath(slow_path); // We have done the "if" of the gray bit check above, now branch based on the flags. 
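Note: the LOCK CMPXCHG sequence emitted by ReadBarrierMarkAndUpdateFieldSlowPathX86 amounts to a strong compare-and-set with relaxed ordering: publish the marked (to-space) reference into the holder's field only if the field still contains the old reference. The following is a minimal C++ sketch of that update step using std::atomic instead of the x86 assembler; HeapReference and UpdateFieldAfterMark are illustrative names, not ART APIs.

#include <atomic>
#include <cstdint>

using HeapReference = uint32_t;  // 32-bit compressed reference, as stored in obj.field.

// Sketch of what the slow path does after the mark entrypoint returns:
// `old_ref` is the reference read before marking, `new_ref` the marked
// (possibly to-space) reference.
void UpdateFieldAfterMark(std::atomic<HeapReference>* field,
                          HeapReference old_ref,
                          HeapReference new_ref) {
  if (old_ref == new_ref) {
    return;  // Marking did not change the reference; nothing to publish.
  }
  HeapReference expected = old_ref;
  // Strong CAS with relaxed ordering: if another thread already installed a
  // different reference, the CAS fails and the field is left as-is.
  field->compare_exchange_strong(expected, new_ref, std::memory_order_relaxed);
}

A failed CAS only means the mutator raced ahead and stored another reference, which is acceptable here; the slow path does not retry.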
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index e7d9a43f58..1b51999546 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -259,12 +259,12 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { // // root <- *address // - // while honoring read barriers (if any). + // while honoring read barriers if `requires_read_barrier` is true. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, const Address& address, - Label* fixup_label = nullptr, - bool requires_read_barrier = kEmitCompilerReadBarrier); + Label* fixup_label, + bool requires_read_barrier); // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not. // `is_wide` specifies whether it is long/double or not. @@ -499,13 +499,24 @@ class CodeGeneratorX86 : public CodeGenerator { uint32_t data_offset, Location index, bool needs_null_check); - // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at address `src`, held by + // object `obj`, into `ref`, and mark it if needed. The base of + // address `src` must be `obj`. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires a temporary register, which must be provided as a + // non-null pointer (`temp`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, const Address& src, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + Register* temp = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 4b64c1b6ff..fcabeeae5d 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -445,11 +445,25 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison) - : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) { + ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, + Location ref, + bool unpoison_ref_before_marking) + : SlowPathCode(instruction), + ref_(ref), + unpoison_ref_before_marking_(unpoison_ref_before_marking) { DCHECK(kEmitCompilerReadBarrier); } @@ -457,10 +471,10 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - CpuRegister cpu_reg = obj_.AsRegister<CpuRegister>(); - Register reg = cpu_reg.AsRegister(); + CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); + Register ref_reg = ref_cpu_reg.AsRegister(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -475,44 +489,218 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - if (unpoison_) { + if (unpoison_ref_before_marking_) { // Object* ref = ref_addr->AsMirrorPtr() - __ MaybeUnpoisonHeapReference(cpu_reg); + __ MaybeUnpoisonHeapReference(ref_cpu_reg); } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); - DCHECK_NE(reg, RSP); - DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg; + DCHECK_NE(ref_reg, RSP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in R0): // - // RDI <- obj + // RDI <- ref // RAX <- ReadBarrierMark(RDI) - // obj <- RAX + // ref <- RAX // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); // This runtime call does not require a stack map. x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); } private: - const Location obj_; - const bool unpoison_; + // The location (register) of the marked object reference. + const Location ref_; + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathX86_64 above, which never tries to update +// `obj.field`). 
+// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). +class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction, + Location ref, + CpuRegister obj, + const Address& field_addr, + bool unpoison_ref_before_marking, + CpuRegister temp1, + CpuRegister temp2) + : SlowPathCode(instruction), + ref_(ref), + obj_(obj), + field_addr_(field_addr), + unpoison_ref_before_marking_(unpoison_ref_before_marking), + temp1_(temp1), + temp2_(temp2) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); + Register ref_reg = ref_cpu_reg.AsRegister(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + + __ Bind(GetEntryLabel()); + if (unpoison_ref_before_marking_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_cpu_reg); + } + + // Save the old (unpoisoned) reference. + __ movl(temp1_, ref_cpu_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + DCHECK_NE(ref_reg, RSP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in R0): + // + // RDI <- ref + // RAX <- ReadBarrierMark(RDI) + // ref <- RAX + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); + // This runtime call does not require a stack map. + x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*field_addr`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LOCK CMPXCHGL instruction in the compare-and-set (CAS) + // operation below would abort the CAS, leaving the field as-is. + NearLabel done; + __ cmpl(temp1_, ref_cpu_reg); + __ j(kEqual, &done); + + // Update the the holder's field atomically. This may fail if + // mutator updates before us, but it's OK. 
This is achieved
+    // using a strong compare-and-set (CAS) operation with relaxed
+    // memory synchronization ordering, where the expected value is
+    // the old reference and the desired value is the new reference.
+    // This operation is implemented with a 32-bit LOCK CMPXCHG
+    // instruction, which requires the expected value (the old
+    // reference) to be in EAX. Save RAX beforehand, and move the
+    // expected value (stored in `temp1_`) into EAX.
+    __ movq(temp2_, CpuRegister(RAX));
+    __ movl(CpuRegister(RAX), temp1_);
+
+    // Convenience aliases.
+    CpuRegister base = obj_;
+    CpuRegister expected = CpuRegister(RAX);
+    CpuRegister value = ref_cpu_reg;
+
+    bool base_equals_value = (base.AsRegister() == value.AsRegister());
+    Register value_reg = ref_reg;
+    if (kPoisonHeapReferences) {
+      if (base_equals_value) {
+        // If `base` and `value` are the same register location, move
+        // `value_reg` to a temporary register. This way, poisoning
+        // `value_reg` won't invalidate `base`.
+        value_reg = temp1_.AsRegister();
+        __ movl(CpuRegister(value_reg), base);
+      }
+
+      // Check that the register allocator did not assign the location
+      // of `expected` (RAX) to `value` nor to `base`, so that heap
+      // poisoning (when enabled) works as intended below.
+      // - If `value` were equal to `expected`, both references would
+      //   be poisoned twice, meaning they would not be poisoned at
+      //   all, as heap poisoning uses address negation.
+      // - If `base` were equal to `expected`, poisoning `expected`
+      //   would invalidate `base`.
+      DCHECK_NE(value_reg, expected.AsRegister());
+      DCHECK_NE(base.AsRegister(), expected.AsRegister());
+
+      __ PoisonHeapReference(expected);
+      __ PoisonHeapReference(CpuRegister(value_reg));
+    }
+
+    __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
+
+    // If heap poisoning is enabled, we need to unpoison the values
+    // that were poisoned earlier.
+    if (kPoisonHeapReferences) {
+      if (base_equals_value) {
+        // `value_reg` has been moved to a temporary register, no need
+        // to unpoison it.
+      } else {
+        __ UnpoisonHeapReference(CpuRegister(value_reg));
+      }
+      // No need to unpoison `expected` (RAX), as it will be overwritten below.
+    }
+
+    // Restore RAX.
+    __ movq(CpuRegister(RAX), temp2_);
+
+    __ Bind(&done);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+  // The register containing the object holding the marked object reference field.
+  const CpuRegister obj_;
+  // The address of the marked reference field. The base of this address must be `obj_`.
+  const Address field_addr_;
+
+  // Should the reference in `ref_` be unpoisoned prior to marking it?
+  const bool unpoison_ref_before_marking_;
+
+  const CpuRegister temp1_;
+  const CpuRegister temp2_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
  public:
@@ -4122,7 +4310,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
     // /* HeapReference<Object> */ out = *(base + offset)
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
       // Note that a potential implicit null check is handled in this
-      // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+      // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier( instruction, out, base, offset, /* needs_null_check */ true); if (is_volatile) { @@ -4569,7 +4757,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this - // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. + // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); } else { @@ -5318,7 +5506,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), - /*fixup_label*/nullptr, + /* fixup_label */ nullptr, requires_read_barrier); break; } @@ -5343,7 +5531,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { GenerateGcRootFieldLoad(cls, out_loc, address, - /*fixup_label*/nullptr, + /* fixup_label */ nullptr, requires_read_barrier); } else { // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address). @@ -5351,7 +5539,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { GenerateGcRootFieldLoad(cls, out_loc, Address(out, 0), - /*fixup_label*/nullptr, + /* fixup_label */ nullptr, requires_read_barrier); } generate_null_check = !cls->IsInDexCache(); @@ -5379,7 +5567,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())), - /*fixup_label*/nullptr, + /* fixup_label */ nullptr, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; @@ -5496,7 +5684,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { /* no_rip */ false); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier); SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); codegen_->AddSlowPath(slow_path); __ testl(out, out); @@ -6264,7 +6452,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( - instruction, root, /* unpoison */ false); + instruction, root, /* unpoison_ref_before_marking */ false); codegen_->AddSlowPath(slow_path); __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(), @@ -6330,7 +6518,10 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction Location ref, CpuRegister obj, const Address& src, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + CpuRegister* temp1, + CpuRegister* temp2) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6387,8 +6578,16 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. // Slow path marking the object `ref` when it is gray. 
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( - instruction, ref, /* unpoison */ true); + SlowPathCode* slow_path; + if (always_update_field) { + DCHECK(temp1 != nullptr); + DCHECK(temp2 != nullptr); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( + instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( + instruction, ref, /* unpoison_ref_before_marking */ true); + } AddSlowPath(slow_path); // We have done the "if" of the gray bit check above, now branch based on the flags. diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 57ef83f621..8b19dad0d0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -253,12 +253,12 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { // // root <- *address // - // while honoring read barriers (if any). + // while honoring read barriers if `requires_read_barrier` is true. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, const Address& address, - Label* fixup_label = nullptr, - bool requires_read_barrier = kEmitCompilerReadBarrier); + Label* fixup_label, + bool requires_read_barrier); void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_float); @@ -434,13 +434,25 @@ class CodeGeneratorX86_64 : public CodeGenerator { uint32_t data_offset, Location index, bool needs_null_check); - // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at address `src`, held by + // object `obj`, into `ref`, and mark it if needed. The base of + // address `src` must be `obj`. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires two temporary registers, which must be provided as + // non-null pointers (`temp1` and `temp2`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, CpuRegister obj, const Address& src, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + CpuRegister* temp1 = nullptr, + CpuRegister* temp2 = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. 
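Note: taken together, GenerateReferenceLoadWithBakerReadBarrier and the two marking slow paths implement roughly the following shape: a plain load on the fast path, a call to the dedicated mark entrypoint only when the holder is flagged gray, and an optional write-back of the marked reference. The C++ sketch below is illustrative only; Obj, IsGray and MarkReference are hypothetical stand-ins for the gray-bit check performed by the generated code and for the per-register ReadBarrierMarkRegX entrypoints, not ART functions.

struct Obj;  // Opaque heap object.

// Placeholder: the real check reads the holder's read-barrier state.
bool IsGray(const Obj*) { return false; }
// Placeholder: the real entrypoint marks `ref` and may return a forwarded copy.
Obj* MarkReference(Obj* ref) { return ref; }

// Load obj.field with a Baker-style read barrier; when `always_update_field`
// is set (the UnsafeCASObject case), also write the marked reference back.
Obj* LoadWithBakerReadBarrier(Obj* holder, Obj** field_addr, bool always_update_field) {
  Obj* ref = *field_addr;              // Fast path: ordinary load.
  if (IsGray(holder)) {                // Slow path only for gray holders.
    Obj* marked = MarkReference(ref);  // May return a to-space copy of `ref`.
    if (always_update_field && marked != ref) {
      *field_addr = marked;            // The generated code does this with LOCK CMPXCHG.
    }
    ref = marked;
  }
  return ref;
}

Keeping the reference in the register it already occupies (the "compact" slow path described in the comments) avoids the two moves that the standard calling convention, with input and output in RDI/RAX, would require.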
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index 5d92bfd9cc..8c08a9c8b9 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -37,11 +37,6 @@ inline dwarf::Reg DWARFReg(vixl::aarch32::SRegister reg) { return dwarf::Reg::ArmFp(static_cast<int>(reg.GetCode())); } -inline vixl::aarch32::DRegister FromLowSToD(vixl::aarch32::SRegister reg) { - DCHECK_EQ(reg.GetCode() % 2, 0u) << reg; - return vixl::aarch32::DRegister(reg.GetCode() / 2); -} - inline vixl::aarch32::Register HighRegisterFrom(Location location) { DCHECK(location.IsRegisterPair()) << location; return vixl::aarch32::Register(location.AsRegisterPairHigh<vixl32::Register>()); @@ -135,6 +130,11 @@ inline vixl::aarch32::Register InputRegisterAt(HInstruction* instr, int input_in instr->InputAt(input_index)->GetType()); } +inline vixl::aarch32::Register InputRegister(HInstruction* instr) { + DCHECK_EQ(instr->InputCount(), 1u); + return InputRegisterAt(instr, 0); +} + inline int64_t Int64ConstantFrom(Location location) { HConstant* instr = location.GetConstant(); if (instr->IsIntConstant()) { diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index adfe09ba9f..9de521ad8d 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -18,6 +18,7 @@ #include "base/array_ref.h" #include "base/bit_vector-inl.h" +#include "base/stl_util.h" #include "ssa_phi_elimination.h" namespace art { @@ -168,8 +169,7 @@ bool HDeadCodeElimination::SimplifyIfs() { bool simplified_one_or_more_ifs = false; bool rerun_dominance_and_loop_analysis = false; - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { HInstruction* last = block->GetLastInstruction(); HInstruction* first = block->GetFirstInstruction(); if (last->IsIf() && @@ -271,20 +271,22 @@ bool HDeadCodeElimination::SimplifyIfs() { } void HDeadCodeElimination::ConnectSuccessiveBlocks() { - // Order does not matter. - for (HReversePostOrderIterator it(*graph_); !it.Done();) { - HBasicBlock* block = it.Current(); - if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) { - it.Advance(); - continue; - } - HBasicBlock* successor = block->GetSingleSuccessor(); - if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) { - it.Advance(); - continue; + // Order does not matter. Skip the entry block by starting at index 1 in reverse post order. + for (size_t i = 1u, size = graph_->GetReversePostOrder().size(); i != size; ++i) { + HBasicBlock* block = graph_->GetReversePostOrder()[i]; + DCHECK(!block->IsEntryBlock()); + while (block->GetLastInstruction()->IsGoto()) { + HBasicBlock* successor = block->GetSingleSuccessor(); + if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) { + break; + } + DCHECK_LT(i, IndexOfElement(graph_->GetReversePostOrder(), successor)); + block->MergeWith(successor); + --size; + DCHECK_EQ(size, graph_->GetReversePostOrder().size()); + DCHECK_EQ(block, graph_->GetReversePostOrder()[i]); + // Reiterate on this block in case it can be merged with its new successor. } - block->MergeWith(successor); - // Reiterate on this block in case it can be merged with its new successor. } } @@ -300,8 +302,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks() { // Remove all dead blocks. 
Iterate in post order because removal needs the // block's chain of dominators and nested loops need to be updated from the // inside out. - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { int id = block->GetBlockId(); if (!live_blocks.IsBitSet(id)) { MaybeRecordDeadBlock(block); @@ -332,8 +333,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks() { void HDeadCodeElimination::RemoveDeadInstructions() { // Process basic blocks in post-order in the dominator tree, so that // a dead instruction depending on another dead instruction is removed. - for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) { - HBasicBlock* block = b.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { // Traverse this block's instructions in backward order and remove // the unused ones. HBackwardInstructionIterator i(block->GetInstructions()); diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 1e86b75075..f5931a2f81 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -411,8 +411,8 @@ void GlobalValueNumberer::Run() { // Use the reverse post order to ensure the non back-edge predecessors of a block are // visited before the block itself. - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + VisitBasicBlock(block); } } diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 1d1921a246..f2602fbf8c 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -103,8 +103,7 @@ void HInductionVarAnalysis::Run() { // Detects sequence variables (generalized induction variables) during an outer to inner // traversal of all loops using Gerlek's algorithm. The order is important to enable // range analysis on outer loop while visiting inner loops. - for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) { - HBasicBlock* graph_block = it_graph.Current(); + for (HBasicBlock* graph_block : graph_->GetReversePostOrder()) { // Don't analyze irreducible loops. if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) { VisitLoop(graph_block->GetLoopInformation()); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 9faa98a388..cc420b3260 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -1219,16 +1219,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, return false; } - HReversePostOrderIterator it(*callee_graph); - it.Advance(); // Past the entry block, it does not contain instructions that prevent inlining. size_t number_of_instructions = 0; bool can_inline_environment = total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters; - for (; !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - + // Skip the entry block, it does not contain instructions that prevent inlining. + for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) { if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) { // Don't inline methods with irreducible loops, they could prevent some // optimizations to run. 
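The same mechanical rewrite recurs in each of these passes; roughly, with `Visit` standing in for whatever the pass does per block:

// Before: explicit iterator object over the reverse post order.
for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
  HBasicBlock* block = it.Current();
  Visit(block);
}

// After: range-based loop over the stored order, plus a dedicated accessor
// for callers (such as the inliner) that want to skip the entry block.
for (HBasicBlock* block : graph_->GetReversePostOrder()) {
  Visit(block);
}
for (HBasicBlock* block : graph_->GetReversePostOrderSkipEntryBlock()) {
  Visit(block);
}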
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 613e00843f..c8c4ca76fd 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -81,8 +81,7 @@ void HInstructionBuilder::InitializeBlockLocals() { // locals (guaranteed by HGraphBuilder) and that all try blocks have been // visited already (from HTryBoundary scoping and reverse post order). bool catch_block_visited = false; - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); + for (HBasicBlock* current : graph_->GetReversePostOrder()) { if (current == current_block_) { catch_block_visited = true; } else if (current->IsTryBlock()) { @@ -276,8 +275,8 @@ bool HInstructionBuilder::Build() { FindNativeDebugInfoLocations(native_debug_info_locations); } - for (HReversePostOrderIterator block_it(*graph_); !block_it.Done(); block_it.Advance()) { - current_block_ = block_it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + current_block_ = block; uint32_t block_dex_pc = current_block_->GetDexPc(); InitializeBlockLocals(); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 3bb1c1dc21..e4d280f26d 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -124,20 +124,16 @@ void InstructionSimplifier::Run() { void InstructionSimplifierVisitor::Run() { // Iterate in reverse post order to open up more simplifications to users // of instructions that got simplified. - for (HReversePostOrderIterator it(*GetGraph()); !it.Done();) { + for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { // The simplification of an instruction to another instruction may yield // possibilities for other simplifications. So although we perform a reverse // post order visit, we sometimes need to revisit an instruction index. - simplification_occurred_ = false; - VisitBasicBlock(it.Current()); - if (simplification_occurred_ && - (simplifications_at_current_position_ < kMaxSamePositionSimplifications)) { - // New simplifications may be applicable to the instruction at the - // current index, so don't advance the iterator. 
- continue; - } + do { + simplification_occurred_ = false; + VisitBasicBlock(block); + } while (simplification_occurred_ && + (simplifications_at_current_position_ < kMaxSamePositionSimplifications)); simplifications_at_current_position_ = 0; - it.Advance(); } } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 8327a4c244..fc6ff7b197 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -133,8 +133,7 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) { void IntrinsicsRecognizer::Run() { ScopedObjectAccess soa(Thread::Current()); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { HInstruction* inst = inst_it.Current(); diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 96a6ecbee9..8790c1e4f1 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -652,9 +652,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -663,7 +663,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier. @@ -891,8 +891,13 @@ void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke, Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -901,36 +906,65 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, locations->SetInAt(4, Location::RequiresRegister()); // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. - Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot) + // operations to potentially clobber the output. Likewise when + // emitting a (Baker) read barrier, which may call. 
+ Location::OutputOverlap overlaps = + ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) ? Location::kOutputOverlap : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), overlaps); + // Temporary registers used in CAS. In the object case + // (UnsafeCASObject intrinsic), these are also used for + // card-marking, and possibly for (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Pointer. locations->AddTemp(Location::RequiresRegister()); // Temp 1. } -static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) { +static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* codegen) { DCHECK_NE(type, Primitive::kPrimLong); ArmAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); // Boolean result. + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); // Boolean result. - Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. - Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Offset (discard high 4B). - Register expected_lo = locations->InAt(3).AsRegister<Register>(); // Expected. - Register value_lo = locations->InAt(4).AsRegister<Register>(); // Value. + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = offset_loc.AsRegisterPairLow<Register>(); // Offset (discard high 4B). + Register expected = locations->InAt(3).AsRegister<Register>(); // Expected. + Register value = locations->InAt(4).AsRegister<Register>(); // Value. - Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>(); // Pointer to actual memory. - Register tmp_lo = locations->GetTemp(1).AsRegister<Register>(); // Value in memory. + Location tmp_ptr_loc = locations->GetTemp(0); + Register tmp_ptr = tmp_ptr_loc.AsRegister<Register>(); // Pointer to actual memory. + Register tmp = locations->GetTemp(1).AsRegister<Register>(); // Value in memory. if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged // object and scan the receiver at the next GC for nothing. bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo, value_can_be_null); + codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + ScaleFactor::TIMES_1, + tmp_ptr_loc, + /* needs_null_check */ false, + /* always_update_field */ true, + &tmp); + } } // Prevent reordering with prior memory operations. 
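A standalone illustration (plain C++, not ART code) of the failure mode that loading the field through the read barrier with `always_update_field` prevents: if the field still holds the old from-space address of an object the GC has already copied, a CAS that compares against the to-space reference fails even though the stored reference is logically the expected one.

#include <atomic>
#include <cstdio>

struct Object { int payload; };

int main() {
  Object from_space{42};
  Object to_space{42};        // GC copy of the same logical object
  Object replacement{7};      // value the caller wants to swap in

  std::atomic<Object*> field{&from_space};  // field not yet fixed up by the GC

  Object* expected = &to_space;             // caller holds the to-space reference
  bool ok = field.compare_exchange_strong(expected, &replacement);
  std::printf("CAS without pre-marking: %s\n", ok ? "succeeded" : "failed");

  // What the pre-CAS read barrier does conceptually: mark the stored reference
  // and update the field to the to-space address before attempting the CAS.
  field.store(&to_space);
  expected = &to_space;
  ok = field.compare_exchange_strong(expected, &replacement);
  std::printf("CAS after pre-marking:   %s\n", ok ? "succeeded" : "failed");
  return 0;
}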
@@ -942,12 +976,12 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ add(tmp_ptr, base, ShifterOperand(offset)); if (kPoisonHeapReferences && type == Primitive::kPrimNot) { - codegen->GetAssembler()->PoisonHeapReference(expected_lo); - if (value_lo == expected_lo) { - // Do not poison `value_lo`, as it is the same register as - // `expected_lo`, which has just been poisoned. + __ PoisonHeapReference(expected); + if (value == expected) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. } else { - codegen->GetAssembler()->PoisonHeapReference(value_lo); + __ PoisonHeapReference(value); } } @@ -959,37 +993,29 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat Label loop_head; __ Bind(&loop_head); - // TODO: When `type == Primitive::kPrimNot`, add a read barrier for - // the reference stored in the object before attempting the CAS, - // similar to the one in the art::Unsafe_compareAndSwapObject JNI - // implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject). - DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); - __ ldrex(tmp_lo, tmp_ptr); + __ ldrex(tmp, tmp_ptr); - __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); + __ subs(tmp, tmp, ShifterOperand(expected)); __ it(EQ, ItState::kItT); - __ strex(tmp_lo, value_lo, tmp_ptr, EQ); - __ cmp(tmp_lo, ShifterOperand(1), EQ); + __ strex(tmp, value, tmp_ptr, EQ); + __ cmp(tmp, ShifterOperand(1), EQ); __ b(&loop_head, EQ); __ dmb(ISH); - __ rsbs(out, tmp_lo, ShifterOperand(1)); + __ rsbs(out, tmp, ShifterOperand(1)); __ it(CC); __ mov(out, ShifterOperand(0), CC); if (kPoisonHeapReferences && type == Primitive::kPrimNot) { - codegen->GetAssembler()->UnpoisonHeapReference(expected_lo); - if (value_lo == expected_lo) { - // Do not unpoison `value_lo`, as it is the same register as - // `expected_lo`, which has just been unpoisoned. + __ UnpoisonHeapReference(expected); + if (value == expected) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. } else { - codegen->GetAssembler()->UnpoisonHeapReference(value_lo); + __ UnpoisonHeapReference(value); } } } @@ -998,33 +1024,23 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. 
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); + GenCas(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); + GenCas(invoke, Primitive::kPrimNot, codegen_); } void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index e2c1802fdc..db1c022868 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -863,9 +863,9 @@ static void GenUnsafeGet(HInvoke* invoke, codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, trg_loc, base, - /* offset */ 0U, + /* offset */ 0u, /* index */ offset_loc, - /* scale_factor */ 0U, + /* scale_factor */ 0u, temp, /* needs_null_check */ false, is_volatile); @@ -880,7 +880,7 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == Primitive::kPrimNot) { DCHECK(trg.IsW()); - codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc); } } } @@ -890,9 +890,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -901,7 +901,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) { @@ -1086,8 +1086,13 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke, Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -1096,20 +1101,29 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, locations->SetInAt(4, Location::RequiresRegister()); // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. - Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot) + // operations to potentially clobber the output. Likewise when + // emitting a (Baker) read barrier, which may call. + Location::OutputOverlap overlaps = + ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) ? Location::kOutputOverlap : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), overlaps); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register for (Baker) read barrier. + locations->AddTemp(Location::RequiresRegister()); + } } -static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) { +static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) { MacroAssembler* masm = codegen->GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); - Register out = WRegisterFrom(locations->Out()); // Boolean result. + Location out_loc = locations->Out(); + Register out = WRegisterFrom(out_loc); // Boolean result. Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. + Location offset_loc = locations->InAt(2); + Register offset = XRegisterFrom(offset_loc); // Long offset. Register expected = RegisterFrom(locations->InAt(3), type); // Expected. Register value = RegisterFrom(locations->InAt(4), type); // Value. @@ -1118,6 +1132,27 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(base, value, value_can_be_null); + + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Register temp = WRegisterFrom(locations->GetTemp(0)); + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + /* scale_factor */ 0u, + temp, + /* needs_null_check */ false, + /* use_load_acquire */ false, + /* always_update_field */ true); + } } UseScratchRegisterScope temps(masm); @@ -1145,14 +1180,6 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat vixl::aarch64::Label loop_head, exit_loop; __ Bind(&loop_head); - // TODO: When `type == Primitive::kPrimNot`, add a read barrier for - // the reference stored in the object before attempting the CAS, - // similar to the one in the art::Unsafe_compareAndSwapObject JNI - // implementation. 
- // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). - DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); @@ -1179,14 +1206,9 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1194,22 +1216,17 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); + GenCas(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_); + GenCas(invoke, Primitive::kPrimLong, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); + GenCas(invoke, Primitive::kPrimNot, codegen_); } void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index f41e4d95b5..aae3899847 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2056,9 +2056,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -2076,7 +2076,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, } } else { locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? 
Location::kOutputOverlap : Location::kNoOutputOverlap)); } } @@ -2255,10 +2255,16 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, + Primitive::Type type, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -2278,7 +2284,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty // Force a byte register for the output. locations->SetOut(Location::RegisterLocation(EAX)); if (type == Primitive::kPrimNot) { - // Need temp registers for card-marking. + // Need temporary registers for card-marking, and possibly for + // (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. // Need a byte register for marking. locations->AddTemp(Location::RegisterLocation(ECX)); @@ -2294,14 +2301,9 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2317,7 +2319,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code Location out = locations->Out(); DCHECK_EQ(out.AsRegister<Register>(), EAX); + // The address of the field within the holding object. + Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); + if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Register expected = locations->InAt(3).AsRegister<Register>(); // Ensure `expected` is in EAX (required by the CMPXCHG instruction). DCHECK_EQ(expected, EAX); @@ -2325,11 +2338,20 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? 
- codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), - locations->GetTemp(1).AsRegister<Register>(), - base, - value, - value_can_be_null); + codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + temp1_loc, // Unused, used only as a "temporary" within the read barrier. + base, + field_addr, + /* needs_null_check */ false, + /* always_update_field */ true, + &temp2); + } bool base_equals_value = (base == value); if (kPoisonHeapReferences) { @@ -2337,7 +2359,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // If `base` and `value` are the same register location, move // `value` to a temporary register. This way, poisoning // `value` won't invalidate `base`. - value = locations->GetTemp(0).AsRegister<Register>(); + value = temp1; __ movl(value, base); } @@ -2356,19 +2378,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ PoisonHeapReference(value); } - // TODO: Add a read barrier for the reference stored in the object - // before attempting the CAS, similar to the one in the - // art::Unsafe_compareAndSwapObject JNI implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject). - DCHECK(!kEmitCompilerReadBarrier); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + __ LockCmpxchgl(field_addr, value); // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); @@ -2392,8 +2407,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // Ensure the expected value is in EAX (required by the CMPXCHG // instruction). DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), - locations->InAt(4).AsRegister<Register>()); + __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>()); } else if (type == Primitive::kPrimLong) { // Ensure the expected value is in EAX:EDX and that the new // value is in EBX:ECX (required by the CMPXCHG8B instruction). @@ -2401,7 +2415,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); - __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0)); + __ LockCmpxchg8b(field_addr); } else { LOG(FATAL) << "Unexpected CAS type " << type; } @@ -2409,7 +2423,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we // don't need scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. 
__ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); } @@ -2424,14 +2438,9 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); GenCAS(Primitive::kPrimNot, invoke, codegen_); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 4b0afca122..cdef22f6de 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2172,9 +2172,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -2183,7 +2183,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { @@ -2333,10 +2333,16 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, + Primitive::Type type, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -2347,7 +2353,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty locations->SetOut(Location::RequiresRegister()); if (type == Primitive::kPrimNot) { - // Need temp registers for card-marking. + // Need temporary registers for card-marking, and possibly for + // (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 
locations->AddTemp(Location::RequiresRegister()); } @@ -2362,14 +2369,9 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2386,16 +2388,37 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // Ensure `expected` is in RAX (required by the CMPXCHG instruction). DCHECK_EQ(expected.AsRegister(), RAX); CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); + // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), - locations->GetTemp(1).AsRegister<CpuRegister>(), - base, - value, - value_can_be_null); + codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); + + // The address of the field within the holding object. + Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + field_addr, + /* needs_null_check */ false, + /* always_update_field */ true, + &temp1, + &temp2); + } bool base_equals_value = (base.AsRegister() == value.AsRegister()); Register value_reg = value.AsRegister(); @@ -2404,7 +2427,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // If `base` and `value` are the same register location, move // `value_reg` to a temporary register. This way, poisoning // `value_reg` won't invalidate `base`. - value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister(); + value_reg = temp1.AsRegister(); __ movl(CpuRegister(value_reg), base); } @@ -2423,19 +2446,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ PoisonHeapReference(CpuRegister(value_reg)); } - // TODO: Add a read barrier for the reference stored in the object - // before attempting the CAS, similar to the one in the - // art::Unsafe_compareAndSwapObject JNI implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject). 
- DCHECK(!kEmitCompilerReadBarrier); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); + __ LockCmpxchgl(field_addr, CpuRegister(value_reg)); // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setcc(kZero, out); __ movzxb(out, out); @@ -2468,7 +2484,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setcc(kZero, out); __ movzxb(out, out); } @@ -2483,14 +2499,9 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); GenCAS(Primitive::kPrimNot, invoke, codegen_); } diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index a0ded74d6d..eb2d18dd88 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -15,6 +15,7 @@ */ #include "licm.h" + #include "side_effects_analysis.h" namespace art { @@ -90,8 +91,7 @@ void LICM::Run() { } // Post order visit to visit inner loops before outer loops. - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { if (!block->IsLoopHeader()) { // Only visit the loop when we reach the header. continue; diff --git a/compiler/optimizing/linear_order.cc b/compiler/optimizing/linear_order.cc index 3af212fa48..80cecd41dc 100644 --- a/compiler/optimizing/linear_order.cc +++ b/compiler/optimizing/linear_order.cc @@ -94,8 +94,7 @@ void LinearizeGraph(const HGraph* graph, // for it. 
ArenaVector<uint32_t> forward_predecessors(graph->GetBlocks().size(), allocator->Adapter(kArenaAllocLinearOrder)); - for (HReversePostOrderIterator it(*graph); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph->GetReversePostOrder()) { size_t number_of_forward_predecessors = block->GetPredecessors().size(); if (block->IsLoopHeader()) { number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); diff --git a/compiler/optimizing/linear_order.h b/compiler/optimizing/linear_order.h index cdbdd0714b..7122d67be9 100644 --- a/compiler/optimizing/linear_order.h +++ b/compiler/optimizing/linear_order.h @@ -30,16 +30,12 @@ namespace art { // // for (HBasicBlock* block : linear_order) // linear order // -// for (HBasicBlock* block : LinearPostOrder(linear_order)) // linear post order +// for (HBasicBlock* block : ReverseRange(linear_order)) // linear post order // void LinearizeGraph(const HGraph* graph, ArenaAllocator* allocator, ArenaVector<HBasicBlock*>* linear_order); -inline auto LinearPostOrder(const ArenaVector<HBasicBlock*>& linear_order) { - return MakeIterationRange(linear_order.rbegin(), linear_order.rend()); -} - } // namespace art #endif // ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_ diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index bd74368e17..37b58ded59 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -56,8 +56,7 @@ static void TestCode(const uint16_t* data, const char* expected) { liveness.Analyze(); std::ostringstream buffer; - for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph->GetBlocks()) { buffer << "Block " << block->GetBlockId() << std::endl; size_t ssa_values = liveness.GetNumberOfSsaValues(); BitVector* live_in = liveness.GetLiveInSet(*block); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 820fa29597..b91e9e6868 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -1046,8 +1046,8 @@ void LoadStoreElimination::Run() { return; } HeapLocationCollector heap_location_collector(graph_); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - heap_location_collector.VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + heap_location_collector.VisitBasicBlock(block); } if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) { // Bail out if there are too many heap locations to deal with. 
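With the free function `LinearPostOrder()` removed, callers move to the range accessors on the graph; a sketch of the equivalent loops, where `ProcessBlock` is a placeholder (the register-allocator changes further down in this patch use `GetLinearPostOrder()`):

// Before: free function wrapping rbegin()/rend() of the linear order.
for (HBasicBlock* block : LinearPostOrder(codegen_->GetGraph()->GetLinearOrder())) {
  ProcessBlock(block);
}

// After: the graph exposes the reversed range directly.
for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
  ProcessBlock(block);
}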
@@ -1065,8 +1065,8 @@ void LoadStoreElimination::Run() { } heap_location_collector.BuildAliasingMatrix(); LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - lse_visitor.VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + lse_visitor.VisitBasicBlock(block); } lse_visitor.RemoveInstructions(); } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 59cc0091bf..45c7eb1a46 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -179,16 +179,16 @@ GraphAnalysisResult HGraph::BuildDominatorTree() { } void HGraph::ClearDominanceInformation() { - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - it.Current()->ClearDominanceInformation(); + for (HBasicBlock* block : GetReversePostOrder()) { + block->ClearDominanceInformation(); } reverse_post_order_.clear(); } void HGraph::ClearLoopInformation() { SetHasIrreducibleLoops(false); - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - it.Current()->SetLoopInformation(nullptr); + for (HBasicBlock* block : GetReversePostOrder()) { + block->SetLoopInformation(nullptr); } } @@ -275,8 +275,7 @@ void HGraph::ComputeDominanceInformation() { bool update_occurred = true; while (update_occurred) { update_occurred = false; - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { for (HBasicBlock* successor : block->GetSuccessors()) { update_occurred |= UpdateDominatorOfSuccessor(block, successor); } @@ -287,8 +286,7 @@ void HGraph::ComputeDominanceInformation() { // Make sure that there are no remaining blocks whose dominator information // needs to be updated. if (kIsDebugBuild) { - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { for (HBasicBlock* successor : block->GetSuccessors()) { DCHECK(!UpdateDominatorOfSuccessor(block, successor)); } @@ -297,8 +295,7 @@ void HGraph::ComputeDominanceInformation() { // Populate `dominated_blocks_` information after computing all dominators. // The potential presence of irreducible loops requires to do it after. - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { if (!block->IsEntryBlock()) { block->GetDominator()->AddDominatedBlock(block); } @@ -375,8 +372,7 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { void HGraph::ComputeTryBlockInformation() { // Iterate in reverse post order to propagate try membership information from // predecessors to their successors. - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { if (block->IsEntryBlock() || block->IsCatchBlock()) { // Catch blocks after simplification have only exceptional predecessors // and hence are never in tries. @@ -446,8 +442,7 @@ GraphAnalysisResult HGraph::AnalyzeLoops() const { // We iterate post order to ensure we visit inner loops before outer loops. // `PopulateRecursive` needs this guarantee to know whether a natural loop // contains an irreducible loop. 
- for (HPostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetPostOrder()) { if (block->IsLoopHeader()) { if (block->IsCatchBlock()) { // TODO: Dealing with exceptional back edges could be tricky because @@ -1134,8 +1129,8 @@ void HGraphVisitor::VisitInsertionOrder() { } void HGraphVisitor::VisitReversePostOrder() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + VisitBasicBlock(block); } } @@ -1986,10 +1981,8 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Update the environments in this graph to have the invoke's environment // as parent. { - HReversePostOrderIterator it(*this); - it.Advance(); // Skip the entry block, we do not need to update the entry's suspend check. - for (; !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + // Skip the entry block, we do not need to update the entry's suspend check. + for (HBasicBlock* block : GetReversePostOrderSkipEntryBlock()) { for (HInstructionIterator instr_it(block->GetInstructions()); !instr_it.Done(); instr_it.Advance()) { @@ -2070,8 +2063,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Do a reverse post order of the blocks in the callee and do (1), (2), (3) // and (4) to the blocks that apply. - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); + for (HBasicBlock* current : GetReversePostOrder()) { if (current != exit_block_ && current != entry_block_ && current != first) { DCHECK(current->GetTryCatchInformation() == nullptr); DCHECK(current->GetGraph() == this); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 257ccea799..6a45149509 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -25,6 +25,7 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/array_ref.h" +#include "base/iteration_range.h" #include "base/stl_util.h" #include "base/transform_array_ref.h" #include "dex_file.h" @@ -460,10 +461,23 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return reverse_post_order_; } + ArrayRef<HBasicBlock* const> GetReversePostOrderSkipEntryBlock() { + DCHECK(GetReversePostOrder()[0] == entry_block_); + return ArrayRef<HBasicBlock* const>(GetReversePostOrder()).SubArray(1); + } + + IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetPostOrder() const { + return ReverseRange(GetReversePostOrder()); + } + const ArenaVector<HBasicBlock*>& GetLinearOrder() const { return linear_order_; } + IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetLinearPostOrder() const { + return ReverseRange(GetLinearOrder()); + } + bool HasBoundsChecks() const { return has_bounds_checks_; } @@ -6618,58 +6632,6 @@ class HGraphDelegateVisitor : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(HGraphDelegateVisitor); }; -class HInsertionOrderIterator : public ValueObject { - public: - explicit HInsertionOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {} - - bool Done() const { return index_ == graph_.GetBlocks().size(); } - HBasicBlock* Current() const { return graph_.GetBlocks()[index_]; } - void Advance() { ++index_; } - - private: - const HGraph& graph_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HInsertionOrderIterator); -}; - -class HReversePostOrderIterator : public ValueObject { - public: - explicit 
HReversePostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) { - // Check that reverse post order of the graph has been built. - DCHECK(!graph.GetReversePostOrder().empty()); - } - - bool Done() const { return index_ == graph_.GetReversePostOrder().size(); } - HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_]; } - void Advance() { ++index_; } - - private: - const HGraph& graph_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HReversePostOrderIterator); -}; - -class HPostOrderIterator : public ValueObject { - public: - explicit HPostOrderIterator(const HGraph& graph) - : graph_(graph), index_(graph_.GetReversePostOrder().size()) { - // Check that reverse post order of the graph has been built. - DCHECK(!graph.GetReversePostOrder().empty()); - } - - bool Done() const { return index_ == 0; } - HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_ - 1u]; } - void Advance() { --index_; } - - private: - const HGraph& graph_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HPostOrderIterator); -}; - // Iterator over the blocks that art part of the loop. Includes blocks part // of an inner loop. The order in which the blocks are iterated is on their // block id. diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 03870ab1bf..19fd6f95c3 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -18,6 +18,8 @@ #include <fstream> #include <memory> +#include <sstream> + #include <stdint.h> #ifdef ART_ENABLE_CODEGEN_arm @@ -46,6 +48,7 @@ #include "base/arena_containers.h" #include "base/dumpable.h" #include "base/macros.h" +#include "base/mutex.h" #include "base/timing_logger.h" #include "bounds_check_elimination.h" #include "builder.h" @@ -135,14 +138,18 @@ class PassObserver : public ValueObject { PassObserver(HGraph* graph, CodeGenerator* codegen, std::ostream* visualizer_output, - CompilerDriver* compiler_driver) + CompilerDriver* compiler_driver, + Mutex& dump_mutex) : graph_(graph), cached_method_name_(), timing_logger_enabled_(compiler_driver->GetDumpPasses()), timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), disasm_info_(graph->GetArena()), + visualizer_oss_(), + visualizer_output_(visualizer_output), visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()), - visualizer_(visualizer_output, graph, *codegen), + visualizer_(&visualizer_oss_, graph, *codegen), + visualizer_dump_mutex_(dump_mutex), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { if (!IsVerboseMethod(compiler_driver, GetMethodName())) { @@ -160,6 +167,19 @@ class PassObserver : public ValueObject { LOG(INFO) << "TIMINGS " << GetMethodName(); LOG(INFO) << Dumpable<TimingLogger>(timing_logger_); } + if (visualizer_enabled_) { + MutexLock mu(Thread::Current(), visualizer_dump_mutex_); + *visualizer_output_ << visualizer_oss_.str(); + // The destructor of `visualizer_output_` is normally + // responsible for flushing (and closing) the stream, but it + // won't be invoked during fast exits in non-debug mode -- see + // art::Dex2Oat::~Dex2Oat, which explicitly abandons some + // objects (such as the compiler driver) in non-debug mode, to + // avoid the cost of destructing them. Therefore we explicitly + // flush the stream here to prevent truncated CFG visualizer + // files. 
+ visualizer_output_->flush(); + } } void DumpDisassembly() const { @@ -237,8 +257,11 @@ class PassObserver : public ValueObject { DisassemblyInformation disasm_info_; + std::ostringstream visualizer_oss_; + std::ostream* visualizer_output_; bool visualizer_enabled_; HGraphVisualizer visualizer_; + Mutex& visualizer_dump_mutex_; // Flag to be set by the compiler if the pass failed and the graph is not // expected to validate. @@ -369,13 +392,16 @@ class OptimizingCompiler FINAL : public Compiler { std::unique_ptr<std::ostream> visualizer_output_; + mutable Mutex dump_mutex_; // To synchronize visualizer writing. + DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler); }; static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) - : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {} + : Compiler(driver, kMaximumCompilationTimeBeforeWarning), + dump_mutex_("Visualizer dump lock") {} void OptimizingCompiler::Init() { // Enable C1visualizer output. Must be done in Init() because the compiler @@ -383,9 +409,6 @@ void OptimizingCompiler::Init() { CompilerDriver* driver = GetCompilerDriver(); const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName(); if (!cfg_file_name.empty()) { - CHECK_EQ(driver->GetThreadCount(), 1U) - << "Graph visualizer requires the compiler to run single-threaded. " - << "Invoke the compiler with '-j1'."; std::ios_base::openmode cfg_file_mode = driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out; visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode)); @@ -951,7 +974,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, PassObserver pass_observer(graph, codegen.get(), visualizer_output_.get(), - compiler_driver); + compiler_driver, + dump_mutex_); VLOG(compiler) << "Building " << pass_observer.GetMethodName(); diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 7b66ef3627..0db60882db 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -20,8 +20,7 @@ namespace art { void PrepareForRegisterAllocation::Run() { // Order does not matter. - for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { // No need to visit the phis. for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index d93c9ddc39..d588deaace 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -123,8 +123,7 @@ void ReferenceTypePropagation::ValidateTypes() { // TODO: move this to the graph checker. if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) { HInstruction* instr = iti.Current(); if (instr->GetType() == Primitive::kPrimNot) { @@ -158,8 +157,8 @@ void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. 
// Reverse post order guarantees a node's dominators are visited first. // We take advantage of this order in `VisitBasicBlock`. - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + VisitBasicBlock(block); } ProcessWorklist(); diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 961077419e..aa0d3710fa 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -758,7 +758,7 @@ bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { } void RegisterAllocatorGraphColor::ProcessInstructions() { - for (HBasicBlock* block : LinearPostOrder(codegen_->GetGraph()->GetLinearOrder())) { + for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) { // Note that we currently depend on this ordering, since some helper // code is designed for linear scan register allocation. for (HBackwardInstructionIterator instr_it(block->GetInstructions()); diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index 4e69bc8999..1a391ce9bb 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -163,7 +163,7 @@ void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool void RegisterAllocatorLinearScan::AllocateRegistersInternal() { // Iterate post-order, to ensure the list is sorted, and the last added interval // is the one with the lowest start position. - for (HBasicBlock* block : LinearPostOrder(codegen_->GetGraph()->GetLinearOrder())) { + for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) { for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done(); back_it.Advance()) { ProcessInstruction(back_it.Current()); diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index e409035d9d..46d0d0eb65 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -76,8 +76,7 @@ void HSelectGenerator::Run() { // Iterate in post order in the unlikely case that removing one occurrence of // the selection pattern empties a branch block of another occurrence. // Otherwise the order does not matter. - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { if (!block->EndsWithIf()) continue; // Find elements of the diamond pattern. diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index 1dc69867b4..6d82e8e06d 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -26,8 +26,7 @@ void SideEffectsAnalysis::Run() { // In DEBUG mode, ensure side effects are properly initialized to empty. if (kIsDebugBuild) { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { SideEffects effects = GetBlockEffects(block); DCHECK(effects.DoesNothing()); if (block->IsLoopHeader()) { @@ -38,9 +37,7 @@ void SideEffectsAnalysis::Run() { } // Do a post order visit to ensure we visit a loop header after its loop body. 
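The post-order loops in this and the surrounding files rely on the new HGraph accessors added to nodes.h earlier in this diff: GetPostOrder() and GetLinearPostOrder() are ReverseRange() views over the stored reverse-post-order and linear-order vectors, which is what allows the HInsertionOrderIterator/HReversePostOrderIterator/HPostOrderIterator classes to be deleted. Below is a simplified model of that API shape using std::vector and hypothetical stub types in place of ArenaVector, IterationRange, and HBasicBlock; it is an illustration, not the ART implementation.

#include <vector>

// Minimal stand-in for an iteration range: anything with begin()/end()
// can be used in a range-based for loop.
template <typename Iter>
struct SimpleRange {
  Iter first, last;
  Iter begin() const { return first; }
  Iter end() const { return last; }
};

template <typename T>
SimpleRange<typename std::vector<T>::const_reverse_iterator>
MakeReverseRange(const std::vector<T>& v) {
  return { v.rbegin(), v.rend() };
}

struct HBasicBlockStub {};  // Placeholder for HBasicBlock.

struct GraphStub {
  // Filled in reverse post order, entry block first.
  std::vector<HBasicBlockStub*> reverse_post_order_;

  const std::vector<HBasicBlockStub*>& GetReversePostOrder() const {
    return reverse_post_order_;
  }

  // Post order is simply the same vector walked backwards.
  SimpleRange<std::vector<HBasicBlockStub*>::const_reverse_iterator>
  GetPostOrder() const {
    return MakeReverseRange(GetReversePostOrder());
  }
};

// Usage mirrors the loops in this patch:
//   for (HBasicBlockStub* block : graph.GetPostOrder()) { ... }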
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - + for (HBasicBlock* block : graph_->GetPostOrder()) { SideEffects effects = SideEffects::None(); // Update `effects` with the side effects of all instructions in this block. for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 03807ba1ee..ae1e369999 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -25,8 +25,8 @@ namespace art { void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. - for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) { - for (HInstructionIterator it(itb.Current()->GetInstructions()); !it.Done(); it.Advance()) { + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* equality_instr = it.Current(); if (!equality_instr->IsEqual() && !equality_instr->IsNotEqual()) { continue; @@ -57,8 +57,8 @@ void SsaBuilder::FixNullConstantType() { void SsaBuilder::EquivalentPhisCleanup() { // The order doesn't matter here. - for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) { - for (HInstructionIterator it(itb.Current()->GetPhis()); !it.Done(); it.Advance()) { + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); HPhi* next = phi->GetNextEquivalentPhiWithSameType(); if (next != nullptr) { @@ -79,8 +79,7 @@ void SsaBuilder::EquivalentPhisCleanup() { } void SsaBuilder::FixEnvironmentPhis() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) { HPhi* phi = it_phis.Current()->AsPhi(); // If the phi is not dead, or has no environment uses, there is nothing to do. @@ -228,8 +227,7 @@ bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) { void SsaBuilder::RunPrimitiveTypePropagation() { ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder)); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { if (block->IsLoopHeader()) { for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { HPhi* phi = phi_it.Current()->AsPhi(); diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 76cf8fe1ae..e8e12e1a55 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -139,7 +139,7 @@ static void RecursivelyProcessInputs(HInstruction* current, void SsaLivenessAnalysis::ComputeLiveRanges() { // Do a post order visit, adding inputs of instructions live in the block where // that instruction is defined, and killing instructions that are being visited. 
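The comment above describes the classic backward liveness walk: blocks are visited in reverse linear order (now expressed as ReverseRange(graph_->GetLinearOrder()), or GetLinearPostOrder() in the register allocators) and instructions are visited backwards, so a value's uses are seen before the definition that kills it. The fragment below is a rough, self-contained sketch of that single-block step with simplified stand-in types; it is not the SsaLivenessAnalysis code.

#include <set>
#include <vector>

struct InstrStub {
  int id;                      // SSA value defined by this instruction.
  std::vector<int> input_ids;  // SSA values it uses.
};

struct BlockStub {
  std::vector<InstrStub> instructions;
};

// Given what is live at the end of the block, compute what is live at its
// start: walking backwards, a definition kills its value and each use makes
// its operand live above the current point.
std::set<int> ComputeLiveIn(const BlockStub& block, std::set<int> live) {
  for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) {
    live.erase(it->id);
    for (int input : it->input_ids) {
      live.insert(input);
    }
  }
  return live;
}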
- for (HBasicBlock* block : LinearPostOrder(graph_->GetLinearOrder())) { + for (HBasicBlock* block : ReverseRange(graph_->GetLinearOrder())) { BitVector* kill = GetKillSet(*block); BitVector* live_in = GetLiveInSet(*block); @@ -256,15 +256,13 @@ void SsaLivenessAnalysis::ComputeLiveInAndLiveOutSets() { do { changed = false; - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - const HBasicBlock& block = *it.Current(); - + for (const HBasicBlock* block : graph_->GetPostOrder()) { // The live_in set depends on the kill set (which does not // change in this loop), and the live_out set. If the live_out // set does not change, there is no need to update the live_in set. - if (UpdateLiveOut(block) && UpdateLiveIn(block)) { + if (UpdateLiveOut(*block) && UpdateLiveIn(*block)) { if (kIsDebugBuild) { - CheckNoLiveInIrreducibleLoop(block); + CheckNoLiveInIrreducibleLoop(*block); } changed = true; } diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index b1ec99ab8e..aec7a3c555 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -34,8 +34,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() { ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination)); // Add to the worklist phis referenced by non-phi instructions. - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HPhi* phi = inst_it.Current()->AsPhi(); if (phi->IsDead()) { @@ -84,8 +83,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() { // Remove phis that are not live. Visit in post order so that phis // that are not inputs of loop phis can be removed when they have // no users left (dead phis might use dead phis). - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { HInstruction* current = block->GetFirstPhi(); HInstruction* next = nullptr; HPhi* phi; @@ -119,8 +117,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() { void SsaRedundantPhiElimination::Run() { // Add all phis in the worklist. Order does not matter for correctness, and // neither will necessarily converge faster. 
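After the worklist above is filled, the pass repeatedly pops a phi and asks whether all of its inputs, ignoring references to the phi itself, collapse to a single value, in which case the phi's uses can be rewritten to that value. The real pass also deals with cycles of phis and type-equivalent phis, so the sketch below, with a placeholder node type, only captures that core test.

#include <vector>

struct NodeStub {
  std::vector<NodeStub*> inputs;  // For a phi: one input per predecessor.
};

// Returns the single value the phi can be replaced with, or nullptr if the
// phi merges two or more distinct values other than itself.
NodeStub* FindRedundantPhiReplacement(NodeStub* phi) {
  NodeStub* candidate = nullptr;
  for (NodeStub* input : phi->inputs) {
    if (input == phi) {
      continue;  // A self-reference through a loop back edge does not count.
    }
    if (candidate == nullptr) {
      candidate = input;
    } else if (candidate != input) {
      return nullptr;  // Genuinely merges different values: keep the phi.
    }
  }
  return candidate;
}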
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { worklist_.push_back(inst_it.Current()->AsPhi()); } diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index c5575faaa1..68fd32e107 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -94,7 +94,7 @@ class ArmVIXLAssembler FINAL : public Assembler { void StoreRegisterList(RegList regs, size_t stack_offset); bool ShifterOperandCanAlwaysHold(uint32_t immediate); - bool ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc); + bool ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc = kCcDontCare); bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits, int32_t offset, /*out*/ int32_t* add_to_base, diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc index 9664e43641..8d2a0e7860 100644 --- a/compiler/verifier_deps_test.cc +++ b/compiler/verifier_deps_test.cc @@ -79,17 +79,24 @@ class VerifierDepsTest : public CommonCompilerTest { callbacks->SetVerifierDeps(verifier_deps_.get()); } - void LoadDexFile(ScopedObjectAccess* soa) REQUIRES_SHARED(Locks::mutator_lock_) { - class_loader_ = LoadDex("VerifierDeps"); - std::vector<const DexFile*> dex_files = GetDexFiles(class_loader_); - CHECK_EQ(dex_files.size(), 1u); - dex_file_ = dex_files.front(); - - SetVerifierDeps(dex_files); - - ObjPtr<mirror::ClassLoader> loader = soa->Decode<mirror::ClassLoader>(class_loader_); - class_linker_->RegisterDexFile(*dex_file_, loader.Ptr()); + void LoadDexFile(ScopedObjectAccess* soa, const char* name1, const char* name2 = nullptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + class_loader_ = (name2 == nullptr) ? 
LoadDex(name1) : LoadMultiDex(name1, name2); + dex_files_ = GetDexFiles(class_loader_); + primary_dex_file_ = dex_files_.front(); + + SetVerifierDeps(dex_files_); + StackHandleScope<1> hs(soa->Self()); + Handle<mirror::ClassLoader> loader = + hs.NewHandle(soa->Decode<mirror::ClassLoader>(class_loader_)); + for (const DexFile* dex_file : dex_files_) { + class_linker_->RegisterDexFile(*dex_file, loader.Get()); + } + } + void LoadDexFile(ScopedObjectAccess* soa) REQUIRES_SHARED(Locks::mutator_lock_) { + LoadDexFile(soa, "VerifierDeps"); + CHECK_EQ(dex_files_.size(), 1u); klass_Main_ = FindClassByName("LMain;", soa); CHECK(klass_Main_ != nullptr); } @@ -98,16 +105,16 @@ class VerifierDepsTest : public CommonCompilerTest { ScopedObjectAccess soa(Thread::Current()); LoadDexFile(&soa); - StackHandleScope<2> hs(Thread::Current()); + StackHandleScope<2> hs(soa.Self()); Handle<mirror::ClassLoader> class_loader_handle( hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader_))); Handle<mirror::DexCache> dex_cache_handle(hs.NewHandle(klass_Main_->GetDexCache())); const DexFile::ClassDef* class_def = klass_Main_->GetClassDef(); - const uint8_t* class_data = dex_file_->GetClassData(*class_def); + const uint8_t* class_data = primary_dex_file_->GetClassData(*class_def); CHECK(class_data != nullptr); - ClassDataItemIterator it(*dex_file_, class_data); + ClassDataItemIterator it(*primary_dex_file_, class_data); while (it.HasNextStaticField() || it.HasNextInstanceField()) { it.Next(); } @@ -115,7 +122,7 @@ class VerifierDepsTest : public CommonCompilerTest { ArtMethod* method = nullptr; while (it.HasNextDirectMethod()) { ArtMethod* resolved_method = class_linker_->ResolveMethod<ClassLinker::kNoICCECheckForCache>( - *dex_file_, + *primary_dex_file_, it.GetMemberIndex(), dex_cache_handle, class_loader_handle, @@ -131,7 +138,7 @@ class VerifierDepsTest : public CommonCompilerTest { CHECK(method != nullptr); MethodVerifier verifier(Thread::Current(), - dex_file_, + primary_dex_file_, dex_cache_handle, class_loader_handle, *class_def, @@ -148,19 +155,16 @@ class VerifierDepsTest : public CommonCompilerTest { return !verifier.HasFailures(); } - void VerifyDexFile() { + void VerifyDexFile(const char* multidex = nullptr) { std::string error_msg; { ScopedObjectAccess soa(Thread::Current()); - LoadDexFile(&soa); + LoadDexFile(&soa, "VerifierDeps", multidex); } - SetVerifierDeps({ dex_file_ }); TimingLogger timings("Verify", false, false); - std::vector<const DexFile*> dex_files; - dex_files.push_back(dex_file_); compiler_options_->boot_image_ = false; compiler_driver_->InitializeThreadPools(); - compiler_driver_->Verify(class_loader_, dex_files, &timings); + compiler_driver_->Verify(class_loader_, dex_files_, &timings); } bool TestAssignabilityRecording(const std::string& dst, @@ -173,7 +177,7 @@ class VerifierDepsTest : public CommonCompilerTest { DCHECK(klass_dst != nullptr); mirror::Class* klass_src = FindClassByName(src, &soa); DCHECK(klass_src != nullptr); - verifier_deps_->AddAssignability(*dex_file_, + verifier_deps_->AddAssignability(*primary_dex_file_, klass_dst, klass_src, is_strict, @@ -182,9 +186,9 @@ class VerifierDepsTest : public CommonCompilerTest { } bool HasUnverifiedClass(const std::string& cls) { - const DexFile::TypeId* type_id = dex_file_->FindTypeId(cls.c_str()); + const DexFile::TypeId* type_id = primary_dex_file_->FindTypeId(cls.c_str()); DCHECK(type_id != nullptr); - uint16_t index = dex_file_->GetIndexForTypeId(*type_id); + uint16_t index = primary_dex_file_->GetIndexForTypeId(*type_id); 
MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_); for (const auto& dex_dep : verifier_deps_->dex_deps_) { for (uint16_t entry : dex_dep.second->unverified_classes_) { @@ -395,8 +399,21 @@ class VerifierDepsTest : public CommonCompilerTest { has_unverified_classes; } + static std::set<VerifierDeps::MethodResolution>* GetMethods( + VerifierDeps::DexFileDeps* deps, MethodResolutionKind resolution_kind) { + if (resolution_kind == kDirectMethodResolution) { + return &deps->direct_methods_; + } else if (resolution_kind == kVirtualMethodResolution) { + return &deps->virtual_methods_; + } else { + DCHECK_EQ(resolution_kind, kInterfaceMethodResolution); + return &deps->interface_methods_; + } + } + std::unique_ptr<verifier::VerifierDeps> verifier_deps_; - const DexFile* dex_file_; + std::vector<const DexFile*> dex_files_; + const DexFile* primary_dex_file_; jobject class_loader_; mirror::Class* klass_Main_; }; @@ -407,21 +424,21 @@ TEST_F(VerifierDepsTest, StringToId) { MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_); - uint32_t id_Main1 = verifier_deps_->GetIdFromString(*dex_file_, "LMain;"); - ASSERT_LT(id_Main1, dex_file_->NumStringIds()); - ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*dex_file_, id_Main1)); + uint32_t id_Main1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;"); + ASSERT_LT(id_Main1, primary_dex_file_->NumStringIds()); + ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Main1)); - uint32_t id_Main2 = verifier_deps_->GetIdFromString(*dex_file_, "LMain;"); - ASSERT_LT(id_Main2, dex_file_->NumStringIds()); - ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*dex_file_, id_Main2)); + uint32_t id_Main2 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;"); + ASSERT_LT(id_Main2, primary_dex_file_->NumStringIds()); + ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Main2)); - uint32_t id_Lorem1 = verifier_deps_->GetIdFromString(*dex_file_, "Lorem ipsum"); - ASSERT_GE(id_Lorem1, dex_file_->NumStringIds()); - ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*dex_file_, id_Lorem1)); + uint32_t id_Lorem1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum"); + ASSERT_GE(id_Lorem1, primary_dex_file_->NumStringIds()); + ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Lorem1)); - uint32_t id_Lorem2 = verifier_deps_->GetIdFromString(*dex_file_, "Lorem ipsum"); - ASSERT_GE(id_Lorem2, dex_file_->NumStringIds()); - ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*dex_file_, id_Lorem2)); + uint32_t id_Lorem2 = verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum"); + ASSERT_GE(id_Lorem2, primary_dex_file_->NumStringIds()); + ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Lorem2)); ASSERT_EQ(id_Main1, id_Main2); ASSERT_EQ(id_Lorem1, id_Lorem2); @@ -1068,13 +1085,41 @@ TEST_F(VerifierDepsTest, EncodeDecode) { ASSERT_TRUE(HasEachKindOfRecord()); std::vector<uint8_t> buffer; - verifier_deps_->Encode(&buffer); + verifier_deps_->Encode(dex_files_, &buffer); ASSERT_FALSE(buffer.empty()); - VerifierDeps decoded_deps({ dex_file_ }, ArrayRef<uint8_t>(buffer)); + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); ASSERT_TRUE(verifier_deps_->Equals(decoded_deps)); } +TEST_F(VerifierDepsTest, EncodeDecodeMulti) { + VerifyDexFile("MultiDex"); + + ASSERT_GT(NumberOfCompiledDexFiles(), 1u); + std::vector<uint8_t> buffer; + verifier_deps_->Encode(dex_files_, &buffer); + 
ASSERT_FALSE(buffer.empty()); + + // Create new DexFile, to mess with std::map order: the verifier deps used + // to iterate over the map, which doesn't guarantee insertion order. We fixed + // this by passing the expected order when encoding/decoding. + std::vector<std::unique_ptr<const DexFile>> first_dex_files = OpenTestDexFiles("VerifierDeps"); + std::vector<std::unique_ptr<const DexFile>> second_dex_files = OpenTestDexFiles("MultiDex"); + std::vector<const DexFile*> dex_files; + for (auto& dex_file : first_dex_files) { + dex_files.push_back(dex_file.get()); + } + for (auto& dex_file : second_dex_files) { + dex_files.push_back(dex_file.get()); + } + + // Dump the new verifier deps to ensure it can properly read the data. + VerifierDeps decoded_deps(dex_files, ArrayRef<const uint8_t>(buffer)); + std::ostringstream stream; + VariableIndentationOutputStream os(&stream); + decoded_deps.Dump(&os); +} + TEST_F(VerifierDepsTest, UnverifiedClasses) { VerifyDexFile(); ASSERT_FALSE(HasUnverifiedClass("LMyThread;")); @@ -1088,5 +1133,311 @@ TEST_F(VerifierDepsTest, UnverifiedClasses) { ASSERT_TRUE(HasUnverifiedClass("LMyClassWithNoSuperButFailures;")); } +// Returns the next resolution kind in the enum. +static MethodResolutionKind GetNextResolutionKind(MethodResolutionKind resolution_kind) { + if (resolution_kind == kDirectMethodResolution) { + return kVirtualMethodResolution; + } else if (resolution_kind == kVirtualMethodResolution) { + return kInterfaceMethodResolution; + } else { + DCHECK_EQ(resolution_kind, kInterfaceMethodResolution); + return kDirectMethodResolution; + } +} + +TEST_F(VerifierDepsTest, VerifyDeps) { + VerifyDexFile(); + + ASSERT_EQ(1u, NumberOfCompiledDexFiles()); + ASSERT_TRUE(HasEachKindOfRecord()); + + // When validating, we create a new class loader, as + // the existing `class_loader_` may contain erroneous classes, + // that ClassLinker::FindClass won't return. + + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + MutableHandle<mirror::ClassLoader> new_class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr)); + { + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_TRUE(verifier_deps_->Verify(new_class_loader, soa.Self())); + } + + std::vector<uint8_t> buffer; + verifier_deps_->Encode(dex_files_, &buffer); + ASSERT_FALSE(buffer.empty()); + + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_TRUE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + // Fiddle with the dependencies to make sure we catch any change and fail to verify. + + { + // Mess up with the assignable_types. + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + deps->assignable_types_.insert(*deps->unassignable_types_.begin()); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + { + // Mess up with the unassignable_types. 
+ VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + deps->unassignable_types_.insert(*deps->assignable_types_.begin()); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + // Mess up with classes. + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + for (const auto& entry : deps->classes_) { + if (entry.IsResolved()) { + deps->classes_.insert(VerifierDeps::ClassResolution( + entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker)); + found = true; + break; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + for (const auto& entry : deps->classes_) { + if (!entry.IsResolved()) { + deps->classes_.insert(VerifierDeps::ClassResolution( + entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker - 1)); + found = true; + break; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + for (const auto& entry : deps->classes_) { + if (entry.IsResolved()) { + deps->classes_.insert(VerifierDeps::ClassResolution( + entry.GetDexTypeIndex(), entry.GetAccessFlags() - 1)); + found = true; + break; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + // Mess up with fields. 
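The class and type blocks above, and the field and method blocks that follow, all use the same recipe: decode the dependencies from the encoded buffer, flip exactly one component of one recorded resolution, and expect Verify() against a fresh class loader to fail. The toy model below restates what such a record conceptually contains, using a hypothetical FieldRecordStub rather than the VerifierDeps data layout, to make explicit why any single-component change must be detected.

#include <cstdint>

// Toy model of one recorded field resolution: the dex field index identifies
// which reference was resolved, and the other two fields are the recorded
// outcome. A reserved flags value (kUnresolvedMarker in the real code) means
// "this reference did not resolve at verification time".
struct FieldRecordStub {
  uint32_t dex_field_index;
  uint16_t access_flags;
  uint32_t declaring_class_type_index;
};

// The dependency holds only if re-resolving the same field reference now
// produces the same outcome; changing either recorded component, or flipping
// resolved/unresolved, must therefore make verification of the deps fail.
bool DependencyStillHolds(const FieldRecordStub& recorded,
                          const FieldRecordStub& current) {
  return recorded.access_flags == current.access_flags &&
         recorded.declaring_class_type_index == current.declaring_class_type_index;
}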
+ { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + for (const auto& entry : deps->fields_) { + if (entry.IsResolved()) { + deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(), + VerifierDeps::kUnresolvedMarker, + entry.GetDeclaringClassIndex())); + found = true; + break; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + for (const auto& entry : deps->fields_) { + if (!entry.IsResolved()) { + deps->fields_.insert(VerifierDeps::FieldResolution(0 /* we know there is a field there */, + VerifierDeps::kUnresolvedMarker - 1, + 0 /* we know there is a class there */)); + found = true; + break; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + for (const auto& entry : deps->fields_) { + if (entry.IsResolved()) { + deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(), + entry.GetAccessFlags() - 1, + entry.GetDeclaringClassIndex())); + found = true; + break; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + for (const auto& entry : deps->fields_) { + static constexpr uint32_t kNewTypeIndex = 0; + if (entry.GetDeclaringClassIndex() != kNewTypeIndex) { + deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(), + entry.GetAccessFlags(), + kNewTypeIndex)); + found = true; + break; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + // Mess up with methods. 
+ for (MethodResolutionKind resolution_kind : + { kDirectMethodResolution, kVirtualMethodResolution, kInterfaceMethodResolution }) { + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind); + for (const auto& entry : *methods) { + if (entry.IsResolved()) { + methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(), + VerifierDeps::kUnresolvedMarker, + entry.GetDeclaringClassIndex())); + found = true; + break; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind); + for (const auto& entry : *methods) { + if (!entry.IsResolved()) { + methods->insert(VerifierDeps::MethodResolution(0 /* we know there is a method there */, + VerifierDeps::kUnresolvedMarker - 1, + 0 /* we know there is a class there */)); + found = true; + break; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind); + for (const auto& entry : *methods) { + if (entry.IsResolved()) { + methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(), + entry.GetAccessFlags() - 1, + entry.GetDeclaringClassIndex())); + found = true; + break; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind); + for (const auto& entry : *methods) { + static constexpr uint32_t kNewTypeIndex = 0; + if (entry.IsResolved() && entry.GetDeclaringClassIndex() != kNewTypeIndex) { + methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(), + entry.GetAccessFlags(), + kNewTypeIndex)); + found = true; + break; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind); + for (const auto& entry : *methods) { + if (entry.IsResolved()) { + GetMethods(deps, GetNextResolutionKind(resolution_kind))->insert( + VerifierDeps::MethodResolution(entry.GetDexMethodIndex(), + entry.GetAccessFlags(), + 
entry.GetDeclaringClassIndex())); + found = true; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + + { + VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); + VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_); + bool found = false; + std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind); + for (const auto& entry : *methods) { + if (entry.IsResolved()) { + GetMethods(deps, GetNextResolutionKind(GetNextResolutionKind(resolution_kind)))->insert( + VerifierDeps::MethodResolution(entry.GetDexMethodIndex(), + entry.GetAccessFlags(), + entry.GetDeclaringClassIndex())); + found = true; + } + } + ASSERT_TRUE(found); + new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps"))); + ASSERT_FALSE(decoded_deps.Verify(new_class_loader, soa.Self())); + } + } +} + } // namespace verifier } // namespace art |
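The EncodeDecodeMulti comment above ("the verifier deps used to iterate over the map, which doesn't guarantee insertion order") points at a general pitfall: a std::map with pointer keys iterates in key (pointer) order, which depends on where objects happen to be allocated rather than on the order they were inserted, so any encoding produced by walking such a map is nondeterministic unless an explicit order, here the dex_files_ vector passed to Encode() and to the decoder, pins it down. The snippet below demonstrates the pitfall in isolation, without the VerifierDeps types.

#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

// A pointer-keyed std::map iterates sorted by pointer value, which has
// nothing to do with insertion order. Anything serialized by walking such a
// map is therefore nondeterministic unless an explicit ordering (here, the
// `order` vector) is imposed on top of it.
int main() {
  std::vector<std::unique_ptr<std::string>> order;
  std::map<const std::string*, int> deps;
  for (int i = 0; i < 4; ++i) {
    order.push_back(std::make_unique<std::string>("dex" + std::to_string(i)));
    deps[order.back().get()] = i;
  }

  std::cout << "map order:      ";
  for (const auto& entry : deps) {
    std::cout << *entry.first << ' ';   // Depends on allocation addresses.
  }
  std::cout << "\nexplicit order: ";
  for (const auto& key : order) {
    std::cout << *key << ' ';           // Deterministic: dex0 dex1 dex2 dex3.
  }
  std::cout << '\n';
  return 0;
}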