74 files changed, 1166 insertions, 484 deletions
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk index b1644df529..e213dc4750 100644 --- a/build/Android.common_path.mk +++ b/build/Android.common_path.mk @@ -38,7 +38,7 @@ ART_TARGET_TEST_OUT := $(TARGET_OUT_DATA)/art-test ifneq ($(TMPDIR),) ART_HOST_TEST_DIR := $(TMPDIR)/test-art-$(shell echo $$PPID) else -ART_HOST_TEST_DIR := /tmp/test-art-$(shell echo $$PPID) +ART_HOST_TEST_DIR := /tmp/$(USER)/test-art-$(shell echo $$PPID) endif # core.oat location on the device. diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h index 9b4042c590..f05648c58f 100644 --- a/cmdline/cmdline_types.h +++ b/cmdline/cmdline_types.h @@ -462,7 +462,7 @@ static gc::CollectorType ParseCollectorType(const std::string& option) { struct XGcOption { // These defaults are used when the command line arguments for -Xgc: // are either omitted completely or partially. - gc::CollectorType collector_type_ = kUseReadBarrier ? + gc::CollectorType collector_type_ = kUseReadBarrier ? // If RB is enabled (currently a build-time decision), // use CC as the default GC. gc::kCollectorTypeCC : @@ -473,6 +473,7 @@ struct XGcOption { bool verify_pre_gc_rosalloc_ = kIsDebugBuild; bool verify_pre_sweeping_rosalloc_ = false; bool verify_post_gc_rosalloc_ = false; + bool measure_ = kIsDebugBuild; bool gcstress_ = false; }; @@ -515,6 +516,8 @@ struct CmdlineType<XGcOption> : CmdlineTypeParser<XGcOption> { xgc.gcstress_ = true; } else if (gc_option == "nogcstress") { xgc.gcstress_ = false; + } else if (gc_option == "measure") { + xgc.measure_ = true; } else if ((gc_option == "precise") || (gc_option == "noprecise") || (gc_option == "verifycardtable") || diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 47e6625d07..5e6e175c67 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -80,7 +80,11 @@ class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> { virtual void EmitNativeCode(CodeGenerator* codegen) = 0; + // Save live core and floating-point caller-save registers and + // update the stack mask in `locations` for registers holding object + // references. virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); + // Restore live core and floating-point caller-save registers. virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); bool IsCoreRegisterSaved(int reg) const { diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 47bafb5d6f..236ed20fc0 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -437,11 +437,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - // Save live registers before the runtime call, and in particular - // R0 (if it is live), as it is clobbered by functions - // art_quick_read_barrier_mark_regX. - SaveLiveRegisters(codegen, locations); - + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. 
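The cmdline_types.h hunk above adds a new "measure" suboption to -Xgc: alongside gcstress/nogcstress, defaulting to true on debug builds. Below is a minimal standalone sketch of that comma-separated parsing; it is not ART's CmdlineType<XGcOption> parser, and ParseXGcOptions and the main() driver are invented for illustration.

```cpp
#include <iostream>
#include <sstream>
#include <string>

struct XGcOption {
  bool gcstress_ = false;
  bool measure_ = false;  // simplified default; the real field defaults to kIsDebugBuild
};

// Parse a comma-separated -Xgc: value, e.g. "CC,measure".
XGcOption ParseXGcOptions(const std::string& value) {
  XGcOption xgc;
  std::stringstream ss(value);
  std::string gc_option;
  while (std::getline(ss, gc_option, ',')) {
    if (gc_option == "gcstress") {
      xgc.gcstress_ = true;
    } else if (gc_option == "nogcstress") {
      xgc.gcstress_ = false;
    } else if (gc_option == "measure") {  // the suboption added by this change
      xgc.measure_ = true;
    }
  }
  return xgc;
}

int main() {
  XGcOption xgc = ParseXGcOptions("CC,measure");
  std::cout << std::boolalpha << xgc.measure_ << "\n";  // prints: true
}
```

In practice the flag would presumably be passed through the existing runtime option, e.g. -Xgc:CC,measure, but the exact accepted spelling should be checked against the runtime's own parser.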
InvokeRuntimeCallingConvention calling_convention; CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); DCHECK_NE(reg, SP); @@ -469,8 +467,6 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); - - RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -4437,6 +4433,10 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Location out_loc = locations->Out(); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); Primitive::Type type = instruction->GetType(); + HInstruction* array_instr = instruction->GetArray(); + bool has_intermediate_address = array_instr->IsIntermediateAddress(); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); switch (type) { case Primitive::kPrimBoolean: @@ -4451,8 +4451,21 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { LoadOperandType load_type = GetLoadOperandType(type); __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset); } else { - __ add(IP, obj, ShifterOperand(data_offset)); - codegen_->LoadFromShiftedRegOffset(type, out_loc, IP, index.AsRegister<Register>()); + Register temp = IP; + + if (has_intermediate_address) { + // We do not need to compute the intermediate address from the array: the + // input instruction has done it already. See the comment in + // `TryExtractArrayAccessAddress()`. + if (kIsDebugBuild) { + HIntermediateAddress* tmp = array_instr->AsIntermediateAddress(); + DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset); + } + temp = obj; + } else { + __ add(temp, obj, ShifterOperand(data_offset)); + } + codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); } break; } @@ -4481,8 +4494,21 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { // reference, if heap poisoning is enabled). codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); } else { - __ add(IP, obj, ShifterOperand(data_offset)); - codegen_->LoadFromShiftedRegOffset(type, out_loc, IP, index.AsRegister<Register>()); + Register temp = IP; + + if (has_intermediate_address) { + // We do not need to compute the intermediate address from the array: the + // input instruction has done it already. See the comment in + // `TryExtractArrayAccessAddress()`. + if (kIsDebugBuild) { + HIntermediateAddress* tmp = array_instr->AsIntermediateAddress(); + DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset); + } + temp = obj; + } else { + __ add(temp, obj, ShifterOperand(data_offset)); + } + codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); codegen_->MaybeRecordImplicitNullCheck(instruction); // If read barriers are enabled, emit read barriers other than @@ -4585,6 +4611,10 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { uint32_t data_offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); Location value_loc = locations->InAt(2); + HInstruction* array_instr = instruction->GetArray(); + bool has_intermediate_address = array_instr->IsIntermediateAddress(); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. 
+ DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); switch (value_type) { case Primitive::kPrimBoolean: @@ -4599,10 +4629,23 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { StoreOperandType store_type = GetStoreOperandType(value_type); __ StoreToOffset(store_type, value_loc.AsRegister<Register>(), array, full_offset); } else { - __ add(IP, array, ShifterOperand(data_offset)); + Register temp = IP; + + if (has_intermediate_address) { + // We do not need to compute the intermediate address from the array: the + // input instruction has done it already. See the comment in + // `TryExtractArrayAccessAddress()`. + if (kIsDebugBuild) { + HIntermediateAddress* tmp = array_instr->AsIntermediateAddress(); + DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == data_offset); + } + temp = array; + } else { + __ add(temp, array, ShifterOperand(data_offset)); + } codegen_->StoreToShiftedRegOffset(value_type, value_loc, - IP, + temp, index.AsRegister<Register>()); } break; @@ -4610,6 +4653,9 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimNot: { Register value = value_loc.AsRegister<Register>(); + // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet. + // See the comment in instruction_simplifier_shared.cc. + DCHECK(!has_intermediate_address); if (instruction->InputAt(2)->IsNullConstant()) { // Just setting null. @@ -4832,6 +4878,37 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); } +void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) { + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset())); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location out = locations->Out(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); + + if (second.IsRegister()) { + __ add(out.AsRegister<Register>(), + first.AsRegister<Register>(), + ShifterOperand(second.AsRegister<Register>())); + } else { + __ AddConstant(out.AsRegister<Register>(), + first.AsRegister<Register>(), + second.GetConstant()->AsIntConstant()->GetValue()); + } +} + void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) { LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() ? 
LocationSummary::kCallOnSlowPath diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index a2d126dd5a..76b07979f5 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -603,11 +603,9 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - // Save live registers before the runtime call, and in particular - // W0 (if it is live), as it is clobbered by functions - // art_quick_read_barrier_mark_regX. - SaveLiveRegisters(codegen, locations); - + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. InvokeRuntimeCallingConvention calling_convention; CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); DCHECK_NE(obj_.reg(), LR); @@ -635,8 +633,6 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { instruction_, instruction_->GetDexPc(), this); - - RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); } @@ -690,10 +686,9 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress())); + instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); @@ -1983,9 +1978,8 @@ void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( } } -void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) { - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. +void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. DCHECK(!kEmitCompilerReadBarrier); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -1994,10 +1988,9 @@ void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddr locations->SetOut(Location::RequiresRegister()); } -void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress( - HArm64IntermediateAddress* instruction) { - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. +void InstructionCodeGeneratorARM64::VisitIntermediateAddress( + HIntermediateAddress* instruction) { + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. DCHECK(!kEmitCompilerReadBarrier); __ Add(OutputRegister(instruction), InputRegisterAt(instruction, 0), @@ -2097,9 +2090,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Object ArrayGet with Baker's read barrier case. Register temp = temps.AcquireW(); - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. 
- DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress()); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!instruction->GetArray()->IsIntermediateAddress()); // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( @@ -2112,15 +2104,15 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { source = HeapOperand(obj, offset); } else { Register temp = temps.AcquireSameSizeAs(obj); - if (instruction->GetArray()->IsArm64IntermediateAddress()) { + if (instruction->GetArray()->IsIntermediateAddress()) { // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. + // HIntermediateAddress instruction yet. DCHECK(!kEmitCompilerReadBarrier); // We do not need to compute the intermediate address from the array: the // input instruction has done it already. See the comment in - // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`. + // `TryExtractArrayAccessAddress()`. if (kIsDebugBuild) { - HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress(); + HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); } temp = obj; @@ -2204,15 +2196,15 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } else { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireSameSizeAs(array); - if (instruction->GetArray()->IsArm64IntermediateAddress()) { + if (instruction->GetArray()->IsIntermediateAddress()) { // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. + // HIntermediateAddress instruction yet. DCHECK(!kEmitCompilerReadBarrier); // We do not need to compute the intermediate address from the array: the // input instruction has done it already. See the comment in - // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`. + // `TryExtractArrayAccessAddress()`. if (kIsDebugBuild) { - HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress(); + HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); } temp = array; @@ -2228,7 +2220,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); } else { DCHECK(needs_write_barrier); - DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress()); + DCHECK(!instruction->GetArray()->IsIntermediateAddress()); vixl::aarch64::Label done; SlowPathCodeARM64* slow_path = nullptr; { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index c5c0aad2c3..82baaa0443 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -472,11 +472,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - // Save live registers before the runtime call, and in particular - // EAX (if it is live), as it is clobbered by functions - // art_quick_read_barrier_mark_regX. - SaveLiveRegisters(codegen, locations); - + // No need to save live registers; it's taken care of by the + // entrypoint. 
Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. InvokeRuntimeCallingConvention calling_convention; CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); DCHECK_NE(reg, ESP); @@ -502,8 +500,6 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); - - RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 900c79055a..b6ba30e154 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -493,11 +493,9 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - // Save live registers before the runtime call, and in particular - // RDI and/or RAX (if they are live), as they are clobbered by - // functions art_quick_read_barrier_mark_regX. - SaveLiveRegisters(codegen, locations); - + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. InvokeRuntimeCallingConvention calling_convention; CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); DCHECK_NE(reg, RSP); @@ -523,8 +521,6 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); - - RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index d2afa5b914..af0ee4e197 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -227,7 +227,7 @@ static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst return vixl::aarch64::Assembler::IsImmMovn(value, vixl::aarch64::kXRegSize); } else { DCHECK(instr->IsAdd() || - instr->IsArm64IntermediateAddress() || + instr->IsIntermediateAddress() || instr->IsBoundsCheck() || instr->IsCompare() || instr->IsCondition() || diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index cd026b8770..495f3fd232 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -14,8 +14,10 @@ * limitations under the License. */ +#include "code_generator.h" #include "instruction_simplifier_arm.h" #include "instruction_simplifier_shared.h" +#include "mirror/array-inl.h" namespace art { namespace arm { @@ -38,6 +40,46 @@ void InstructionSimplifierArmVisitor::VisitAnd(HAnd* instruction) { } } +void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) { + size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + Primitive::Type type = instruction->GetType(); + + if (type == Primitive::kPrimLong + || type == Primitive::kPrimFloat + || type == Primitive::kPrimDouble) { + // T32 doesn't support ShiftedRegOffset mem address mode for these types + // to enable optimization. 
+ return; + } + + if (TryExtractArrayAccessAddress(instruction, + instruction->GetArray(), + instruction->GetIndex(), + data_offset)) { + RecordSimplification(); + } +} + +void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) { + size_t access_size = Primitive::ComponentSize(instruction->GetComponentType()); + size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value(); + Primitive::Type type = instruction->GetComponentType(); + + if (type == Primitive::kPrimLong + || type == Primitive::kPrimFloat + || type == Primitive::kPrimDouble) { + // T32 doesn't support ShiftedRegOffset mem address mode for these types + // to enable optimization. + return; + } + + if (TryExtractArrayAccessAddress(instruction, + instruction->GetArray(), + instruction->GetIndex(), + data_offset)) { + RecordSimplification(); + } +} } // namespace arm } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index 14c940eb21..3d297dacc0 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -38,6 +38,8 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { void VisitMul(HMul* instruction) OVERRIDE; void VisitOr(HOr* instruction) OVERRIDE; void VisitAnd(HAnd* instruction) OVERRIDE; + void VisitArrayGet(HArrayGet* instruction) OVERRIDE; + void VisitArraySet(HArraySet* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 983d31d168..6d107d571f 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -28,56 +28,6 @@ using helpers::CanFitInShifterOperand; using helpers::HasShifterOperand; using helpers::ShifterOperandSupportsExtension; -void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access, - HInstruction* array, - HInstruction* index, - size_t data_offset) { - if (kEmitCompilerReadBarrier) { - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. - // - // TODO: Handle this case properly in the ARM64 code generator and - // re-enable this optimization; otherwise, remove this TODO. - // b/26601270 - return; - } - if (index->IsConstant() || - (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { - // When the index is a constant all the addressing can be fitted in the - // memory access instruction, so do not split the access. - return; - } - if (access->IsArraySet() && - access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) { - // The access may require a runtime call or the original array pointer. - return; - } - - // Proceed to extract the base address computation. - ArenaAllocator* arena = GetGraph()->GetArena(); - - HIntConstant* offset = GetGraph()->GetIntConstant(data_offset); - HArm64IntermediateAddress* address = - new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc); - address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); - access->GetBlock()->InsertInstructionBefore(address, access); - access->ReplaceInput(address, 0); - // Both instructions must depend on GC to prevent any instruction that can - // trigger GC to be inserted between the two. 
- access->AddSideEffects(SideEffects::DependsOnGC()); - DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC())); - DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC())); - // TODO: Code generation for HArrayGet and HArraySet will check whether the input address - // is an HArm64IntermediateAddress and generate appropriate code. - // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe - // `HArm64Load` and `HArm64Store`). We defer these changes because these new instructions would - // not bring any advantages yet. - // Also see the comments in - // `InstructionCodeGeneratorARM64::VisitArrayGet()` and - // `InstructionCodeGeneratorARM64::VisitArraySet()`. - RecordSimplification(); -} - bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge) { @@ -190,19 +140,23 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) { void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - TryExtractArrayAccessAddress(instruction, - instruction->GetArray(), - instruction->GetIndex(), - data_offset); + if (TryExtractArrayAccessAddress(instruction, + instruction->GetArray(), + instruction->GetIndex(), + data_offset)) { + RecordSimplification(); + } } void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { size_t access_size = Primitive::ComponentSize(instruction->GetComponentType()); size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value(); - TryExtractArrayAccessAddress(instruction, - instruction->GetArray(), - instruction->GetIndex(), - data_offset); + if (TryExtractArrayAccessAddress(instruction, + instruction->GetArray(), + instruction->GetIndex(), + data_offset)) { + RecordSimplification(); + } } void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 4735f85ab0..28648b3bea 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -35,10 +35,6 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { } } - void TryExtractArrayAccessAddress(HInstruction* access, - HInstruction* array, - HInstruction* index, - size_t data_offset); bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index dab1ebc16d..8f7778fe68 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -226,4 +226,59 @@ bool TryMergeNegatedInput(HBinaryOperation* op) { return false; } + +bool TryExtractArrayAccessAddress(HInstruction* access, + HInstruction* array, + HInstruction* index, + size_t data_offset) { + if (kEmitCompilerReadBarrier) { + // The read barrier instrumentation does not support the + // HIntermediateAddress instruction yet. + // + // TODO: Handle this case properly in the ARM64 and ARM code generator and + // re-enable this optimization; otherwise, remove this TODO. 
+ // b/26601270 + return false; + } + if (index->IsConstant() || + (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { + // When the index is a constant all the addressing can be fitted in the + // memory access instruction, so do not split the access. + return false; + } + if (access->IsArraySet() && + access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) { + // The access may require a runtime call or the original array pointer. + return false; + } + + // Proceed to extract the base address computation. + HGraph* graph = access->GetBlock()->GetGraph(); + ArenaAllocator* arena = graph->GetArena(); + + HIntConstant* offset = graph->GetIntConstant(data_offset); + HIntermediateAddress* address = + new (arena) HIntermediateAddress(array, offset, kNoDexPc); + address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); + access->GetBlock()->InsertInstructionBefore(address, access); + access->ReplaceInput(address, 0); + // Both instructions must depend on GC to prevent any instruction that can + // trigger GC to be inserted between the two. + access->AddSideEffects(SideEffects::DependsOnGC()); + DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC())); + DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC())); + // TODO: Code generation for HArrayGet and HArraySet will check whether the input address + // is an HIntermediateAddress and generate appropriate code. + // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe + // `HArm64Load` and `HArm64Store`,`HArmLoad` and `HArmStore`). We defer these changes + // because these new instructions would not bring any advantages yet. + // Also see the comments in + // `InstructionCodeGeneratorARM::VisitArrayGet()` + // `InstructionCodeGeneratorARM::VisitArraySet()` + // `InstructionCodeGeneratorARM64::VisitArrayGet()` + // `InstructionCodeGeneratorARM64::VisitArraySet()`. + return true; +} + + } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index b1fe8f4756..56804f5e90 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -26,6 +26,11 @@ bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa); // a negated bitwise instruction. 
bool TryMergeNegatedInput(HBinaryOperation* op); +bool TryExtractArrayAccessAddress(HInstruction* access, + HInstruction* array, + HInstruction* index, + size_t data_offset); + } // namespace art #endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 0f0ef26ea9..23ac457568 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1289,7 +1289,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #else #define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \ M(BitwiseNegatedRight, Instruction) \ - M(MultiplyAccumulate, Instruction) + M(MultiplyAccumulate, Instruction) \ + M(IntermediateAddress, Instruction) #endif #ifndef ART_ENABLE_CODEGEN_arm @@ -1303,8 +1304,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) #else #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ - M(Arm64DataProcWithShifterOp, Instruction) \ - M(Arm64IntermediateAddress, Instruction) + M(Arm64DataProcWithShifterOp, Instruction) #endif #ifndef ART_ENABLE_CODEGEN_mips diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 06b073c3e2..3f88717c2a 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -94,32 +94,6 @@ class HArm64DataProcWithShifterOp FINAL : public HExpression<2> { std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op); -// This instruction computes an intermediate address pointing in the 'middle' of an object. The -// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is -// never used across anything that can trigger GC. -class HArm64IntermediateAddress FINAL : public HExpression<2> { - public: - HArm64IntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc) - : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) { - SetRawInputAt(0, base_address); - SetRawInputAt(1, offset); - } - - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; - } - bool IsActualObject() const OVERRIDE { return false; } - - HInstruction* GetBaseAddress() const { return InputAt(0); } - HInstruction* GetOffset() const { return InputAt(1); } - - DECLARE_INSTRUCTION(Arm64IntermediateAddress); - - private: - DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress); -}; - } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index f2d5cf3253..8bd8667f84 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -113,6 +113,34 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation { DISALLOW_COPY_AND_ASSIGN(HBitwiseNegatedRight); }; + +// This instruction computes an intermediate address pointing in the 'middle' of an object. The +// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is +// never used across anything that can trigger GC. 
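As a plain C++ analogue of what the HIntermediateAddress node (declared just below) computes, the following sketch forms the array's element payload base once and lets every access apply only the scaled index, which is the shape the ARM and ARM64 backends can fold into a single [base, index LSL #shift] load. This is not ART IR; the function, the names and the zero payload offset are invented for illustration.

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// payload_offset_bytes stands in for mirror::Array's data offset; a plain
// vector has no object header, so the offset is zero in main() below.
int64_t SumElements(const int32_t* array_base, size_t payload_offset_bytes, size_t length) {
  // "Intermediate address": base + data offset, computed once before the loop.
  const int32_t* payload = reinterpret_cast<const int32_t*>(
      reinterpret_cast<const uint8_t*>(array_base) + payload_offset_bytes);
  int64_t sum = 0;
  for (size_t i = 0; i < length; ++i) {
    sum += payload[i];  // corresponds to one [payload, i LSL #2] load per element
  }
  return sum;
}

int main() {
  std::vector<int32_t> data = {1, 2, 3, 4};
  std::cout << SumElements(data.data(), /*payload_offset_bytes=*/0, data.size()) << "\n";  // 10
}
```

Because payload points into the middle of a movable object, the real node carries SideEffects::DependsOnGC(), so nothing that can trigger a collection is scheduled between the address computation and the access.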
+class HIntermediateAddress FINAL : public HExpression<2> { + public: + HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc) + : HExpression(Primitive::kPrimNot, SideEffects::DependsOnGC(), dex_pc) { + SetRawInputAt(0, base_address); + SetRawInputAt(1, offset); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } + bool IsActualObject() const OVERRIDE { return false; } + + HInstruction* GetBaseAddress() const { return InputAt(0); } + HInstruction* GetOffset() const { return InputAt(1); } + + DECLARE_INSTRUCTION(IntermediateAddress); + + private: + DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress); +}; + + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 77ae10adfc..0bca186814 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -448,8 +448,12 @@ static void RunArchOptimizations(InstructionSet instruction_set, arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats); arm::InstructionSimplifierArm* simplifier = new (arena) arm::InstructionSimplifierArm(graph, stats); + SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); + GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN_after_arch"); HOptimization* arm_optimizations[] = { simplifier, + side_effects, + gvn, fixups }; RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index e48a164667..966587d772 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -33,7 +33,9 @@ extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass, // Read barrier entrypoints. // art_quick_read_barrier_mark_regX uses an non-standard calling // convention: it expects its input in register X and returns its -// result in that same register. +// result in that same register, and saves and restores all +// caller-save registers. +extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*); @@ -119,7 +121,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - qpoints->pReadBarrierMarkReg00 = artReadBarrierMark; + qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00; qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01; qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02; qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03; diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index f9c34f57e8..34d3158c62 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -672,6 +672,12 @@ END art_quick_check_cast .endif .endm +// Save rReg's value to [sp, #offset]. 
+.macro PUSH_REG rReg, offset + str \rReg, [sp, #\offset] @ save rReg + .cfi_rel_offset \rReg, \offset +.endm + /* * Macro to insert read barrier, only used in art_quick_aput_obj. * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET. @@ -1752,30 +1758,83 @@ END art_quick_l2f /* * Create a function `name` calling the ReadBarrier::Mark routine, * getting its argument and returning its result through register - * `reg`, thus following a non-standard runtime calling convention: - * - `reg` is used to pass the (sole) argument of this function + * `reg`, saving and restoring all caller-save registers. + * + * If `reg` is different from `r0`, the generated function follows a + * non-standard runtime calling convention: + * - register `reg` is used to pass the (sole) argument of this + * function (instead of R0); + * - register `reg` is used to return the result of this function * (instead of R0); - * - `reg` is used to return the result of this function (instead of R0); * - R0 is treated like a normal (non-argument) caller-save register; * - everything else is the same as in the standard runtime calling - * convention (e.g. same callee-save registers). + * convention (e.g. standard callee-save registers are preserved). */ .macro READ_BARRIER_MARK_REG name, reg ENTRY \name - push {lr} @ save return address - .cfi_adjust_cfa_offset 4 - .cfi_rel_offset lr, 0 - sub sp, #4 @ push padding (native calling convention 8-byte alignment) - .cfi_adjust_cfa_offset 4 - mov r0, \reg @ pass arg1 - obj from `reg` - bl artReadBarrierMark @ artReadBarrierMark(obj) - mov \reg, r0 @ return result into `reg` - add sp, #4 @ pop padding - .cfi_adjust_cfa_offset -4 - pop {pc} @ return + push {r0-r4, r9, r12, lr} @ save return address and core caller-save registers + .cfi_adjust_cfa_offset 32 + .cfi_rel_offset r0, 0 + .cfi_rel_offset r1, 4 + .cfi_rel_offset r2, 8 + .cfi_rel_offset r3, 12 + .cfi_rel_offset r4, 16 + .cfi_rel_offset r9, 20 + .cfi_rel_offset r12, 24 + .cfi_rel_offset lr, 28 + vpush {s0-s15} @ save floating-point caller-save registers + .cfi_adjust_cfa_offset 64 + + .ifnc \reg, r0 + mov r0, \reg @ pass arg1 - obj from `reg` + .endif + bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj) + + vpop {s0-s15} @ restore floating-point registers + .cfi_adjust_cfa_offset -64 + @ If `reg` is a caller-save register, save the result to its + @ corresponding stack slot; it will be restored by the "pop" + @ instruction below. Otherwise, move result into `reg`. + @ + @ (Note that saving `reg` to its stack slot will overwrite the value + @ previously stored by the "push" instruction above. That is + @ alright, as in that case we know that `reg` is not a live + @ register, as it is used to pass the argument and return the result + @ of this function.) 
+ .ifc \reg, r0 + PUSH_REG r0, 0 @ copy result to r0's stack location + .else + .ifc \reg, r1 + PUSH_REG r0, 4 @ copy result to r1's stack location + .else + .ifc \reg, r2 + PUSH_REG r0, 8 @ copy result to r2's stack location + .else + .ifc \reg, r3 + PUSH_REG r0, 12 @ copy result to r3's stack location + .else + .ifc \reg, r4 + PUSH_REG r0, 16 @ copy result to r4's stack location + .else + .ifc \reg, r9 + PUSH_REG r0, 20 @ copy result to r9's stack location + .else + .ifc \reg, r12 + PUSH_REG r0, 24 @ copy result to r12's stack location + .else + mov \reg, r0 @ return result into `reg` + .endif + .endif + .endif + .endif + .endif + .endif + .endif + pop {r0-r4, r9, r12, pc} @ restore caller-save registers and return END \name .endm +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3 diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index 5385a2f46e..2e5f5ad89f 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -33,7 +33,9 @@ extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass, // Read barrier entrypoints. // art_quick_read_barrier_mark_regX uses an non-standard calling // convention: it expects its input in register X and returns its -// result in that same register. +// result in that same register, and saves and restores all +// caller-save registers. +extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*); @@ -122,7 +124,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - qpoints->pReadBarrierMarkReg00 = artReadBarrierMark; + qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00; qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01; qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02; qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03; diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index c893e777d4..6173ae71e1 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1253,6 +1253,22 @@ END art_quick_check_cast .endif .endm +// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude. +// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude. +.macro POP_REGS_NE xReg1, xReg2, offset, xExclude + .ifc \xReg1, \xExclude + ldr \xReg2, [sp, #(\offset + 8)] // restore xReg2 + .else + .ifc \xReg2, \xExclude + ldr \xReg1, [sp, #\offset] // restore xReg1 + .else + ldp \xReg1, \xReg2, [sp, #\offset] // restore xReg1 and xReg2 + .endif + .endif + .cfi_restore \xReg1 + .cfi_restore \xReg2 +.endm + /* * Macro to insert read barrier, only used in art_quick_aput_obj. 
* xDest, wDest and xObj are registers, offset is a defined literal such as @@ -2222,56 +2238,148 @@ END art_quick_indexof /* * Create a function `name` calling the ReadBarrier::Mark routine, - * getting its argument and returning its result through register - * `reg`, thus following a non-standard runtime calling convention: - * - `reg` is used to pass the (sole) argument of this function + * getting its argument and returning its result through W register + * `wreg` (corresponding to X register `xreg`), saving and restoring + * all caller-save registers. + * + * If `wreg` is different from `w0`, the generated function follows a + * non-standard runtime calling convention: + * - register `wreg` is used to pass the (sole) argument of this + * function (instead of W0); + * - register `wreg` is used to return the result of this function * (instead of W0); - * - `reg` is used to return the result of this function (instead of W0); * - W0 is treated like a normal (non-argument) caller-save register; * - everything else is the same as in the standard runtime calling - * convention (e.g. same callee-save registers). + * convention (e.g. standard callee-save registers are preserved). */ -.macro READ_BARRIER_MARK_REG name, reg +.macro READ_BARRIER_MARK_REG name, wreg, xreg ENTRY \name - str xLR, [sp, #-16]! // Save return address and add padding (16B align stack). - .cfi_adjust_cfa_offset 16 - .cfi_rel_offset x30, 0 - mov w0, \reg // Pass arg1 - obj from `reg` + /* + * Allocate 46 stack slots * 8 = 368 bytes: + * - 20 slots for core registers X0-X19 + * - 24 slots for floating-point registers D0-D7 and D16-D31 + * - 1 slot for return address register XLR + * - 1 padding slot for 16-byte stack alignment + */ + // Save all potentially live caller-save core registers. + stp x0, x1, [sp, #-368]! + .cfi_adjust_cfa_offset 368 + .cfi_rel_offset x0, 0 + .cfi_rel_offset x1, 8 + stp x2, x3, [sp, #16] + .cfi_rel_offset x2, 16 + .cfi_rel_offset x3, 24 + stp x4, x5, [sp, #32] + .cfi_rel_offset x4, 32 + .cfi_rel_offset x5, 40 + stp x6, x7, [sp, #48] + .cfi_rel_offset x6, 48 + .cfi_rel_offset x7, 56 + stp x8, x9, [sp, #64] + .cfi_rel_offset x8, 64 + .cfi_rel_offset x9, 72 + stp x10, x11, [sp, #80] + .cfi_rel_offset x10, 80 + .cfi_rel_offset x11, 88 + stp x12, x13, [sp, #96] + .cfi_rel_offset x12, 96 + .cfi_rel_offset x13, 104 + stp x14, x15, [sp, #112] + .cfi_rel_offset x14, 112 + .cfi_rel_offset x15, 120 + stp x16, x17, [sp, #128] + .cfi_rel_offset x16, 128 + .cfi_rel_offset x17, 136 + stp x18, x19, [sp, #144] + .cfi_rel_offset x18, 144 + .cfi_rel_offset x19, 152 + // Save all potentially live caller-save floating-point registers. + stp d0, d1, [sp, #160] + stp d2, d3, [sp, #176] + stp d4, d5, [sp, #192] + stp d6, d7, [sp, #208] + stp d16, d17, [sp, #224] + stp d18, d19, [sp, #240] + stp d20, d21, [sp, #256] + stp d22, d23, [sp, #272] + stp d24, d25, [sp, #288] + stp d26, d27, [sp, #304] + stp d28, d29, [sp, #320] + stp d30, d31, [sp, #336] + // Save return address. + str xLR, [sp, #352] + .cfi_rel_offset x30, 352 + // (sp + #360 is a padding slot) + + .ifnc \wreg, w0 + mov w0, \wreg // Pass arg1 - obj from `wreg` + .endif bl artReadBarrierMark // artReadBarrierMark(obj) - mov \reg, w0 // Return result into `reg` - ldr xLR, [sp], #16 // Restore return address and remove padding. + .ifnc \wreg, w0 + mov \wreg, w0 // Return result into `wreg` + .endif + + // Restore core regs, except `xreg`, as `wreg` is used to return the + // result of this function (simply remove it from the stack instead). 
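The POP_REGS_NE sequence that follows, like POP_REG_NE on x86 and x86-64 further down, implements one rule: reload every spilled caller-save register except the slot whose register now carries the marked reference. A tiny C++ model of that exclusion, with invented names, only to make the invariant explicit:

```cpp
#include <array>
#include <cstddef>
#include <iostream>

constexpr size_t kNumRegs = 4;

// Reload all registers from their spill slots, skipping the one that holds
// the call's result so the bulk restore does not clobber it.
void RestoreAllExcept(std::array<long, kNumRegs>& regs,
                      const std::array<long, kNumRegs>& spill_slots,
                      size_t result_reg) {
  for (size_t i = 0; i < kNumRegs; ++i) {
    if (i != result_reg) {
      regs[i] = spill_slots[i];
    }
  }
}

int main() {
  std::array<long, kNumRegs> regs{10, 20, 30, 40};   // live caller-save values
  std::array<long, kNumRegs> spill = regs;           // saved on entry to the stub
  regs.fill(0);                                      // clobbered by the callee
  regs[2] = 99;                                      // register 2 now holds the marked pointer
  RestoreAllExcept(regs, spill, /*result_reg=*/2);
  std::cout << regs[0] << " " << regs[2] << "\n";    // prints: 10 99
}
```

The 32-bit ARM macro earlier takes the opposite route: PUSH_REG writes the result into the excluded register's own stack slot, so one bulk pop restores everything, result included.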
+ POP_REGS_NE x0, x1, 0, \xreg + POP_REGS_NE x2, x3, 16, \xreg + POP_REGS_NE x4, x5, 32, \xreg + POP_REGS_NE x6, x7, 48, \xreg + POP_REGS_NE x8, x9, 64, \xreg + POP_REGS_NE x10, x11, 80, \xreg + POP_REGS_NE x12, x13, 96, \xreg + POP_REGS_NE x14, x15, 112, \xreg + POP_REGS_NE x16, x17, 128, \xreg + POP_REGS_NE x18, x19, 144, \xreg + // Restore floating-point registers. + ldp d0, d1, [sp, #160] + ldp d2, d3, [sp, #176] + ldp d4, d5, [sp, #192] + ldp d6, d7, [sp, #208] + ldp d16, d17, [sp, #224] + ldp d18, d19, [sp, #240] + ldp d20, d21, [sp, #256] + ldp d22, d23, [sp, #272] + ldp d24, d25, [sp, #288] + ldp d26, d27, [sp, #304] + ldp d28, d29, [sp, #320] + ldp d30, d31, [sp, #336] + // Restore return address and remove padding. + ldr xLR, [sp, #352] .cfi_restore x30 - .cfi_adjust_cfa_offset -16 + add sp, sp, #368 + .cfi_adjust_cfa_offset -368 ret END \name .endm -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0, x0 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1, x1 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2, x2 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3, x3 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4, x4 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5, x5 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6, x6 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7, x7 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8, x8 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9, x9 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12 +READ_BARRIER_MARK_REG 
art_quick_read_barrier_mark_reg13, w13, x13 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28 +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29 diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc index b19aa01712..b02edb6aba 100644 --- a/runtime/arch/mips64/entrypoints_init_mips64.cc +++ b/runtime/arch/mips64/entrypoints_init_mips64.cc @@ -28,8 +28,8 @@ namespace art { // Cast entrypoints. -extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass, - const mirror::Class* ref_class); +extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass, + const mirror::Class* ref_class); // Math entrypoints. extern int32_t CmpgDouble(double a, double b); extern int32_t CmplDouble(double a, double b); diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index 8f13d58de2..4e9756c54e 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -31,7 +31,8 @@ extern "C" size_t art_quick_is_assignable(const mirror::Class* klass, // Read barrier entrypoints. // art_quick_read_barrier_mark_regX uses an non-standard calling // convention: it expects its input in register X and returns its -// result in that same register. +// result in that same register, and saves and restores all +// caller-save registers. extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*); diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index e75fecba4b..77e04e7981 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1908,41 +1908,73 @@ DEFINE_FUNCTION art_nested_signal_return UNREACHABLE END_FUNCTION art_nested_signal_return -// Call the ReadBarrierMark entry point, getting input and returning -// result through EAX (register 0), following the standard runtime -// calling convention. 
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg00 - subl LITERAL(8), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(8) - PUSH eax // pass arg1 - obj - call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj) - addl LITERAL(12), %esp // pop argument and remove padding - CFI_ADJUST_CFA_OFFSET(-12) - ret -END_FUNCTION art_quick_read_barrier_mark_reg00 - // Create a function `name` calling the ReadBarrier::Mark routine, // getting its argument and returning its result through register -// `reg`, thus following a non-standard runtime calling convention: -// - `reg` is used to pass the (sole) argument of this function +// `reg`, saving and restoring all caller-save registers. +// +// If `reg` is different from `eax`, the generated function follows a +// non-standard runtime calling convention: +// - register `reg` is used to pass the (sole) argument of this function +// (instead of EAX); +// - register `reg` is used to return the result of this function // (instead of EAX); -// - `reg` is used to return the result of this function (instead of EAX); // - EAX is treated like a normal (non-argument) caller-save register; // - everything else is the same as in the standard runtime calling -// convention (e.g. same callee-save registers). +// convention (e.g. standard callee-save registers are preserved). MACRO2(READ_BARRIER_MARK_REG, name, reg) DEFINE_FUNCTION VAR(name) - subl LITERAL(8), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(8) + // Save all potentially live caller-save core registers. + PUSH eax + PUSH ecx + PUSH edx + PUSH ebx + // 8-byte align the stack to improve (8-byte) XMM register saving and restoring. + // and create space for caller-save floating-point registers. + subl MACRO_LITERAL(4 + 8 * 8), %esp + CFI_ADJUST_CFA_OFFSET(4 + 8 * 8) + // Save all potentially live caller-save floating-point registers. + movsd %xmm0, 0(%esp) + movsd %xmm1, 8(%esp) + movsd %xmm2, 16(%esp) + movsd %xmm3, 24(%esp) + movsd %xmm4, 32(%esp) + movsd %xmm5, 40(%esp) + movsd %xmm6, 48(%esp) + movsd %xmm7, 56(%esp) + + subl LITERAL(4), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(4) PUSH RAW_VAR(reg) // pass arg1 - obj from `reg` call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj) - movl %eax, REG_VAR(reg) // return result into `reg` - addl LITERAL(12), %esp // pop argument and remove padding - CFI_ADJUST_CFA_OFFSET(-12) + .ifnc RAW_VAR(reg), eax + movl %eax, REG_VAR(reg) // return result into `reg` + .endif + addl LITERAL(8), %esp // pop argument and remove padding + CFI_ADJUST_CFA_OFFSET(-8) + + // Restore floating-point registers. + movsd 0(%esp), %xmm0 + movsd 8(%esp), %xmm1 + movsd 16(%esp), %xmm2 + movsd 24(%esp), %xmm3 + movsd 32(%esp), %xmm4 + movsd 40(%esp), %xmm5 + movsd 48(%esp), %xmm6 + movsd 56(%esp), %xmm7 + // Remove floating-point registers and padding. + addl MACRO_LITERAL(8 * 8 + 4), %esp + CFI_ADJUST_CFA_OFFSET(-(8 * 8 + 4)) + // Restore core regs, except `reg`, as it is used to return the + // result of this function (simply remove it from the stack instead). 
+ POP_REG_NE ebx, RAW_VAR(reg) + POP_REG_NE edx, RAW_VAR(reg) + POP_REG_NE ecx, RAW_VAR(reg) + POP_REG_NE eax, RAW_VAR(reg) ret END_FUNCTION VAR(name) END_MACRO +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, eax READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, ecx READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, edx READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, ebx diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S index cf0039c84e..c4e723c483 100644 --- a/runtime/arch/x86_64/asm_support_x86_64.S +++ b/runtime/arch/x86_64/asm_support_x86_64.S @@ -52,7 +52,7 @@ #define LITERAL(value) $value #if defined(__APPLE__) - #define MACRO_LITERAL(value) $$(value) + #define MACRO_LITERAL(value) $(value) #else #define MACRO_LITERAL(value) $value #endif diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index b566fb1ced..c2e3023b87 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -34,7 +34,8 @@ extern "C" size_t art_quick_assignable_from_code(const mirror::Class* klass, // Read barrier entrypoints. // art_quick_read_barrier_mark_regX uses an non-standard calling // convention: it expects its input in register X and returns its -// result in that same register. +// result in that same register, and saves and restores all +// caller-save registers. extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*); diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 496e6a8b4a..784ec394a8 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1815,73 +1815,93 @@ DEFINE_FUNCTION art_nested_signal_return UNREACHABLE END_FUNCTION art_nested_signal_return -// Call the ReadBarrier::Mark routine, getting argument and returning -// result through RAX (register 0), thus following a non-standard -// runtime calling convention: -// - RAX is used to pass the (sole) argument of this function (instead -// of RDI); -// - RDI is treated like a normal (non-argument) caller-save register; -// - everything else is the same as in the standard runtime calling -// convention; in particular, RAX is still used to return the result -// of this function. -DEFINE_FUNCTION art_quick_read_barrier_mark_reg00 - SETUP_FP_CALLEE_SAVE_FRAME - subq LITERAL(8), %rsp // Alignment padding. - CFI_ADJUST_CFA_OFFSET(8) - movq %rax, %rdi // Pass arg1 - obj from RAX. - call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj) - addq LITERAL(8), %rsp // Remove padding. - CFI_ADJUST_CFA_OFFSET(-8) - RESTORE_FP_CALLEE_SAVE_FRAME - ret -END_FUNCTION art_quick_read_barrier_mark_reg00 - -// Call the ReadBarrier::Mark routine, getting argument and returning -// result through RDI (register 7), thus following a non-standard -// runtime calling convention: -// - RDI is used to return the result of this function (instead of RAX); -// - RAX is treated like a normal (non-result) caller-save register; -// - everything else is the same as in the standard runtime calling -// convention; in particular, RDI is still used to pass the (sole) -// argument of this function. 
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg07 - SETUP_FP_CALLEE_SAVE_FRAME - subq LITERAL(8), %rsp // Alignment padding. - CFI_ADJUST_CFA_OFFSET(8) - call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj) - movq %rax, %rdi // Return result into RDI. - addq LITERAL(8), %rsp // Remove padding. - CFI_ADJUST_CFA_OFFSET(-8) - RESTORE_FP_CALLEE_SAVE_FRAME - ret -END_FUNCTION art_quick_read_barrier_mark_reg07 - // Create a function `name` calling the ReadBarrier::Mark routine, // getting its argument and returning its result through register -// `reg`, thus following a non-standard runtime calling convention: -// - `reg` is used to pass the (sole) argument of this function (instead -// of RDI); -// - `reg` is used to return the result of this function (instead of RAX); -// - RDI is treated like a normal (non-argument) caller-save register; -// - RAX is treated like a normal (non-result) caller-save register; +// `reg`, saving and restoring all caller-save registers. +// +// The generated function follows a non-standard runtime calling +// convention: +// - register `reg` (which may be different from RDI) is used to pass +// the (sole) argument of this function; +// - register `reg` (which may be different from RAX) is used to return +// the result of this function (instead of RAX); +// - if `reg` is different from `rdi`, RDI is treated like a normal +// (non-argument) caller-save register; +// - if `reg` is different from `rax`, RAX is treated like a normal +// (non-result) caller-save register; // - everything else is the same as in the standard runtime calling -// convention (e.g. same callee-save registers). +// convention (e.g. standard callee-save registers are preserved). MACRO2(READ_BARRIER_MARK_REG, name, reg) DEFINE_FUNCTION VAR(name) + // Save all potentially live caller-save core registers. + PUSH rax + PUSH rcx + PUSH rdx + PUSH rsi + PUSH rdi + PUSH r8 + PUSH r9 + PUSH r10 + PUSH r11 + // Create space for caller-save floating-point registers. + subq MACRO_LITERAL(12 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(12 * 8) + // Save all potentially live caller-save floating-point registers. + movq %xmm0, 0(%rsp) + movq %xmm1, 8(%rsp) + movq %xmm2, 16(%rsp) + movq %xmm3, 24(%rsp) + movq %xmm4, 32(%rsp) + movq %xmm5, 40(%rsp) + movq %xmm6, 48(%rsp) + movq %xmm7, 56(%rsp) + movq %xmm8, 64(%rsp) + movq %xmm9, 72(%rsp) + movq %xmm10, 80(%rsp) + movq %xmm11, 88(%rsp) SETUP_FP_CALLEE_SAVE_FRAME - subq LITERAL(8), %rsp // Alignment padding. - CFI_ADJUST_CFA_OFFSET(8) - movq REG_VAR(reg), %rdi // Pass arg1 - obj from `reg`. + + .ifnc RAW_VAR(reg), rdi + movq REG_VAR(reg), %rdi // Pass arg1 - obj from `reg`. + .endif call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj) - movq %rax, REG_VAR(reg) // Return result into `reg`. - addq LITERAL(8), %rsp // Remove padding. - CFI_ADJUST_CFA_OFFSET(-8) + .ifnc RAW_VAR(reg), rax + movq %rax, REG_VAR(reg) // Return result into `reg`. + .endif + RESTORE_FP_CALLEE_SAVE_FRAME + // Restore floating-point registers. + movq 0(%rsp), %xmm0 + movq 8(%rsp), %xmm1 + movq 16(%rsp), %xmm2 + movq 24(%rsp), %xmm3 + movq 32(%rsp), %xmm4 + movq 40(%rsp), %xmm5 + movq 48(%rsp), %xmm6 + movq 56(%rsp), %xmm7 + movq 64(%rsp), %xmm8 + movq 72(%rsp), %xmm9 + movq 80(%rsp), %xmm10 + movq 88(%rsp), %xmm11 + // Remove floating-point registers. + addq MACRO_LITERAL(12 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(-(12 * 8)) + // Restore core regs, except `reg`, as it is used to return the + // result of this function (simply remove it from the stack instead). 
+ POP_REG_NE r11, RAW_VAR(reg) + POP_REG_NE r10, RAW_VAR(reg) + POP_REG_NE r9, RAW_VAR(reg) + POP_REG_NE r8, RAW_VAR(reg) + POP_REG_NE rdi, RAW_VAR(reg) + POP_REG_NE rsi, RAW_VAR(reg) + POP_REG_NE rdx, RAW_VAR(reg) + POP_REG_NE rcx, RAW_VAR(reg) + POP_REG_NE rax, RAW_VAR(reg) ret END_FUNCTION VAR(name) END_MACRO -// Note: art_quick_read_barrier_mark_reg00 is implemented above. +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx @@ -1889,7 +1909,7 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx // cannot be used to pass arguments. READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi -// Note: art_quick_read_barrier_mark_reg07 is implemented above. +READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10 diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index e5a2f36938..d0dad6494e 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -7695,7 +7695,7 @@ ArtField* ClassLinker::ResolveField(const DexFile& dex_file, } if (is_static) { - resolved = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx); + resolved = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx); } else { resolved = klass->FindInstanceField(dex_cache.Get(), field_idx); } diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h index 301111251a..4019a5b536 100644 --- a/runtime/gc/collector/concurrent_copying-inl.h +++ b/runtime/gc/collector/concurrent_copying-inl.h @@ -153,6 +153,14 @@ inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { } } +inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) { + // TODO: Consider removing this check when we are done investigating slow paths. b/30162165 + if (UNLIKELY(mark_from_read_barrier_measurements_)) { + return MarkFromReadBarrierWithMeasurements(from_ref); + } + return Mark(from_ref); +} + inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) { DCHECK(region_space_->IsInFromSpace(from_ref)); LockWord lw = from_ref->GetLockWord(false); diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 78eed9ec25..d2d2f234ab 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -17,7 +17,9 @@ #include "concurrent_copying.h" #include "art_field-inl.h" +#include "base/histogram-inl.h" #include "base/stl_util.h" +#include "base/systrace.h" #include "debugger.h" #include "gc/accounting/heap_bitmap-inl.h" #include "gc/accounting/space_bitmap-inl.h" @@ -39,7 +41,9 @@ namespace collector { static constexpr size_t kDefaultGcMarkStackSize = 2 * MB; -ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix) +ConcurrentCopying::ConcurrentCopying(Heap* heap, + const std::string& name_prefix, + bool measure_read_barrier_slow_path) : GarbageCollector(heap, name_prefix + (name_prefix.empty() ? 
"" : " ") + "concurrent copying + mark sweep"), @@ -54,6 +58,14 @@ ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix) heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff), weak_ref_access_enabled_(true), skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock), + measure_read_barrier_slow_path_(measure_read_barrier_slow_path), + rb_slow_path_ns_(0), + rb_slow_path_count_(0), + rb_slow_path_count_gc_(0), + rb_slow_path_histogram_lock_("Read barrier histogram lock"), + rb_slow_path_time_histogram_("Mutator time in read barrier slow path", 500, 32), + rb_slow_path_count_total_(0), + rb_slow_path_count_gc_total_(0), rb_table_(heap_->GetReadBarrierTable()), force_evacuate_all_(false), immune_gray_stack_lock_("concurrent copying immune gray stack lock", @@ -162,6 +174,14 @@ void ConcurrentCopying::InitializePhase() { MutexLock mu(Thread::Current(), mark_stack_lock_); CHECK(false_gray_stack_.empty()); } + + mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_; + if (measure_read_barrier_slow_path_) { + rb_slow_path_ns_.StoreRelaxed(0); + rb_slow_path_count_.StoreRelaxed(0); + rb_slow_path_count_gc_.StoreRelaxed(0); + } + immune_spaces_.Reset(); bytes_moved_.StoreRelaxed(0); objects_moved_.StoreRelaxed(0); @@ -2030,9 +2050,17 @@ void ConcurrentCopying::FinishPhase() { MutexLock mu(Thread::Current(), skipped_blocks_lock_); skipped_blocks_map_.clear(); } - ReaderMutexLock mu(self, *Locks::mutator_lock_); - WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_); - heap_->ClearMarkedObjects(); + { + ReaderMutexLock mu(self, *Locks::mutator_lock_); + WriterMutexLock mu2(self, *Locks::heap_bitmap_lock_); + heap_->ClearMarkedObjects(); + } + if (measure_read_barrier_slow_path_) { + MutexLock mu(self, rb_slow_path_histogram_lock_); + rb_slow_path_time_histogram_.AdjustAndAddValue(rb_slow_path_ns_.LoadRelaxed()); + rb_slow_path_count_total_ += rb_slow_path_count_.LoadRelaxed(); + rb_slow_path_count_gc_total_ += rb_slow_path_count_gc_.LoadRelaxed(); + } } bool ConcurrentCopying::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) { @@ -2070,6 +2098,37 @@ void ConcurrentCopying::RevokeAllThreadLocalBuffers() { region_space_->RevokeAllThreadLocalBuffers(); } +mirror::Object* ConcurrentCopying::MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref) { + if (Thread::Current() != thread_running_gc_) { + rb_slow_path_count_.FetchAndAddRelaxed(1u); + } else { + rb_slow_path_count_gc_.FetchAndAddRelaxed(1u); + } + ScopedTrace tr(__FUNCTION__); + const uint64_t start_time = measure_read_barrier_slow_path_ ? 
NanoTime() : 0u; + mirror::Object* ret = Mark(from_ref); + if (measure_read_barrier_slow_path_) { + rb_slow_path_ns_.FetchAndAddRelaxed(NanoTime() - start_time); + } + return ret; +} + +void ConcurrentCopying::DumpPerformanceInfo(std::ostream& os) { + GarbageCollector::DumpPerformanceInfo(os); + MutexLock mu(Thread::Current(), rb_slow_path_histogram_lock_); + if (rb_slow_path_time_histogram_.SampleSize() > 0) { + Histogram<uint64_t>::CumulativeData cumulative_data; + rb_slow_path_time_histogram_.CreateHistogram(&cumulative_data); + rb_slow_path_time_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data); + } + if (rb_slow_path_count_total_ > 0) { + os << "Slow path count " << rb_slow_path_count_total_ << "\n"; + } + if (rb_slow_path_count_gc_total_ > 0) { + os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n"; + } +} + } // namespace collector } // namespace gc } // namespace art diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index 166a1f0b2a..6a8d052cb8 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -58,17 +58,24 @@ class ConcurrentCopying : public GarbageCollector { // Enable verbose mode. static constexpr bool kVerboseMode = false; - ConcurrentCopying(Heap* heap, const std::string& name_prefix = ""); + ConcurrentCopying(Heap* heap, + const std::string& name_prefix = "", + bool measure_read_barrier_slow_path = false); ~ConcurrentCopying(); virtual void RunPhases() OVERRIDE - REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); + REQUIRES(!immune_gray_stack_lock_, + !mark_stack_lock_, + !rb_slow_path_histogram_lock_, + !skipped_blocks_lock_); void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !immune_gray_stack_lock_); void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); - void FinishPhase() REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); + void FinishPhase() REQUIRES(!mark_stack_lock_, + !rb_slow_path_histogram_lock_, + !skipped_blocks_lock_); void BindBitmaps() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_); @@ -95,7 +102,11 @@ class ConcurrentCopying : public GarbageCollector { return IsMarked(ref) == ref; } template<bool kGrayImmuneObject = true> - ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_) + ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); + ALWAYS_INLINE mirror::Object* MarkFromReadBarrier(mirror::Object* from_ref) + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); bool IsMarking() const { return is_marking_; @@ -203,6 +214,10 @@ class ConcurrentCopying : public GarbageCollector { REQUIRES(!mark_stack_lock_); void ScanImmuneObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); + mirror::Object* MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref) + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); + void DumpPerformanceInfo(std::ostream& os) OVERRIDE REQUIRES(!rb_slow_path_histogram_lock_); space::RegionSpace* region_space_; 
// The underlying region space. std::unique_ptr<Barrier> gc_barrier_; @@ -251,6 +266,20 @@ class ConcurrentCopying : public GarbageCollector { Atomic<size_t> to_space_bytes_skipped_; Atomic<size_t> to_space_objects_skipped_; + // If measure_read_barrier_slow_path_ is true, we count how long is spent in MarkFromReadBarrier + // and also log. + bool measure_read_barrier_slow_path_; + // mark_from_read_barrier_measurements_ is true if systrace is enabled or + // measure_read_barrier_time_ is true. + bool mark_from_read_barrier_measurements_; + Atomic<uint64_t> rb_slow_path_ns_; + Atomic<uint64_t> rb_slow_path_count_; + Atomic<uint64_t> rb_slow_path_count_gc_; + mutable Mutex rb_slow_path_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + Histogram<uint64_t> rb_slow_path_time_histogram_ GUARDED_BY(rb_slow_path_histogram_lock_); + uint64_t rb_slow_path_count_total_ GUARDED_BY(rb_slow_path_histogram_lock_); + uint64_t rb_slow_path_count_gc_total_ GUARDED_BY(rb_slow_path_histogram_lock_); + accounting::ReadBarrierTable* rb_table_; bool force_evacuate_all_; // True if all regions are evacuated. Atomic<bool> updated_all_immune_objects_; diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index 580486aa68..e0b71a7e24 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -181,7 +181,7 @@ class GarbageCollector : public RootVisitor, public IsMarkedVisitor, public Mark void RecordFree(const ObjectBytePair& freed); // Record a free of large objects. void RecordFreeLOS(const ObjectBytePair& freed); - void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_); + virtual void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_); // Helper functions for querying if objects are marked. These are used for processing references, // and will be used for reading system weaks while the GC is running. 
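Note: making GarbageCollector::DumpPerformanceInfo() virtual is what lets the new read-barrier histogram reach the existing heap-wide GC performance dump without extra plumbing. A hedged sketch of the call site (the loop shape is an assumption about Heap::DumpGcPerformanceInfo, not part of this patch):

// Illustrative only: virtual dispatch now reaches
// ConcurrentCopying::DumpPerformanceInfo(), which prints
// rb_slow_path_time_histogram_ and the slow-path counters added above.
void Heap::DumpGcPerformanceInfo(std::ostream& os) {
  for (collector::GarbageCollector* collector : garbage_collectors_) {
    collector->DumpPerformanceInfo(os);
  }
}
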
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index a6d62a9346..6f4767e391 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -157,6 +157,7 @@ Heap::Heap(size_t initial_size, bool verify_pre_sweeping_rosalloc, bool verify_post_gc_rosalloc, bool gc_stress_mode, + bool measure_gc_performance, bool use_homogeneous_space_compaction_for_oom, uint64_t min_interval_homogeneous_space_compaction_by_oom) : non_moving_space_(nullptr), @@ -599,7 +600,9 @@ Heap::Heap(size_t initial_size, garbage_collectors_.push_back(semi_space_collector_); } if (MayUseCollector(kCollectorTypeCC)) { - concurrent_copying_collector_ = new collector::ConcurrentCopying(this); + concurrent_copying_collector_ = new collector::ConcurrentCopying(this, + "", + measure_gc_performance); garbage_collectors_.push_back(concurrent_copying_collector_); } if (MayUseCollector(kCollectorTypeMC)) { diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 6fb048a5d7..bb0d11a1d7 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -182,6 +182,7 @@ class Heap { bool verify_pre_sweeping_rosalloc, bool verify_post_gc_rosalloc, bool gc_stress_mode, + bool measure_gc_performance, bool use_homogeneous_space_compaction, uint64_t min_interval_homogeneous_space_compaction_by_oom); diff --git a/runtime/interpreter/mterp/arm64/fbinop2addr.S b/runtime/interpreter/mterp/arm64/fbinop2addr.S index 0d57cbf2cf..04236adb81 100644 --- a/runtime/interpreter/mterp/arm64/fbinop2addr.S +++ b/runtime/interpreter/mterp/arm64/fbinop2addr.S @@ -7,8 +7,7 @@ */ /* binop/2addr vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 $instr // s2<- op diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S index 2d3a11eafa..7628ed3c47 100644 --- a/runtime/interpreter/mterp/arm64/footer.S +++ b/runtime/interpreter/mterp/arm64/footer.S @@ -234,7 +234,7 @@ MterpOnStackReplacement: #if MTERP_LOGGING mov x0, xSELF add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 + sxtw x2, wINST bl MterpLogOSR #endif mov x0, #1 // Signal normal return diff --git a/runtime/interpreter/mterp/arm64/funopNarrow.S b/runtime/interpreter/mterp/arm64/funopNarrow.S index 9f5ad1e87a..aed830bc23 100644 --- a/runtime/interpreter/mterp/arm64/funopNarrow.S +++ b/runtime/interpreter/mterp/arm64/funopNarrow.S @@ -8,10 +8,9 @@ */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG $srcreg, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A $instr // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG $tgtreg, w4 // vA<- d0 diff --git a/runtime/interpreter/mterp/arm64/funopNarrower.S b/runtime/interpreter/mterp/arm64/funopNarrower.S index 411396b290..6fddfea979 100644 --- a/runtime/interpreter/mterp/arm64/funopNarrower.S +++ b/runtime/interpreter/mterp/arm64/funopNarrower.S @@ -7,10 +7,9 @@ */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE $srcreg, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A $instr // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG $tgtreg, w4 // vA<- d0 diff --git a/runtime/interpreter/mterp/arm64/funopWide.S b/runtime/interpreter/mterp/arm64/funopWide.S index d83b39c251..409e26b6ec 100644 --- a/runtime/interpreter/mterp/arm64/funopWide.S +++ 
b/runtime/interpreter/mterp/arm64/funopWide.S @@ -7,10 +7,9 @@ */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE $srcreg, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A $instr // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE $tgtreg, w4 // vA<- d0 diff --git a/runtime/interpreter/mterp/arm64/funopWider.S b/runtime/interpreter/mterp/arm64/funopWider.S index 50a73f1997..4c91ebcdc6 100644 --- a/runtime/interpreter/mterp/arm64/funopWider.S +++ b/runtime/interpreter/mterp/arm64/funopWider.S @@ -7,10 +7,9 @@ */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG $srcreg, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A $instr // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE $tgtreg, w4 // vA<- d0 diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_16.S b/runtime/interpreter/mterp/arm64/op_const_wide_16.S index e43628bccd..553d481541 100644 --- a/runtime/interpreter/mterp/arm64/op_const_wide_16.S +++ b/runtime/interpreter/mterp/arm64/op_const_wide_16.S @@ -1,8 +1,7 @@ /* const-wide/16 vAA, #+BBBB */ - FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended + FETCH_S x0, 1 // x0<- ssssssssssssBBBB (sign-extended) lsr w3, wINST, #8 // w3<- AA FETCH_ADVANCE_INST 2 // advance rPC, load rINST - sbfm x0, x0, 0, 31 GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/arm64/op_const_wide_32.S b/runtime/interpreter/mterp/arm64/op_const_wide_32.S index 527f7d8167..9dc4fc3867 100644 --- a/runtime/interpreter/mterp/arm64/op_const_wide_32.S +++ b/runtime/interpreter/mterp/arm64/op_const_wide_32.S @@ -1,10 +1,9 @@ /* const-wide/32 vAA, #+BBBBbbbb */ - FETCH w0, 1 // w0<- 0000bbbb (low) + FETCH w0, 1 // x0<- 000000000000bbbb (low) lsr w3, wINST, #8 // w3<- AA - FETCH_S w2, 2 // w2<- ssssBBBB (high) + FETCH_S x2, 2 // x2<- ssssssssssssBBBB (high) FETCH_ADVANCE_INST 3 // advance rPC, load wINST GET_INST_OPCODE ip // extract opcode from wINST - orr w0, w0, w2, lsl #16 // w0<- BBBBbbbb - sbfm x0, x0, 0, 31 + orr x0, x0, x2, lsl #16 // x0<- ssssssssBBBBbbbb SET_VREG_WIDE x0, w3 GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/arm64/op_iget_quick.S b/runtime/interpreter/mterp/arm64/op_iget_quick.S index 45c68a3a79..699b2c4229 100644 --- a/runtime/interpreter/mterp/arm64/op_iget_quick.S +++ b/runtime/interpreter/mterp/arm64/op_iget_quick.S @@ -5,8 +5,7 @@ FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null $load w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST $extend diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S index 2480d2d222..30b30c2d4d 100644 --- a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S +++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S @@ -3,7 +3,7 @@ FETCH w4, 1 // w4<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cbz w3, common_errNullObject // object was null + cbz w3, common_errNullObject // object was null add x4, x3, x4 // create direct pointer 
ldr x0, [x4] FETCH_ADVANCE_INST 2 // advance rPC, load wINST diff --git a/runtime/interpreter/mterp/arm64/op_instance_of.S b/runtime/interpreter/mterp/arm64/op_instance_of.S index 647bc75cfd..a56705a68b 100644 --- a/runtime/interpreter/mterp/arm64/op_instance_of.S +++ b/runtime/interpreter/mterp/arm64/op_instance_of.S @@ -13,8 +13,7 @@ mov x3, xSELF // w3<- self bl MterpInstanceOf // (index, &obj, method, self) ldr x1, [xSELF, #THREAD_EXCEPTION_OFFSET] - lsr w2, wINST, #8 // w2<- A+ - and w2, w2, #15 // w2<- A + ubfx w2, wINST, #8, #4 // w2<- A PREFETCH_INST 2 cbnz x1, MterpException ADVANCE 2 // advance rPC diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S index 13d2120392..35830f3881 100644 --- a/runtime/interpreter/mterp/arm64/op_int_to_long.S +++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S @@ -1 +1 @@ -%include "arm64/funopWider.S" {"instr":"sbfm x0, x0, 0, 31", "srcreg":"w0", "tgtreg":"x0"} +%include "arm64/funopWider.S" {"instr":"sxtw x0, w0", "srcreg":"w0", "tgtreg":"x0"} diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S index 27b5dc57b7..566e2bfdd4 100644 --- a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S +++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S @@ -3,8 +3,7 @@ FETCH w3, 1 // w3<- field byte offset GET_VREG w2, w2 // w2<- fp[B], the object pointer ubfx w0, wINST, #8, #4 // w0<- A - cmp w2, #0 // check object for null - beq common_errNullObject // object was null + cbz w2, common_errNullObject // object was null GET_VREG_WIDE x0, w0 // x0-< fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load wINST add x1, x2, x3 // create a direct pointer diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S index 1456f1a650..4faa6d2410 100644 --- a/runtime/interpreter/mterp/arm64/op_packed_switch.S +++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S @@ -9,12 +9,12 @@ * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ - FETCH w0, 1 // w0<- bbbb (lo) - FETCH w1, 2 // w1<- BBBB (hi) + FETCH w0, 1 // x0<- 000000000000bbbb (lo) + FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi) lsr w3, wINST, #8 // w3<- AA - orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb + orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb GET_VREG w1, w3 // w1<- vAA - add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2 + add x0, xPC, x0, lsl #1 // x0<- PC + BBBBbbbb*2 bl $func // w0<- code-unit branch offset - sbfm xINST, x0, 0, 31 + sxtw xINST, w0 b MterpCommonTakenBranchNoFlags diff --git a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S index 0b918910c7..95f81c5a23 100644 --- a/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S +++ b/runtime/interpreter/mterp/arm64/op_rem_float_2addr.S @@ -1,12 +1,10 @@ /* rem vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 bl fmodf - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A FETCH_ADVANCE_INST 1 // advance rPC, load rINST GET_INST_OPCODE ip // extract opcode from rINST SET_VREG s0, w9 diff --git a/runtime/interpreter/mterp/arm64/op_shl_int.S b/runtime/interpreter/mterp/arm64/op_shl_int.S index bd0f237cfe..3062a3fad8 100644 --- a/runtime/interpreter/mterp/arm64/op_shl_int.S +++ b/runtime/interpreter/mterp/arm64/op_shl_int.S @@ -1 +1 @@ -%include 
"arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"} +%include "arm64/binop.S" {"instr":"lsl w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S index b4671d2f1c..9a7e09f016 100644 --- a/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S +++ b/runtime/interpreter/mterp/arm64/op_shl_int_2addr.S @@ -1 +1 @@ -%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"} +%include "arm64/binop2addr.S" {"instr":"lsl w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S index 4dd32e08a2..17f57f9f5c 100644 --- a/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S +++ b/runtime/interpreter/mterp/arm64/op_shl_int_lit8.S @@ -1 +1 @@ -%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"lsl w0, w0, w1"} +%include "arm64/binopLit8.S" {"instr":"lsl w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_shr_int.S b/runtime/interpreter/mterp/arm64/op_shr_int.S index c214a18fd0..493b7407f7 100644 --- a/runtime/interpreter/mterp/arm64/op_shr_int.S +++ b/runtime/interpreter/mterp/arm64/op_shr_int.S @@ -1 +1 @@ -%include "arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"} +%include "arm64/binop.S" {"instr":"asr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S index 3c1484b0c7..6efe8ee010 100644 --- a/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S +++ b/runtime/interpreter/mterp/arm64/op_shr_int_2addr.S @@ -1 +1 @@ -%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"} +%include "arm64/binop2addr.S" {"instr":"asr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S index 26d5024a2c..274080c4b2 100644 --- a/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S +++ b/runtime/interpreter/mterp/arm64/op_shr_int_lit8.S @@ -1 +1 @@ -%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"asr w0, w0, w1"} +%include "arm64/binopLit8.S" {"instr":"asr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int.S b/runtime/interpreter/mterp/arm64/op_ushr_int.S index bb8382b925..005452b554 100644 --- a/runtime/interpreter/mterp/arm64/op_ushr_int.S +++ b/runtime/interpreter/mterp/arm64/op_ushr_int.S @@ -1 +1 @@ -%include "arm64/binop.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"} +%include "arm64/binop.S" {"instr":"lsr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S index dbccb9952a..1cb8cb7442 100644 --- a/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S +++ b/runtime/interpreter/mterp/arm64/op_ushr_int_2addr.S @@ -1 +1 @@ -%include "arm64/binop2addr.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"} +%include "arm64/binop2addr.S" {"instr":"lsr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S index 35090c46d7..ff30e1f1a8 100644 --- a/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S +++ b/runtime/interpreter/mterp/arm64/op_ushr_int_lit8.S @@ -1 +1 @@ -%include "arm64/binopLit8.S" {"preinstr":"and w1, w1, #31", "instr":"lsr w0, w0, w1"} +%include "arm64/binopLit8.S" {"instr":"lsr w0, w0, w1"} diff --git a/runtime/interpreter/mterp/arm64/shiftWide.S b/runtime/interpreter/mterp/arm64/shiftWide.S 
index 6306fca5cb..dcb2fb701a 100644 --- a/runtime/interpreter/mterp/arm64/shiftWide.S +++ b/runtime/interpreter/mterp/arm64/shiftWide.S @@ -12,8 +12,7 @@ and w1, w0, #255 // w1<- BB GET_VREG_WIDE x1, w1 // x1<- vBB FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and x2, x2, #63 // Mask low 6 - $opcode x0, x1, x2 // Do the shift. + $opcode x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 // vAA<- x0 GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/arm64/shiftWide2addr.S b/runtime/interpreter/mterp/arm64/shiftWide2addr.S index 77d104a62b..b860dfddd3 100644 --- a/runtime/interpreter/mterp/arm64/shiftWide2addr.S +++ b/runtime/interpreter/mterp/arm64/shiftWide2addr.S @@ -8,8 +8,7 @@ GET_VREG w1, w1 // x1<- vB GET_VREG_WIDE x0, w2 // x0<- vA FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and x1, x1, #63 // Mask low 6 bits. - $opcode x0, x0, x1 + $opcode x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w2 // vAA<- result GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S index df0b686d37..d470551173 100644 --- a/runtime/interpreter/mterp/out/mterp_arm64.S +++ b/runtime/interpreter/mterp/out/mterp_arm64.S @@ -747,10 +747,9 @@ artMterpAsmInstructionStart = .L_op_nop .L_op_const_wide_16: /* 0x16 */ /* File: arm64/op_const_wide_16.S */ /* const-wide/16 vAA, #+BBBB */ - FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended + FETCH_S x0, 1 // x0<- ssssssssssssBBBB (sign-extended) lsr w3, wINST, #8 // w3<- AA FETCH_ADVANCE_INST 2 // advance rPC, load rINST - sbfm x0, x0, 0, 31 GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 GOTO_OPCODE ip // jump to next instruction @@ -760,13 +759,12 @@ artMterpAsmInstructionStart = .L_op_nop .L_op_const_wide_32: /* 0x17 */ /* File: arm64/op_const_wide_32.S */ /* const-wide/32 vAA, #+BBBBbbbb */ - FETCH w0, 1 // w0<- 0000bbbb (low) + FETCH w0, 1 // x0<- 000000000000bbbb (low) lsr w3, wINST, #8 // w3<- AA - FETCH_S w2, 2 // w2<- ssssBBBB (high) + FETCH_S x2, 2 // x2<- ssssssssssssBBBB (high) FETCH_ADVANCE_INST 3 // advance rPC, load wINST GET_INST_OPCODE ip // extract opcode from wINST - orr w0, w0, w2, lsl #16 // w0<- BBBBbbbb - sbfm x0, x0, 0, 31 + orr x0, x0, x2, lsl #16 // x0<- ssssssssBBBBbbbb SET_VREG_WIDE x0, w3 GOTO_OPCODE ip // jump to next instruction @@ -934,8 +932,7 @@ artMterpAsmInstructionStart = .L_op_nop mov x3, xSELF // w3<- self bl MterpInstanceOf // (index, &obj, method, self) ldr x1, [xSELF, #THREAD_EXCEPTION_OFFSET] - lsr w2, wINST, #8 // w2<- A+ - and w2, w2, #15 // w2<- A + ubfx w2, wINST, #8, #4 // w2<- A PREFETCH_INST 2 cbnz x1, MterpException ADVANCE 2 // advance rPC @@ -1143,14 +1140,14 @@ artMterpAsmInstructionStart = .L_op_nop * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ - FETCH w0, 1 // w0<- bbbb (lo) - FETCH w1, 2 // w1<- BBBB (hi) + FETCH w0, 1 // x0<- 000000000000bbbb (lo) + FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi) lsr w3, wINST, #8 // w3<- AA - orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb + orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb GET_VREG w1, w3 // w1<- vAA - add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2 + add x0, xPC, x0, lsl #1 // x0<- PC + BBBBbbbb*2 bl MterpDoPackedSwitch // w0<- code-unit branch offset - sbfm xINST, x0, 0, 31 + sxtw xINST, w0 b MterpCommonTakenBranchNoFlags /* ------------------------------ */ 
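Note: the FETCH_S/sxtw changes above matter because the packed-switch payload may sit before the switch instruction (the regression test added later in this patch does exactly that), so the 32-bit code-unit offset can be negative; per the new info.txt note, the old sequence let the final address add zero-extend it. A worked sketch of the arithmetic (illustrative C++, not interpreter code; the helper name is made up):

#include <cstdint>

// The offset must stay sign-extended through the whole computation.
int64_t SwitchPayloadAddress(int64_t pc, uint16_t bbbb_lo, uint16_t bbbb_hi) {
  int64_t offset = static_cast<int64_t>(bbbb_lo) |
                   (static_cast<int64_t>(static_cast<int16_t>(bbbb_hi)) << 16);
  return pc + offset * 2;  // code units are 2 bytes: add x0, xPC, x0, lsl #1
}
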
@@ -1168,14 +1165,14 @@ artMterpAsmInstructionStart = .L_op_nop * for: packed-switch, sparse-switch */ /* op vAA, +BBBB */ - FETCH w0, 1 // w0<- bbbb (lo) - FETCH w1, 2 // w1<- BBBB (hi) + FETCH w0, 1 // x0<- 000000000000bbbb (lo) + FETCH_S x1, 2 // x1<- ssssssssssssBBBB (hi) lsr w3, wINST, #8 // w3<- AA - orr w0, w0, w1, lsl #16 // w0<- BBBBbbbb + orr x0, x0, x1, lsl #16 // x0<- ssssssssBBBBbbbb GET_VREG w1, w3 // w1<- vAA - add x0, xPC, w0, lsl #1 // w0<- PC + BBBBbbbb*2 + add x0, xPC, x0, lsl #1 // x0<- PC + BBBBbbbb*2 bl MterpDoSparseSwitch // w0<- code-unit branch offset - sbfm xINST, x0, 0, 31 + sxtw xINST, w0 b MterpCommonTakenBranchNoFlags @@ -3345,11 +3342,10 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG w0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A - sbfm x0, x0, 0, 31 // d0<- op + sxtw x0, w0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE x0, w4 // vA<- d0 GOTO_OPCODE ip // jump to next instruction @@ -3369,10 +3365,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG w0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A scvtf s0, w0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG s0, w4 // vA<- d0 @@ -3392,10 +3387,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG w0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A scvtf d0, w0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE d0, w4 // vA<- d0 @@ -3415,10 +3409,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE x0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG w0, w4 // vA<- d0 @@ -3438,10 +3431,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE x0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A scvtf s0, x0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG s0, w4 // vA<- d0 @@ -3461,10 +3453,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE x0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A scvtf d0, x0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE d0, w4 // vA<- d0 @@ -3485,10 +3476,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG s0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvtzs w0, s0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG w0, w4 // vA<- d0 @@ -3508,10 +3498,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG s0, w3 FETCH_ADVANCE_INST 
1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvtzs x0, s0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE x0, w4 // vA<- d0 @@ -3531,10 +3520,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG s0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvt d0, s0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE d0, w4 // vA<- d0 @@ -3554,10 +3542,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE d0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvtzs w0, d0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG w0, w4 // vA<- d0 @@ -3577,10 +3564,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE d0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvtzs x0, d0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE x0, w4 // vA<- d0 @@ -3600,10 +3586,9 @@ artMterpAsmInstructionStart = .L_op_nop */ /* unop vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w4, wINST, #8 // w4<- A+ + ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE d0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - and w4, w4, #15 // w4<- A fcvt s0, d0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST SET_VREG s0, w4 // vA<- d0 @@ -4032,7 +4017,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero // is second operand zero? .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsl w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -4071,7 +4056,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero // is second operand zero? .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes asr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -4110,7 +4095,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero // is second operand zero? .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -4424,8 +4409,7 @@ artMterpAsmInstructionStart = .L_op_nop and w1, w0, #255 // w1<- BB GET_VREG_WIDE x1, w1 // x1<- vBB FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and x2, x2, #63 // Mask low 6 - lsl x0, x1, x2 // Do the shift. + lsl x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 // vAA<- x0 GOTO_OPCODE ip // jump to next instruction @@ -4450,8 +4434,7 @@ artMterpAsmInstructionStart = .L_op_nop and w1, w0, #255 // w1<- BB GET_VREG_WIDE x1, w1 // x1<- vBB FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and x2, x2, #63 // Mask low 6 - asr x0, x1, x2 // Do the shift. + asr x0, x1, x2 // Do the shift. 
Only low 6 bits of x2 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 // vAA<- x0 GOTO_OPCODE ip // jump to next instruction @@ -4476,8 +4459,7 @@ artMterpAsmInstructionStart = .L_op_nop and w1, w0, #255 // w1<- BB GET_VREG_WIDE x1, w1 // x1<- vBB FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and x2, x2, #63 // Mask low 6 - lsr x0, x1, x2 // Do the shift. + lsr x0, x1, x2 // Do the shift. Only low 6 bits of x2 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w3 // vAA<- x0 GOTO_OPCODE ip // jump to next instruction @@ -5089,7 +5071,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsl w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -5125,7 +5107,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes asr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -5161,7 +5143,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -5463,8 +5445,7 @@ artMterpAsmInstructionStart = .L_op_nop GET_VREG w1, w1 // x1<- vB GET_VREG_WIDE x0, w2 // x0<- vA FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and x1, x1, #63 // Mask low 6 bits. - lsl x0, x0, x1 + lsl x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w2 // vAA<- result GOTO_OPCODE ip // jump to next instruction @@ -5485,8 +5466,7 @@ artMterpAsmInstructionStart = .L_op_nop GET_VREG w1, w1 // x1<- vB GET_VREG_WIDE x0, w2 // x0<- vA FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and x1, x1, #63 // Mask low 6 bits. - asr x0, x0, x1 + asr x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used. GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w2 // vAA<- result GOTO_OPCODE ip // jump to next instruction @@ -5507,8 +5487,7 @@ artMterpAsmInstructionStart = .L_op_nop GET_VREG w1, w1 // x1<- vB GET_VREG_WIDE x0, w2 // x0<- vA FETCH_ADVANCE_INST 1 // advance rPC, load rINST - and x1, x1, #63 // Mask low 6 bits. - lsr x0, x0, x1 + lsr x0, x0, x1 // Do the shift. Only low 6 bits of x1 are used. 
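Note: dropping the explicit "and ..., #63" (and the 32-bit "and ..., #31" preinstr) is safe because AArch64 variable shifts already use the shift amount modulo the data size, which is exactly the Dalvik shift semantics. A small equivalence sketch for the 64-bit case (illustrative C++, not interpreter code):

#include <cstdint>

// What the single lsl now computes for shl-long: the hardware masks the shift
// amount to its low 6 bits, matching the bytecode definition.
int64_t ShlLong(int64_t value, int32_t distance) {
  return static_cast<int64_t>(static_cast<uint64_t>(value) << (distance & 0x3f));
}
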
GET_INST_OPCODE ip // extract opcode from rINST SET_VREG_WIDE x0, w2 // vAA<- result GOTO_OPCODE ip // jump to next instruction @@ -5529,8 +5508,7 @@ artMterpAsmInstructionStart = .L_op_nop */ /* binop/2addr vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 fadd s2, s0, s1 // s2<- op @@ -5554,8 +5532,7 @@ artMterpAsmInstructionStart = .L_op_nop */ /* binop/2addr vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 fsub s2, s0, s1 // s2<- op @@ -5579,8 +5556,7 @@ artMterpAsmInstructionStart = .L_op_nop */ /* binop/2addr vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 fmul s2, s0, s1 // s2<- op @@ -5604,8 +5580,7 @@ artMterpAsmInstructionStart = .L_op_nop */ /* binop/2addr vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 fdiv s2, s0, s1 // s2<- op @@ -5621,13 +5596,11 @@ artMterpAsmInstructionStart = .L_op_nop /* File: arm64/op_rem_float_2addr.S */ /* rem vA, vB */ lsr w3, wINST, #12 // w3<- B - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A GET_VREG s1, w3 GET_VREG s0, w9 bl fmodf - lsr w9, wINST, #8 // w9<- A+ - and w9, w9, #15 // w9<- A + ubfx w9, wINST, #8, #4 // w9<- A FETCH_ADVANCE_INST 1 // advance rPC, load rINST GET_INST_OPCODE ip // extract opcode from rINST SET_VREG s0, w9 @@ -6381,7 +6354,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsl w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -6417,7 +6390,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes asr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -6453,7 +6426,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w1, common_errDivideByZero .endif FETCH_ADVANCE_INST 2 // advance rPC, load rINST - and w1, w1, #31 // optional op; may set condition codes + // optional op; may set condition codes lsr w0, w0, w1 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 @@ -6471,8 +6444,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null ldr w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6489,7 +6461,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w4, 1 // w4<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cbz w3, common_errNullObject // object was null + cbz w3, common_errNullObject // object was null add x4, x3, x4 // create direct pointer ldr x0, [x4] 
FETCH_ADVANCE_INST 2 // advance rPC, load wINST @@ -6544,8 +6516,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w3, 1 // w3<- field byte offset GET_VREG w2, w2 // w2<- fp[B], the object pointer ubfx w0, wINST, #8, #4 // w0<- A - cmp w2, #0 // check object for null - beq common_errNullObject // object was null + cbz w2, common_errNullObject // object was null GET_VREG_WIDE x0, w0 // x0-< fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load wINST add x1, x2, x3 // create a direct pointer @@ -6710,8 +6681,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null ldrb w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6731,8 +6701,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null ldrsb w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6752,8 +6721,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null ldrh w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6773,8 +6741,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A - cmp x3, #0 // check object for null - beq common_errNullObject // object was null + cbz w3, common_errNullObject // object was null ldrsh w0, [x3, x1] // w0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -11521,7 +11488,7 @@ MterpOnStackReplacement: #if MTERP_LOGGING mov x0, xSELF add x1, xFP, #OFF_FP_SHADOWFRAME - sbfm x2, xINST, 0, 31 + sxtw x2, wINST bl MterpLogOSR #endif mov x0, #1 // Signal normal return diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc index 9c77d3814c..1c31c5764b 100644 --- a/runtime/mirror/class.cc +++ b/runtime/mirror/class.cc @@ -748,21 +748,24 @@ ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const String return nullptr; } -ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache, +ArtField* Class::FindStaticField(Thread* self, + Class* klass, + const DexCache* dex_cache, uint32_t dex_field_idx) { - for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) { + for (Class* k = klass; k != nullptr; k = k->GetSuperClass()) { // Is the field in this class? ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx); if (f != nullptr) { return f; } - // Wrap k incase it moves during GetDirectInterface. + // Though GetDirectInterface() should not cause thread suspension when called + // from here, it takes a Handle as an argument, so we need to wrap `k`. + ScopedAssertNoThreadSuspension ants(self, __FUNCTION__); StackHandleScope<1> hs(self); - HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k)); + Handle<mirror::Class> h_k(hs.NewHandle(k)); // Is this field in any of this class' interfaces? 
for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) { - StackHandleScope<1> hs2(self); - Handle<mirror::Class> interface(hs2.NewHandle(GetDirectInterface(self, h_k, i))); + mirror::Class* interface = GetDirectInterface(self, h_k, i); f = FindStaticField(self, interface, dex_cache, dex_field_idx); if (f != nullptr) { return f; diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h index f044b5968b..9be9f0107b 100644 --- a/runtime/mirror/class.h +++ b/runtime/mirror/class.h @@ -1091,7 +1091,9 @@ class MANAGED Class FINAL : public Object { // Finds the given static field in this class or superclass, only searches classes that // have the same dex cache. - static ArtField* FindStaticField(Thread* self, Handle<Class> klass, const DexCache* dex_cache, + static ArtField* FindStaticField(Thread* self, + Class* klass, + const DexCache* dex_cache, uint32_t dex_field_idx) SHARED_REQUIRES(Locks::mutator_lock_); diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index 79b18aa84e..d987f65a08 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -342,7 +342,7 @@ static void PreloadDexCachesResolveField(Handle<mirror::DexCache> dex_cache, uin return; } if (is_static) { - field = mirror::Class::FindStaticField(self, klass, dex_cache.Get(), field_idx); + field = mirror::Class::FindStaticField(self, klass.Get(), dex_cache.Get(), field_idx); } else { field = klass->FindInstanceField(dex_cache.Get(), field_idx); } diff --git a/runtime/oat.h b/runtime/oat.h index e506e3c476..9b8f5452c4 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '8', '3', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '8', '4', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index 0c3eb3b628..92efa211ce 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -220,7 +220,7 @@ inline void ReadBarrier::AssertToSpaceInvariant(GcRootSource* gc_root_source, } inline mirror::Object* ReadBarrier::Mark(mirror::Object* obj) { - return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->Mark(obj); + return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->MarkFromReadBarrier(obj); } inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj, diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 21cd2aa2c9..079c079244 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -989,6 +989,7 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { xgc_option.verify_pre_sweeping_rosalloc_, xgc_option.verify_post_gc_rosalloc_, xgc_option.gcstress_, + xgc_option.measure_, runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM), runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs)); diff --git a/test/501-regression-packed-switch/info.txt b/test/501-regression-packed-switch/info.txt index fbd93fa815..988b220a87 100644 --- a/test/501-regression-packed-switch/info.txt +++ b/test/501-regression-packed-switch/info.txt @@ -1,2 +1,4 @@ Regression test for the interpreter and optimizing's builder which used to trip when compiled code contained a packed switch with no targets. 
+Regression test for the arm64 mterp miscalculating the switch table +address, zero-extending a register instead of sign-extending. diff --git a/test/501-regression-packed-switch/smali/Test.smali b/test/501-regression-packed-switch/smali/Test.smali index 8756ed5f23..5a760c7880 100644 --- a/test/501-regression-packed-switch/smali/Test.smali +++ b/test/501-regression-packed-switch/smali/Test.smali @@ -27,3 +27,28 @@ .packed-switch 0x0 .end packed-switch .end method + +.method public static PackedSwitchAfterData(I)I + .registers 1 + goto :pswitch_instr + + :case0 + const/4 v0, 0x1 + return v0 + + :pswitch_data + .packed-switch 0x0 + :case0 + :case1 + .end packed-switch + + :pswitch_instr + packed-switch v0, :pswitch_data + const/4 v0, 0x7 + return v0 + + :case1 + const/4 v0, 0x4 + return v0 + +.end method diff --git a/test/501-regression-packed-switch/src/Main.java b/test/501-regression-packed-switch/src/Main.java index b80bc62c50..12bc1a8138 100644 --- a/test/501-regression-packed-switch/src/Main.java +++ b/test/501-regression-packed-switch/src/Main.java @@ -29,5 +29,10 @@ public class Main { if (result != 5) { throw new Error("Expected 5, got " + result); } + m = c.getMethod("PackedSwitchAfterData", new Class[] { int.class }); + result = (Integer) m.invoke(null, new Integer(0)); + if (result != 1) { + throw new Error("Expected 1, got " + result); + } } } diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java index ead94464bf..3366f20cc5 100644 --- a/test/527-checker-array-access-split/src/Main.java +++ b/test/527-checker-array-access-split/src/Main.java @@ -34,9 +34,21 @@ public class Main { /// CHECK-START-ARM64: int Main.constantIndexGet(int[]) instruction_simplifier_arm64 (after) /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK-NOT: Arm64IntermediateAddress + /// CHECK-NOT: IntermediateAddress /// CHECK: ArrayGet [<<Array>>,<<Index>>] + + /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (before) + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: ArrayGet [<<Array>>,<<Index>>] + + /// CHECK-START-ARM: int Main.constantIndexGet(int[]) instruction_simplifier_arm (after) + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK-NOT: IntermediateAddress + /// CHECK: ArrayGet [<<Array>>,<<Index>>] + public static int constantIndexGet(int array[]) { return array[1]; } @@ -55,9 +67,22 @@ public class Main { /// CHECK: <<Const2:i\d+>> IntConstant 2 /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK-NOT: Arm64IntermediateAddress + /// CHECK-NOT: IntermediateAddress + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>] + + + /// CHECK-START-ARM: void Main.constantIndexSet(int[]) instruction_simplifier_arm (before) + /// CHECK: <<Const2:i\d+>> IntConstant 2 + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>] + /// CHECK-START-ARM: void Main.constantIndexSet(int[]) instruction_simplifier_arm (after) + /// CHECK: <<Const2:i\d+>> IntConstant 2 + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK-NOT: IntermediateAddress + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Const2>>] public static void constantIndexSet(int array[]) { array[1] = 2; @@ -76,7 +101,20 @@ public class Main { /// CHECK: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck 
/// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: ArrayGet [<<Address>>,<<Index>>] + + + /// CHECK-START-ARM: int Main.get(int[], int) instruction_simplifier_arm (before) + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: ArrayGet [<<Array>>,<<Index>>] + + /// CHECK-START-ARM: int Main.get(int[], int) instruction_simplifier_arm (after) + /// CHECK: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: ArrayGet [<<Address>>,<<Index>>] public static int get(int array[], int index) { @@ -102,7 +140,26 @@ public class Main { /// CHECK: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: ArraySet [<<Address>>,<<Index>>,<<Arg>>] + + + /// CHECK-START-ARM: void Main.set(int[], int, int) instruction_simplifier_arm (before) + /// CHECK: ParameterValue + /// CHECK: ParameterValue + /// CHECK: <<Arg:i\d+>> ParameterValue + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Arg>>] + + /// CHECK-START-ARM: void Main.set(int[], int, int) instruction_simplifier_arm (after) + /// CHECK: ParameterValue + /// CHECK: ParameterValue + /// CHECK: <<Arg:i\d+>> ParameterValue + /// CHECK: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: ArraySet [<<Address>>,<<Index>>,<<Arg>>] public static void set(int array[], int index, int value) { @@ -126,10 +183,10 @@ public class Main { /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] - /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] /// CHECK-START-ARM64: void Main.getSet(int[], int) GVN_after_arch (after) @@ -137,12 +194,42 @@ public class Main { /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] - /// CHECK-NOT: Arm64IntermediateAddress + /// CHECK-NOT: IntermediateAddress /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: void Main.getSet(int[], int) instruction_simplifier_arm (before) + /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> 
BoundsCheck + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: void Main.getSet(int[], int) instruction_simplifier_arm (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: void Main.getSet(int[], int) GVN_after_arch (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK-NOT: IntermediateAddress + /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>] public static void getSet(int array[], int index) { array[index] = array[index] + 1; } @@ -166,11 +253,11 @@ public class Main { /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] /// CHECK: NewArray - /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] /// CHECK-START-ARM64: int[] Main.accrossGC(int[], int) GVN_after_arch (after) @@ -178,11 +265,45 @@ public class Main { /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant /// CHECK: <<Array:l\d+>> NullCheck /// CHECK: <<Index:i\d+>> BoundsCheck - /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] /// CHECK: NewArray - /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: ArraySet [<<Address2>>,<<Index>>,<<Add>>] + + + /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) instruction_simplifier_arm (before) + /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: NewArray + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) instruction_simplifier_arm (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: 
<<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: NewArray + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: int[] Main.accrossGC(int[], int) GVN_after_arch (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant + /// CHECK: <<Array:l\d+>> NullCheck + /// CHECK: <<Index:i\d+>> BoundsCheck + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: NewArray + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: ArraySet [<<Address2>>,<<Index>>,<<Add>>] public static int[] accrossGC(int array[], int index) { @@ -196,6 +317,14 @@ public class Main { * Test that the intermediate address is shared between array accesses after * the bounds check have been removed by BCE. */ + // For checker tests `instruction_simplifier_<arch> (after)` below, by the time we reach + // the architecture-specific instruction simplifier, BCE has removed the bounds checks in + // the loop. + + // Note that we do not care that the `DataOffset` is `12`. But if we do not + // specify it and any other `IntConstant` appears before that instruction, + // checker will match the previous `IntConstant`, and we will thus fail the + // check. /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (before) /// CHECK: <<Const1:i\d+>> IntConstant 1 @@ -207,14 +336,6 @@ public class Main { /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>] - // By the time we reach the architecture-specific instruction simplifier, BCE - // has removed the bounds checks in the loop. - - // Note that we do not care that the `DataOffset` is `12`. But if we do not - // specify it and any other `IntConstant` appears before that instruction, - // checker will match the previous `IntConstant`, and we will thus fail the - // check. 
- /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (after) /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 @@ -222,10 +343,10 @@ public class Main { /// CHECK: <<Index:i\d+>> Phi /// CHECK: If // -------------- Loop - /// CHECK: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] - /// CHECK: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN_after_arch (after) @@ -235,10 +356,47 @@ public class Main { /// CHECK: <<Index:i\d+>> Phi /// CHECK: If // -------------- Loop - /// CHECK: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK-NOT: IntermediateAddress + /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>] + + + /// CHECK-START-ARM: int Main.canMergeAfterBCE1() instruction_simplifier_arm (before) + /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: int Main.canMergeAfterBCE1() instruction_simplifier_arm (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM: int Main.canMergeAfterBCE1() GVN_after_arch (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] - /// CHECK-NOT: Arm64IntermediateAddress + /// CHECK-NOT: IntermediateAddress /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>] public static int canMergeAfterBCE1() { @@ -279,12 +437,12 @@ public class Main { /// CHECK: If // -------------- Loop /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] - /// CHECK-DAG: <<Address1:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address1>>,<<Index>>] - /// CHECK-DAG: <<Address2:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<Address2:l\d+>> 
IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address2>>,<<Index1>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] - /// CHECK: <<Address3:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: <<Address3:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Add>>] /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after) @@ -295,7 +453,7 @@ public class Main { /// CHECK: If // -------------- Loop /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] - /// CHECK-DAG: <<Address:l\d+>> Arm64IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address>>,<<Index>>] /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address>>,<<Index1>>] /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] @@ -304,8 +462,55 @@ public class Main { // There should be only one intermediate address computation in the loop. /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN_after_arch (after) - /// CHECK: Arm64IntermediateAddress - /// CHECK-NOT: Arm64IntermediateAddress + /// CHECK: IntermediateAddress + /// CHECK-NOT: IntermediateAddress + + + /// CHECK-START-ARM: int Main.canMergeAfterBCE2() instruction_simplifier_arm (before) + /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] + /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Array>>,<<Index>>] + /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Array>>,<<Index1>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: ArraySet [<<Array>>,<<Index1>>,<<Add>>] + + /// CHECK-START-ARM: int Main.canMergeAfterBCE2() instruction_simplifier_arm (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] + /// CHECK-DAG: <<Address1:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address1>>,<<Index>>] + /// CHECK-DAG: <<Address2:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address2>>,<<Index1>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: <<Address3:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Add>>] + + /// CHECK-START-ARM: int Main.canMergeAfterBCE2() GVN_after_arch (after) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK: <<Array:l\d+>> NewArray + /// CHECK: <<Index:i\d+>> Phi + /// CHECK: If + // -------------- Loop + /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] + /// CHECK-DAG: <<Address:l\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] + /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address>>,<<Index>>] + /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address>>,<<Index1>>] + /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: ArraySet [<<Address>>,<<Index1>>,<<Add>>] + + /// CHECK-START-ARM: int Main.canMergeAfterBCE2() GVN_after_arch (after) + /// CHECK: IntermediateAddress + /// CHECK-NOT: IntermediateAddress public 
static int canMergeAfterBCE2() { int[] array = {0, 1, 2, 3}; @@ -315,6 +520,37 @@ public class Main { return array[array.length - 1]; } + /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (before) + /// CHECK-DAG: <<Array1:l\d+>> NewArray + /// CHECK-DAG: <<Array2:l\d+>> NewArray + /// CHECK-DAG: <<Array3:l\d+>> NewArray + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: ArrayGet [<<Array1>>,<<Index>>] + /// CHECK-DAG: ArrayGet [<<Array2>>,<<Index>>] + /// CHECK-DAG: ArrayGet [<<Array3>>,<<Index>>] + + /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after) + /// CHECK-DAG: <<Array1:l\d+>> NewArray + /// CHECK-DAG: <<Array2:l\d+>> NewArray + /// CHECK-DAG: <<Array3:l\d+>> NewArray + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: ArrayGet [<<Array1>>,<<Index>>] + /// CHECK-DAG: ArrayGet [<<Array2>>,<<Index>>] + /// CHECK-DAG: ArrayGet [<<Array3>>,<<Index>>] + + /// CHECK-START-ARM: int Main.checkLongFloatDouble() instruction_simplifier_arm (after) + /// CHECK-NOT: IntermediateAddress + public static int checkLongFloatDouble() { + long[] array_long = {0, 1, 2, 3}; + float[] array_float = {(float)0.0, (float)1.0, (float)2.0, (float)3.0}; + double[] array_double = {0.0, 1.0, 2.0, 3.0}; + double s = 0.0; + + for (int i = 0; i < 4; i++) { + s += (double)array_long[i] + (double)array_float[i] + array_double[i]; + } + return (int)s; + } public static void main(String[] args) { int[] array = {123, 456, 789}; @@ -337,5 +573,7 @@ public class Main { assertIntEquals(4, canMergeAfterBCE1()); assertIntEquals(6, canMergeAfterBCE2()); + + assertIntEquals(18, checkLongFloatDouble()); } } diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index dd6b6f3fbc..8f8b667429 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -527,7 +527,7 @@ TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS := # Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT). # 484: Baker's fast path based read barrier compiler instrumentation generates code containing # more parallel moves on x86, thus some Checker assertions may fail. -# 527: On ARM64, the read barrier instrumentation does not support the HArm64IntermediateAddress +# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress # instruction yet (b/26601270). # 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are # not yet handled in the read barrier configuration. diff --git a/test/run-test b/test/run-test index bbcd4b0f0b..1ef5428726 100755 --- a/test/run-test +++ b/test/run-test @@ -37,7 +37,7 @@ test_dir="test-$$" if [ -z "$TMPDIR" ]; then tmp_dir="/tmp/$USER/${test_dir}" else - tmp_dir="${TMPDIR}/$USER/${test_dir}" + tmp_dir="${TMPDIR}/${test_dir}" fi checker="${progdir}/../tools/checker/checker.py" export JAVA="java" |
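The assertIntEquals(18, checkLongFloatDouble()) expectation added above follows directly from the test body: each loop iteration adds the index value three times (once from the long[], once from the float[], once from the double[]), so the sum is (0 + 1 + 2 + 3) * 3 = 18. A minimal standalone sketch that reproduces this arithmetic outside the test harness is shown below; the class name is hypothetical and is not part of this patch.

// Standalone sketch mirroring checkLongFloatDouble() from the test above.
// Class name is hypothetical; only the arithmetic from the patch is reused.
public class CheckLongFloatDoubleSketch {
  public static void main(String[] args) {
    long[] arrayLong = {0, 1, 2, 3};
    float[] arrayFloat = {0.0f, 1.0f, 2.0f, 3.0f};
    double[] arrayDouble = {0.0, 1.0, 2.0, 3.0};
    double s = 0.0;
    for (int i = 0; i < 4; i++) {
      // Each iteration contributes i three times: long, float, and double element.
      s += (double) arrayLong[i] + (double) arrayFloat[i] + arrayDouble[i];
    }
    // (0 + 1 + 2 + 3) * 3 == 18, matching assertIntEquals(18, checkLongFloatDouble()).
    System.out.println((int) s);  // prints 18
  }
}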