31 files changed, 1411 insertions(+), 150 deletions(-)
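The functionally significant fix below is in Mips64JniCallingConvention::FrameSize(): the old size covered ArtMethod* and the callee-save area but omitted the slot for RA, even though CoreSpillMask() spills RA along with the eight callee saves (hence the new DCHECK against POPCOUNT). A minimal sketch of the corrected arithmetic, assuming 8-byte stack slots; the standalone function and its callee_saves parameter are illustrative, not the actual ART API:

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kFramePointerSize = 8;  // 64-bit slots on MIPS64.

    // Frame data = ArtMethod* slot, plus RA and the callee-save area, plus the
    // 32-bit local reference segment state. HandleScope, return-value spill,
    // and the final round-up to the stack alignment come on top of this.
    size_t JniFrameDataSize(size_t callee_saves) {
      return kFramePointerSize +                       // ArtMethod*
             (callee_saves + 1) * kFramePointerSize +  // "+ 1" is the RA slot the old code dropped
             sizeof(uint32_t);                         // local reference segment state
    }

With eight callee saves plus RA, the extra 8-byte slot, once the frame is rounded up to the 16-byte stack alignment, is exactly what moves the expected frames in the jni_cfi_test_expected.inc hunk below from 96 to 112 bytes.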
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk index b50712429e..b3d246cba3 100644 --- a/build/Android.common_build.mk +++ b/build/Android.common_build.mk @@ -34,6 +34,10 @@ ART_BUILD_TARGET_DEBUG ?= true ART_BUILD_HOST_NDEBUG ?= true ART_BUILD_HOST_DEBUG ?= true +# Set this to change what opt level Art is built at. +ART_DEBUG_OPT_FLAG ?= -O2 +ART_NDEBUG_OPT_FLAG ?= -O3 + # Enable the static builds only for checkbuilds. ifneq (,$(filter checkbuild,$(MAKECMDGOALS))) ART_BUILD_HOST_STATIC ?= true @@ -319,11 +323,11 @@ endif # Cflags for non-debug ART and ART tools. art_non_debug_cflags := \ - -O3 + $(ART_NDEBUG_OPT_FLAG) # Cflags for debug ART and ART tools. art_debug_cflags := \ - -O2 \ + $(ART_DEBUG_OPT_FLAG) \ -DDYNAMIC_ANNOTATIONS_ENABLED=1 \ -DVIXL_DEBUG \ -UNDEBUG diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc index 5af2242963..16b4386938 100644 --- a/compiler/jni/jni_cfi_test_expected.inc +++ b/compiler/jni/jni_cfi_test_expected.inc @@ -394,76 +394,77 @@ static constexpr uint8_t expected_cfi_kMips[] = { // 0x0000006c: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips64[] = { - 0xA0, 0xFF, 0xBD, 0x67, 0x58, 0x00, 0xBF, 0xFF, 0x50, 0x00, 0xBE, 0xFF, - 0x48, 0x00, 0xBC, 0xFF, 0x40, 0x00, 0xB7, 0xFF, 0x38, 0x00, 0xB6, 0xFF, - 0x30, 0x00, 0xB5, 0xFF, 0x28, 0x00, 0xB4, 0xFF, 0x20, 0x00, 0xB3, 0xFF, - 0x18, 0x00, 0xB2, 0xFF, 0x00, 0x00, 0xA4, 0xFF, 0x68, 0x00, 0xA5, 0xAF, - 0x6C, 0x00, 0xAE, 0xE7, 0x70, 0x00, 0xA7, 0xAF, 0x74, 0x00, 0xA8, 0xAF, - 0xE0, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBD, 0x67, 0x18, 0x00, 0xB2, 0xDF, - 0x20, 0x00, 0xB3, 0xDF, 0x28, 0x00, 0xB4, 0xDF, 0x30, 0x00, 0xB5, 0xDF, - 0x38, 0x00, 0xB6, 0xDF, 0x40, 0x00, 0xB7, 0xDF, 0x48, 0x00, 0xBC, 0xDF, - 0x50, 0x00, 0xBE, 0xDF, 0x58, 0x00, 0xBF, 0xDF, 0x60, 0x00, 0xBD, 0x67, + 0x90, 0xFF, 0xBD, 0x67, 0x68, 0x00, 0xBF, 0xFF, 0x60, 0x00, 0xBE, 0xFF, + 0x58, 0x00, 0xBC, 0xFF, 0x50, 0x00, 0xB7, 0xFF, 0x48, 0x00, 0xB6, 0xFF, + 0x40, 0x00, 0xB5, 0xFF, 0x38, 0x00, 0xB4, 0xFF, 0x30, 0x00, 0xB3, 0xFF, + 0x28, 0x00, 0xB2, 0xFF, 0x00, 0x00, 0xA4, 0xFF, 0x78, 0x00, 0xA5, 0xAF, + 0x7C, 0x00, 0xAE, 0xE7, 0x80, 0x00, 0xA7, 0xAF, 0x84, 0x00, 0xA8, 0xAF, + 0xE0, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBD, 0x67, 0x28, 0x00, 0xB2, 0xDF, + 0x30, 0x00, 0xB3, 0xDF, 0x38, 0x00, 0xB4, 0xDF, 0x40, 0x00, 0xB5, 0xDF, + 0x48, 0x00, 0xB6, 0xDF, 0x50, 0x00, 0xB7, 0xDF, 0x58, 0x00, 0xBC, 0xDF, + 0x60, 0x00, 0xBE, 0xDF, 0x68, 0x00, 0xBF, 0xDF, 0x70, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, }; static constexpr uint8_t expected_cfi_kMips64[] = { - 0x44, 0x0E, 0x60, 0x44, 0x9F, 0x02, 0x44, 0x9E, 0x04, 0x44, 0x9C, 0x06, + 0x44, 0x0E, 0x70, 0x44, 0x9F, 0x02, 0x44, 0x9E, 0x04, 0x44, 0x9C, 0x06, 0x44, 0x97, 0x08, 0x44, 0x96, 0x0A, 0x44, 0x95, 0x0C, 0x44, 0x94, 0x0E, - 0x44, 0x93, 0x10, 0x44, 0x92, 0x12, 0x58, 0x0E, 0x80, 0x01, 0x44, 0x0E, - 0x60, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6, + 0x44, 0x93, 0x10, 0x44, 0x92, 0x12, 0x58, 0x0E, 0x90, 0x01, 0x44, 0x0E, + 0x70, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6, 0x44, 0xD7, 0x44, 0xDC, 0x44, 0xDE, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, - 0x0B, 0x0E, 0x60, + 0x0B, 0x0E, 0x70, }; -// 0x00000000: daddiu r29, r29, -96 -// 0x00000004: .cfi_def_cfa_offset: 96 -// 0x00000004: sd r31, +88(r29) +// 0x00000000: daddiu r29, r29, -112 +// 0x00000004: .cfi_def_cfa_offset: 112 +// 0x00000004: sd r31, +104(r29) // 0x00000008: .cfi_offset: r31 at cfa-8 -// 0x00000008: sd r30, +80(r29) 
+// 0x00000008: sd r30, +96(r29) // 0x0000000c: .cfi_offset: r30 at cfa-16 -// 0x0000000c: sd r28, +72(r29) +// 0x0000000c: sd r28, +88(r29) // 0x00000010: .cfi_offset: r28 at cfa-24 -// 0x00000010: sd r23, +64(r29) +// 0x00000010: sd r23, +80(r29) // 0x00000014: .cfi_offset: r23 at cfa-32 -// 0x00000014: sd r22, +56(r29) +// 0x00000014: sd r22, +72(r29) // 0x00000018: .cfi_offset: r22 at cfa-40 -// 0x00000018: sd r21, +48(r29) +// 0x00000018: sd r21, +64(r29) // 0x0000001c: .cfi_offset: r21 at cfa-48 -// 0x0000001c: sd r20, +40(r29) +// 0x0000001c: sd r20, +56(r29) // 0x00000020: .cfi_offset: r20 at cfa-56 -// 0x00000020: sd r19, +32(r29) +// 0x00000020: sd r19, +48(r29) // 0x00000024: .cfi_offset: r19 at cfa-64 -// 0x00000024: sd r18, +24(r29) +// 0x00000024: sd r18, +40(r29) // 0x00000028: .cfi_offset: r18 at cfa-72 // 0x00000028: sd r4, +0(r29) -// 0x0000002c: sw r5, +104(r29) -// 0x00000030: swc1 f14, +108(r29) -// 0x00000034: sw r7, +112(r29) -// 0x00000038: sw r8, +116(r29) +// 0x0000002c: sw r5, +120(r29) +// 0x00000030: swc1 f14, +124(r29) +// 0x00000034: sw r7, +128(r29) +// 0x00000038: sw r8, +132(r29) // 0x0000003c: daddiu r29, r29, -32 -// 0x00000040: .cfi_def_cfa_offset: 128 +// 0x00000040: .cfi_def_cfa_offset: 144 // 0x00000040: daddiu r29, r29, 32 -// 0x00000044: .cfi_def_cfa_offset: 96 +// 0x00000044: .cfi_def_cfa_offset: 112 // 0x00000044: .cfi_remember_state -// 0x00000044: ld r18, +24(r29) +// 0x00000044: ld r18, +40(r29) // 0x00000048: .cfi_restore: r18 -// 0x00000048: ld r19, +32(r29) +// 0x00000048: ld r19, +48(r29) // 0x0000004c: .cfi_restore: r19 -// 0x0000004c: ld r20, +40(r29) +// 0x0000004c: ld r20, +56(r29) // 0x00000050: .cfi_restore: r20 -// 0x00000050: ld r21, +48(r29) +// 0x00000050: ld r21, +64(r29) // 0x00000054: .cfi_restore: r21 -// 0x00000054: ld r22, +56(r29) +// 0x00000054: ld r22, +72(r29) // 0x00000058: .cfi_restore: r22 -// 0x00000058: ld r23, +64(r29) +// 0x00000058: ld r23, +80(r29) // 0x0000005c: .cfi_restore: r23 -// 0x0000005c: ld r28, +72(r29) +// 0x0000005c: ld r28, +88(r29) // 0x00000060: .cfi_restore: r28 -// 0x00000060: ld r30, +80(r29) +// 0x00000060: ld r30, +96(r29) // 0x00000064: .cfi_restore: r30 -// 0x00000064: ld r31, +88(r29) +// 0x00000064: ld r31, +104(r29) // 0x00000068: .cfi_restore: r31 -// 0x00000068: daddiu r29, r29, 96 +// 0x00000068: daddiu r29, r29, 112 // 0x0000006c: .cfi_def_cfa_offset: 0 // 0x0000006c: jr r31 // 0x00000070: nop // 0x00000074: .cfi_restore_state -// 0x00000074: .cfi_def_cfa_offset: 96 +// 0x00000074: .cfi_def_cfa_offset: 112 + diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc index ecf143d8f5..2d31a9881e 100644 --- a/compiler/jni/quick/mips/calling_convention_mips.cc +++ b/compiler/jni/quick/mips/calling_convention_mips.cc @@ -183,7 +183,7 @@ ManagedRegister MipsJniCallingConvention::ReturnScratchRegister() const { } size_t MipsJniCallingConvention::FrameSize() { - // Method*, LR and callee save area size, local reference segment state + // ArtMethod*, RA and callee save area size, local reference segment state size_t frame_data_size = kMipsPointerSize + (2 + CalleeSaveRegisters().size()) * kFramePointerSize; // References plus 2 words for HandleScope header diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc index 3a11bcfe9c..807d740b4d 100644 --- a/compiler/jni/quick/mips64/calling_convention_mips64.cc +++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc 
@@ -140,6 +140,7 @@ uint32_t Mips64JniCallingConvention::CoreSpillMask() const { // Compute spill mask to agree with callee saves initialized in the constructor uint32_t result = 0; result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << GP | 1 << S8 | 1 << RA; + DCHECK_EQ(static_cast<size_t>(POPCOUNT(result)), callee_save_regs_.size() + 1); return result; } @@ -148,9 +149,9 @@ ManagedRegister Mips64JniCallingConvention::ReturnScratchRegister() const { } size_t Mips64JniCallingConvention::FrameSize() { - // Mehtod* and callee save area size, local reference segment state + // ArtMethod*, RA and callee save area size, local reference segment state size_t frame_data_size = kFramePointerSize + - CalleeSaveRegisters().size() * kFramePointerSize + sizeof(uint32_t); + (CalleeSaveRegisters().size() + 1) * kFramePointerSize + sizeof(uint32_t); // References plus 2 words for HandleScope header size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount()); // Plus return value spill area size diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index a82d09eedd..d6cb65bd64 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -281,7 +281,7 @@ class OatWriter { // Offsets of the dex cache arrays for each app dex file. For the // boot image, this information is provided by the ImageWriter. - SafeMap<const DexFile*, size_t> dex_cache_arrays_offsets_; + SafeMap<const DexFile*, size_t> dex_cache_arrays_offsets_; // DexFiles not owned. // Offset of the oat data from the start of the mmapped region of the elf file. size_t oat_data_offset_; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 3e6cad83fa..92a5878476 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1376,8 +1376,7 @@ void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); HInstruction* cond = deoptimize->InputAt(0); - DCHECK(cond->IsCondition()); - if (cond->AsCondition()->NeedsMaterialization()) { + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { locations->SetInAt(0, Location::RequiresRegister()); } } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index ffb9b794fc..f68b11b504 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -2353,8 +2353,7 @@ void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); HInstruction* cond = deoptimize->InputAt(0); - DCHECK(cond->IsCondition()); - if (cond->AsCondition()->NeedsMaterialization()) { + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { locations->SetInAt(0, Location::RequiresRegister()); } } diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index eb20291e20..f561c97eb0 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -2223,8 +2223,7 @@ void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); HInstruction* cond = deoptimize->InputAt(0); - 
DCHECK(cond->IsCondition()); - if (cond->AsCondition()->NeedsMaterialization()) { + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { locations->SetInAt(0, Location::RequiresRegister()); } } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 2aea859b7d..963eec2529 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1306,8 +1306,7 @@ void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); HInstruction* cond = deoptimize->InputAt(0); - DCHECK(cond->IsCondition()); - if (cond->AsCondition()->NeedsMaterialization()) { + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { locations->SetInAt(0, Location::Any()); } } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index bf570f581b..ed2e4ca87c 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1252,8 +1252,7 @@ void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); HInstruction* cond = deoptimize->InputAt(0); - DCHECK(cond->IsCondition()); - if (cond->AsCondition()->NeedsMaterialization()) { + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { locations->SetInAt(0, Location::Any()); } } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index d468540091..7814eb9c11 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -73,6 +73,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; void VisitFakeString(HFakeString* fake_string) OVERRIDE; void VisitInvoke(HInvoke* invoke) OVERRIDE; + void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; @@ -1151,4 +1152,16 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { } } +void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) { + HInstruction* cond = deoptimize->InputAt(0); + if (cond->IsConstant()) { + if (cond->AsIntConstant()->IsZero()) { + // Never deopt: instruction can be removed. + deoptimize->GetBlock()->RemoveInstruction(deoptimize); + } else { + // Always deopt. 
+ } + } +} + } // namespace art diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 764a11475f..fe16d00b72 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -43,6 +43,93 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() { return codegen_->GetGraph()->GetArena(); } +#define __ codegen->GetAssembler()-> + +static void MoveFromReturnRegister(Location trg, + Primitive::Type type, + CodeGeneratorMIPS64* codegen) { + if (!trg.IsValid()) { + DCHECK_EQ(type, Primitive::kPrimVoid); + return; + } + + DCHECK_NE(type, Primitive::kPrimVoid); + + if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) { + GpuRegister trg_reg = trg.AsRegister<GpuRegister>(); + if (trg_reg != V0) { + __ Move(V0, trg_reg); + } + } else { + FpuRegister trg_reg = trg.AsFpuRegister<FpuRegister>(); + if (trg_reg != F0) { + if (type == Primitive::kPrimFloat) { + __ MovS(F0, trg_reg); + } else { + __ MovD(F0, trg_reg); + } + } + } +} + +static void MoveArguments(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { + InvokeDexCallingConventionVisitorMIPS64 calling_convention_visitor; + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); +} + +// Slow-path for fallback (calling the managed code to handle the +// intrinsic) in an intrinsified call. This will copy the arguments +// into the positions for a regular call. +// +// Note: The actual parameters are required to be in the locations +// given by the invoke's location summary. If an intrinsic +// modifies those locations before a slowpath call, they must be +// restored! +class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + explicit IntrinsicSlowPathMIPS64(HInvoke* invoke) : invoke_(invoke) { } + + void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + CodeGeneratorMIPS64* codegen = down_cast<CodeGeneratorMIPS64*>(codegen_in); + + __ Bind(GetEntryLabel()); + + SaveLiveRegisters(codegen, invoke_->GetLocations()); + + MoveArguments(invoke_, codegen); + + if (invoke_->IsInvokeStaticOrDirect()) { + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), + Location::RegisterLocation(A0)); + codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); + } else { + UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; + UNREACHABLE(); + } + + // Copy the result back to the expected output. + Location out = invoke_->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. + DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + MoveFromReturnRegister(out, invoke_->GetType(), codegen); + } + + RestoreLiveRegisters(codegen, invoke_->GetLocations()); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; } + + private: + // The instruction where this slow path is happening. 
+ HInvoke* const invoke_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathMIPS64); +}; + +#undef __ + bool IntrinsicLocationsBuilderMIPS64::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); @@ -185,7 +272,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); } -static void GenCountZeroes(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { +static void GenNumberOfLeadingZeroes(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); @@ -202,7 +289,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* } void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenCountZeroes(invoke->GetLocations(), false, GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), false, GetAssembler()); } // int java.lang.Long.numberOfLeadingZeros(long i) @@ -211,7 +298,103 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* inv } void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenCountZeroes(invoke->GetLocations(), true, GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenNumberOfTrailingZeroes(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + Location in = locations->InAt(0); + Location out = locations->Out(); + + if (is64bit) { + __ Dsbh(out.AsRegister<GpuRegister>(), in.AsRegister<GpuRegister>()); + __ Dshd(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); + __ Dbitswap(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); + __ Dclz(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); + } else { + __ Rotr(out.AsRegister<GpuRegister>(), in.AsRegister<GpuRegister>(), 16); + __ Wsbh(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); + __ Bitswap(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); + __ Clz(out.AsRegister<GpuRegister>(), out.AsRegister<GpuRegister>()); + } +} + +// int java.lang.Integer.numberOfTrailingZeros(int i) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + GenNumberOfTrailingZeroes(invoke->GetLocations(), false, GetAssembler()); +} + +// int java.lang.Long.numberOfTrailingZeros(long i) +void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + GenNumberOfTrailingZeroes(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenRotateRight(HInvoke* invoke, + Primitive::Type type, + Mips64Assembler* assembler) { + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + + LocationSummary* locations = invoke->GetLocations(); + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (invoke->InputAt(1)->IsIntConstant()) { + uint32_t shift = static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()); + if (type == Primitive::kPrimInt) { + shift &= 0x1f; + __ 
Rotr(out, in, shift);
+    } else {
+      shift &= 0x3f;
+      if (shift < 32) {
+        __ Drotr(out, in, shift);
+      } else {
+        shift &= 0x1f;
+        __ Drotr32(out, in, shift);
+      }
+    }
+  } else {
+    GpuRegister shamt = locations->InAt(1).AsRegister<GpuRegister>();
+    if (type == Primitive::kPrimInt) {
+      __ Rotrv(out, in, shamt);
+    } else {
+      __ Drotrv(out, in, shamt);
+    }
+  }
+}
+
+// int java.lang.Integer.rotateRight(int i, int distance)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerRotateRight(HInvoke* invoke) {
+  GenRotateRight(invoke, Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.rotateRight(long i, int distance)
+void IntrinsicLocationsBuilderMIPS64::VisitLongRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongRotateRight(HInvoke* invoke) {
+  GenRotateRight(invoke, Primitive::kPrimLong, GetAssembler());
 }
 
 static void GenReverse(LocationSummary* locations,
@@ -765,6 +948,505 @@ void IntrinsicCodeGeneratorMIPS64::VisitThreadCurrentThread(HInvoke* invoke) {
                     Thread::PeerOffset<kMips64PointerSize>().Int32Value());
 }
 
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenUnsafeGet(HInvoke* invoke,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         CodeGeneratorMIPS64* codegen) {
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK((type == Primitive::kPrimInt) ||
+         (type == Primitive::kPrimLong) ||
+         (type == Primitive::kPrimNot));
+  Mips64Assembler* assembler = codegen->GetAssembler();
+  // Object pointer.
+  GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
+  // Long offset.
+ GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>(); + GpuRegister trg = locations->Out().AsRegister<GpuRegister>(); + + __ Daddu(TMP, base, offset); + if (is_volatile) { + __ Sync(0); + } + switch (type) { + case Primitive::kPrimInt: + __ Lw(trg, TMP, 0); + break; + + case Primitive::kPrimNot: + __ Lwu(trg, TMP, 0); + break; + + case Primitive::kPrimLong: + __ Ld(trg, TMP, 0); + break; + + default: + LOG(FATAL) << "Unsupported op size " << type; + UNREACHABLE(); + } +} + +// int sun.misc.Unsafe.getInt(Object o, long offset) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); +} + +// int sun.misc.Unsafe.getIntVolatile(Object o, long offset) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); +} + +// long sun.misc.Unsafe.getLong(Object o, long offset) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); +} + +// long sun.misc.Unsafe.getLongVolatile(Object o, long offset) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); +} + +// Object sun.misc.Unsafe.getObject(Object o, long offset) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); +} + +// Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); +} + +static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); +} + +static void GenUnsafePut(LocationSummary* locations, + Primitive::Type type, + bool is_volatile, + bool is_ordered, + CodeGeneratorMIPS64* codegen) { + DCHECK((type == Primitive::kPrimInt) || + (type == Primitive::kPrimLong) || + (type == Primitive::kPrimNot)); + Mips64Assembler* assembler = codegen->GetAssembler(); + // Object pointer. + GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>(); + // Long offset. 
+ GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>(); + GpuRegister value = locations->InAt(3).AsRegister<GpuRegister>(); + + __ Daddu(TMP, base, offset); + if (is_volatile || is_ordered) { + __ Sync(0); + } + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimNot: + __ Sw(value, TMP, 0); + break; + + case Primitive::kPrimLong: + __ Sd(value, TMP, 0); + break; + + default: + LOG(FATAL) << "Unsupported op size " << type; + UNREACHABLE(); + } + if (is_volatile) { + __ Sync(0); + } + + if (type == Primitive::kPrimNot) { + codegen->MarkGCCard(base, value); + } +} + +// void sun.misc.Unsafe.putInt(Object o, long offset, int x) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafePut(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_); +} + +// void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_); +} + +// void sun.misc.Unsafe.putIntVolatile(Object o, long offset, int x) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_); +} + +// void sun.misc.Unsafe.putObject(Object o, long offset, Object x) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObject(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_); +} + +// void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_); +} + +// void sun.misc.Unsafe.putObjectVolatile(Object o, long offset, Object x) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_); +} + +// void sun.misc.Unsafe.putLong(Object o, long offset, long x) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLong(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_); +} + +// void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_); +} + +// void 
sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x) +void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_); +} + +// char java.lang.String.charAt(int index) +void IntrinsicLocationsBuilderMIPS64::VisitStringCharAt(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringCharAt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + + // Location of reference to data array + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + // Location of count + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + + GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister idx = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + // TODO: Maybe we can support range check elimination. Overall, + // though, I think it's not worth the cost. + // TODO: For simplicity, the index parameter is requested in a + // register, so different from Quick we will not optimize the + // code for constants (which would save a register). + + SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke); + codegen_->AddSlowPath(slow_path); + + // Load the string size + __ Lw(TMP, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(invoke); + // Revert to slow path if idx is too large, or negative + __ Bgeuc(idx, TMP, slow_path->GetEntryLabel()); + + // out = obj[2*idx]. + __ Sll(TMP, idx, 1); // idx * 2 + __ Daddu(TMP, TMP, obj); // Address of char at location idx + __ Lhu(out, TMP, value_offset); // Load char at location idx + + __ Bind(slow_path->GetExitLabel()); +} + +// int java.lang.String.compareTo(String anotherString) +void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. 
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + GpuRegister argument = locations->InAt(1).AsRegister<GpuRegister>(); + SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke); + codegen_->AddSlowPath(slow_path); + __ Beqzc(argument, slow_path->GetEntryLabel()); + + __ LoadFromOffset(kLoadDoubleword, + TMP, + TR, + QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, + pStringCompareTo).Int32Value()); + __ Jalr(TMP); + __ Nop(); + __ Bind(slow_path->GetExitLabel()); +} + +static void GenerateStringIndexOf(HInvoke* invoke, + Mips64Assembler* assembler, + CodeGeneratorMIPS64* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + GpuRegister tmp_reg = start_at_zero ? locations->GetTemp(0).AsRegister<GpuRegister>() : TMP; + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check for code points > 0xFFFF. Either a slow-path check when we + // don't know statically, or directly dispatch if we have a constant. + SlowPathCodeMIPS64* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (!IsUint<16>(invoke->InputAt(1)->AsIntConstant()->GetValue())) { + // Always needs the slow-path. We could directly dispatch to it, + // but this case should be rare, so for simplicity just put the + // full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke); + codegen->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + GpuRegister char_reg = locations->InAt(1).AsRegister<GpuRegister>(); + __ LoadConst32(tmp_reg, std::numeric_limits<uint16_t>::max()); + slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke); + codegen->AddSlowPath(slow_path); + __ Bltuc(tmp_reg, char_reg, slow_path->GetEntryLabel()); // UTF-16 required + } + + if (start_at_zero) { + DCHECK_EQ(tmp_reg, A2); + // Start-index = 0. + __ Clear(tmp_reg); + } else { + __ Slt(TMP, A2, ZERO); // if fromIndex < 0 + __ Seleqz(A2, A2, TMP); // fromIndex = 0 + } + + __ LoadFromOffset(kLoadDoubleword, + TMP, + TR, + QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pIndexOf).Int32Value()); + __ Jalr(TMP); + __ Nop(); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +// int java.lang.String.indexOf(int ch) +void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime + // calling convention. So it's best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); + + // Need a temp for slow-path codepoint compare, and need to send start-index=0. 
+ locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +// int java.lang.String.indexOf(int ch, int fromIndex) +void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime + // calling convention. So it's best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + +// java.lang.String.String(byte[] bytes) +void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); + Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + GpuRegister byte_array = locations->InAt(0).AsRegister<GpuRegister>(); + SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke); + codegen_->AddSlowPath(slow_path); + __ Beqzc(byte_array, slow_path->GetEntryLabel()); + + __ LoadFromOffset(kLoadDoubleword, + TMP, + TR, + QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromBytes).Int32Value()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Jalr(TMP); + __ Nop(); + __ Bind(slow_path->GetExitLabel()); +} + +// java.lang.String.String(char[] value) +void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + 
locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + + __ LoadFromOffset(kLoadDoubleword, + TMP, + TR, + QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromChars).Int32Value()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Jalr(TMP); + __ Nop(); +} + +// java.lang.String.String(String original) +void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromString(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + GpuRegister string_to_copy = locations->InAt(0).AsRegister<GpuRegister>(); + SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke); + codegen_->AddSlowPath(slow_path); + __ Beqzc(string_to_copy, slow_path->GetEntryLabel()); + + __ LoadFromOffset(kLoadDoubleword, + TMP, + TR, + QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, pAllocStringFromString).Int32Value()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Jalr(TMP); + __ Nop(); + __ Bind(slow_path->GetExitLabel()); +} + // Unimplemented intrinsics. 
#define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -776,38 +1458,12 @@ void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(MathRoundFloat) -UNIMPLEMENTED_INTRINSIC(UnsafeGet) -UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile) -UNIMPLEMENTED_INTRINSIC(UnsafeGetLong) -UNIMPLEMENTED_INTRINSIC(UnsafeGetLongVolatile) -UNIMPLEMENTED_INTRINSIC(UnsafeGetObject) -UNIMPLEMENTED_INTRINSIC(UnsafeGetObjectVolatile) -UNIMPLEMENTED_INTRINSIC(UnsafePut) -UNIMPLEMENTED_INTRINSIC(UnsafePutOrdered) -UNIMPLEMENTED_INTRINSIC(UnsafePutVolatile) -UNIMPLEMENTED_INTRINSIC(UnsafePutObject) -UNIMPLEMENTED_INTRINSIC(UnsafePutObjectOrdered) -UNIMPLEMENTED_INTRINSIC(UnsafePutObjectVolatile) -UNIMPLEMENTED_INTRINSIC(UnsafePutLong) -UNIMPLEMENTED_INTRINSIC(UnsafePutLongOrdered) -UNIMPLEMENTED_INTRINSIC(UnsafePutLongVolatile) UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) -UNIMPLEMENTED_INTRINSIC(StringCharAt) -UNIMPLEMENTED_INTRINSIC(StringCompareTo) UNIMPLEMENTED_INTRINSIC(StringEquals) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) -UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes) -UNIMPLEMENTED_INTRINSIC(StringNewStringFromChars) -UNIMPLEMENTED_INTRINSIC(StringNewStringFromString) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) -UNIMPLEMENTED_INTRINSIC(LongRotateRight) -UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros) UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) -UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) -UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index f7a7e420bb..a1feaf77bd 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -756,7 +756,9 @@ void ReferenceTypePropagation::ProcessWorklist() { while (!worklist_.empty()) { HInstruction* instruction = worklist_.back(); worklist_.pop_back(); - if (UpdateNullability(instruction) || UpdateReferenceTypeInfo(instruction)) { + bool updated_nullability = UpdateNullability(instruction); + bool updated_reference_type = UpdateReferenceTypeInfo(instruction); + if (updated_nullability || updated_reference_type) { AddDependentInstructionsToWorklist(instruction); } } diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index b30f7d772e..f1233ca457 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -83,6 +83,15 @@ class AssemblerTest : public testing::Test { fmt); } + std::string RepeatRRNoDupes(void (Ass::*f)(Reg, Reg), std::string fmt) { + return RepeatTemplatedRegistersNoDupes<Reg, Reg>(f, + GetRegisters(), + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt); + } + std::string Repeatrr(void (Ass::*f)(Reg, Reg), std::string fmt) { return RepeatTemplatedRegisters<Reg, Reg>(f, GetRegisters(), @@ -608,6 +617,45 @@ class AssemblerTest : public testing::Test { return str; } + template <typename Reg1, typename Reg2> + std::string RepeatTemplatedRegistersNoDupes(void (Ass::*f)(Reg1, Reg2), + const std::vector<Reg1*> reg1_registers, + const std::vector<Reg2*> reg2_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const 
Reg2&), + std::string fmt) { + WarnOnCombinations(reg1_registers.size() * reg2_registers.size()); + + std::string str; + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + if (reg1 == reg2) continue; + (assembler_.get()->*f)(*reg1, *reg2); + std::string base = fmt; + + std::string reg1_string = (this->*GetName1)(*reg1); + size_t reg1_index; + while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { + base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); + } + + std::string reg2_string = (this->*GetName2)(*reg2); + size_t reg2_index; + while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { + base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + // Add a newline at the end. + str += "\n"; + return str; + } + template <typename Reg1, typename Reg2, typename Reg3> std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3), const std::vector<Reg1*> reg1_registers, diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index b078f3e4cf..00e8995bff 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -335,6 +335,10 @@ void Mips64Assembler::Sllv(GpuRegister rd, GpuRegister rt, GpuRegister rs) { EmitR(0, rs, rt, rd, 0, 0x04); } +void Mips64Assembler::Rotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs) { + EmitR(0, rs, rt, rd, 1, 0x06); +} + void Mips64Assembler::Srlv(GpuRegister rd, GpuRegister rt, GpuRegister rs) { EmitR(0, rs, rt, rd, 0, 0x06); } @@ -351,6 +355,10 @@ void Mips64Assembler::Dsrl(GpuRegister rd, GpuRegister rt, int shamt) { EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3a); } +void Mips64Assembler::Drotr(GpuRegister rd, GpuRegister rt, int shamt) { + EmitR(0, static_cast<GpuRegister>(1), rt, rd, shamt, 0x3a); +} + void Mips64Assembler::Dsra(GpuRegister rd, GpuRegister rt, int shamt) { EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3b); } @@ -363,6 +371,10 @@ void Mips64Assembler::Dsrl32(GpuRegister rd, GpuRegister rt, int shamt) { EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3e); } +void Mips64Assembler::Drotr32(GpuRegister rd, GpuRegister rt, int shamt) { + EmitR(0, static_cast<GpuRegister>(1), rt, rd, shamt, 0x3e); +} + void Mips64Assembler::Dsra32(GpuRegister rd, GpuRegister rt, int shamt) { EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3f); } @@ -375,6 +387,10 @@ void Mips64Assembler::Dsrlv(GpuRegister rd, GpuRegister rt, GpuRegister rs) { EmitR(0, rs, rt, rd, 0, 0x16); } +void Mips64Assembler::Drotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs) { + EmitR(0, rs, rt, rd, 1, 0x16); +} + void Mips64Assembler::Dsrav(GpuRegister rd, GpuRegister rt, GpuRegister rs) { EmitR(0, rs, rt, rd, 0, 0x17); } @@ -773,6 +789,10 @@ void Mips64Assembler::Cvtds(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x21); } +void Mips64Assembler::Cvtsl(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x15, static_cast<FpuRegister>(0), fs, fd, 0x20); +} + void Mips64Assembler::Cvtdl(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x15, static_cast<FpuRegister>(0), fs, fd, 0x21); } diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index d083eb4306..33f22d2c2d 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -123,15 +123,19 @@ class Mips64Assembler FINAL : public Assembler { 
void Sra(GpuRegister rd, GpuRegister rt, int shamt); void Sllv(GpuRegister rd, GpuRegister rt, GpuRegister rs); void Srlv(GpuRegister rd, GpuRegister rt, GpuRegister rs); + void Rotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs); void Srav(GpuRegister rd, GpuRegister rt, GpuRegister rs); void Dsll(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsrl(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 + void Drotr(GpuRegister rd, GpuRegister rt, int shamt); void Dsra(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsll32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsrl32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 + void Drotr32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsra32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsllv(GpuRegister rd, GpuRegister rt, GpuRegister rs); // MIPS64 void Dsrlv(GpuRegister rd, GpuRegister rt, GpuRegister rs); // MIPS64 + void Drotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs); // MIPS64 void Dsrav(GpuRegister rd, GpuRegister rt, GpuRegister rs); // MIPS64 void Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16); @@ -230,6 +234,7 @@ class Mips64Assembler FINAL : public Assembler { void Cvtdw(FpuRegister fd, FpuRegister fs); void Cvtsd(FpuRegister fd, FpuRegister fs); void Cvtds(FpuRegister fd, FpuRegister fs); + void Cvtsl(FpuRegister fd, FpuRegister fs); void Cvtdl(FpuRegister fd, FpuRegister fs); void Mfc1(GpuRegister rt, FpuRegister fs); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 2071aca546..16f29b00bc 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -215,6 +215,22 @@ TEST_F(AssemblerMIPS64Test, AbsD) { DriverStr(RepeatFF(&mips64::Mips64Assembler::AbsD, "abs.d ${reg1}, ${reg2}"), "abs.d"); } +TEST_F(AssemblerMIPS64Test, MovS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::MovS, "mov.s ${reg1}, ${reg2}"), "mov.s"); +} + +TEST_F(AssemblerMIPS64Test, MovD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::MovD, "mov.d ${reg1}, ${reg2}"), "mov.d"); +} + +TEST_F(AssemblerMIPS64Test, NegS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::NegS, "neg.s ${reg1}, ${reg2}"), "neg.s"); +} + +TEST_F(AssemblerMIPS64Test, NegD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::NegD, "neg.d ${reg1}, ${reg2}"), "neg.d"); +} + TEST_F(AssemblerMIPS64Test, RoundLS) { DriverStr(RepeatFF(&mips64::Mips64Assembler::RoundLS, "round.l.s ${reg1}, ${reg2}"), "round.l.s"); } @@ -307,6 +323,34 @@ TEST_F(AssemblerMIPS64Test, CvtDL) { DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l"); } +TEST_F(AssemblerMIPS64Test, CvtDS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "cvt.d.s"); +} + +TEST_F(AssemblerMIPS64Test, CvtDW) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "cvt.d.w"); +} + +TEST_F(AssemblerMIPS64Test, CvtSL) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsl, "cvt.s.l ${reg1}, ${reg2}"), "cvt.s.l"); +} + +TEST_F(AssemblerMIPS64Test, CvtSD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "cvt.s.d"); +} + +TEST_F(AssemblerMIPS64Test, CvtSW) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "cvt.s.w"); +} + +//////////////// +// CALL / JMP // +//////////////// + +TEST_F(AssemblerMIPS64Test, Jalr) { + DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr 
${reg1}, ${reg2}"), "jalr"); +} + ////////// // MISC // ////////// @@ -319,6 +363,14 @@ TEST_F(AssemblerMIPS64Test, Dbitswap) { DriverStr(RepeatRR(&mips64::Mips64Assembler::Dbitswap, "dbitswap ${reg1}, ${reg2}"), "dbitswap"); } +TEST_F(AssemblerMIPS64Test, Seb) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Seb, "seb ${reg1}, ${reg2}"), "seb"); +} + +TEST_F(AssemblerMIPS64Test, Seh) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Seh, "seh ${reg1}, ${reg2}"), "seh"); +} + TEST_F(AssemblerMIPS64Test, Dsbh) { DriverStr(RepeatRR(&mips64::Mips64Assembler::Dsbh, "dsbh ${reg1}, ${reg2}"), "dsbh"); } @@ -331,6 +383,42 @@ TEST_F(AssemblerMIPS64Test, Wsbh) { DriverStr(RepeatRR(&mips64::Mips64Assembler::Wsbh, "wsbh ${reg1}, ${reg2}"), "wsbh"); } +TEST_F(AssemblerMIPS64Test, Sll) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sll, 5, "sll ${reg1}, ${reg2}, {imm}"), "sll"); +} + +TEST_F(AssemblerMIPS64Test, Srl) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Srl, 5, "srl ${reg1}, ${reg2}, {imm}"), "srl"); +} + +TEST_F(AssemblerMIPS64Test, Sra) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sra, 5, "sra ${reg1}, ${reg2}, {imm}"), "sra"); +} + +TEST_F(AssemblerMIPS64Test, Dsll) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsll, 5, "dsll ${reg1}, ${reg2}, {imm}"), "dsll"); +} + +TEST_F(AssemblerMIPS64Test, Dsrl) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsrl, 5, "dsrl ${reg1}, ${reg2}, {imm}"), "dsrl"); +} + +TEST_F(AssemblerMIPS64Test, Dsra) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsra, 5, "dsra ${reg1}, ${reg2}, {imm}"), "dsra"); +} + +TEST_F(AssemblerMIPS64Test, Dsll32) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsll32, 5, "dsll32 ${reg1}, ${reg2}, {imm}"), "dsll32"); +} + +TEST_F(AssemblerMIPS64Test, Dsrl32) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsrl32, 5, "dsrl32 ${reg1}, ${reg2}, {imm}"), "dsrl32"); +} + +TEST_F(AssemblerMIPS64Test, Dsra32) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Dsra32, 5, "dsra32 ${reg1}, ${reg2}, {imm}"), "dsra32"); +} + TEST_F(AssemblerMIPS64Test, Sc) { DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sc, -9, "sc ${reg1}, {imm}(${reg2})"), "sc"); } diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index d09631bc71..631b784787 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -891,7 +891,110 @@ END art_quick_set64_instance ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Generate the allocation entrypoints for each allocator. -GENERATE_ALL_ALLOC_ENTRYPOINTS +GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). +ENTRY art_quick_alloc_object_rosalloc + // Fast path rosalloc allocation. + // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current + // r2, r3, r12: free. + ldr r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32] // Load dex cache resolved types array + // Load the class (r2) + ldr r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] + cbz r2, .Lart_quick_alloc_object_rosalloc_slow_path // Check null class + // Check class status. + ldr r3, [r2, #MIRROR_CLASS_STATUS_OFFSET] + cmp r3, #MIRROR_CLASS_STATUS_INITIALIZED + bne .Lart_quick_alloc_object_rosalloc_slow_path + // Add a fake dependence from the + // following access flag and size + // loads to the status load. 
+                                                       // This is to prevent those loads
+                                                       // from being reordered above the
+                                                       // status load and reading wrong
+                                                       // values (an alternative is to use
+                                                       // a load-acquire for the status).
+    eor    r3, r3, r3
+    add    r2, r2, r3
+                                                       // Check whether the access flags
+                                                       // have kAccClassIsFinalizable.
+    ldr    r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+    tst    r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
+    bne    .Lart_quick_alloc_object_rosalloc_slow_path
+
+    ldr    r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
+                                                       // allocation stack has room.
+                                                       // TODO: consider using ldrd.
+    ldr    r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
+    cmp    r3, r12
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+
+    ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]  // Load the object size (r3).
+    cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread
+                                                       // local allocation.
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                       // Compute the rosalloc bracket index
+                                                       // from the size:
+                                                       // align the size up to the rosalloc
+                                                       // bracket quantum size, divide by
+                                                       // the quantum size, and subtract 1.
+                                                       // This code is a shorter but
+                                                       // equivalent version.
+    sub    r3, r3, #1
+    lsr    r3, r3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT
+                                                       // Load the rosalloc run (r12).
+    add    r12, r9, r3, lsl #POINTER_SIZE_SHIFT
+    ldr    r12, [r12, #THREAD_ROSALLOC_RUNS_OFFSET]
+                                                       // Load the free list head (r3). This
+                                                       // will be the return value.
+    ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
+    cbz    r3, .Lart_quick_alloc_object_rosalloc_slow_path
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
+    ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]        // Load the next pointer of the head
+                                                       // and update the list head with the
+                                                       // next pointer.
+    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
+                                                       // Store the class pointer in the
+                                                       // header. This also overwrites the
+                                                       // next pointer. The offsets are
+                                                       // asserted to match.
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+    POISON_HEAP_REF r2
+    str    r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
+                                                       // Push the new object onto the thread
+                                                       // local allocation stack and
+                                                       // increment the thread local
+                                                       // allocation stack top.
+    ldr    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
+    str    r3, [r1], #COMPRESSED_REFERENCE_SIZE        // (Increment r1 as a side effect.)
+    str    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
+                                                       // Decrement the size of the free list.
+    ldr    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
+    sub    r1, #1
+                                                       // TODO: consider combining this store
+                                                       // and the list head store above using
+                                                       // strd.
+    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
+                                                       // Fence. This is "ish" not "ishst" so
+                                                       // that the code after this allocation
+                                                       // site will see the right values in
+                                                       // the fields of the class.
+                                                       // (Alternatively we could use "ishst"
+                                                       // if we use load-acquire for the
+                                                       // class status load.)
+    dmb    ish
+    mov    r0, r3                                      // Set the return value and return.
+    bx     lr
+
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
+    mov    r2, r9                             @ pass Thread::Current
+    bl     artAllocObjectFromCodeRosAlloc     @ (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_rosalloc
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 68156ae7e3..66c8aadf33 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1615,5 +1615,70 @@ ENTRY art_quick_deoptimize_from_compiled_code
     move   $a0, rSELF                         # pass Thread::current
 END art_quick_deoptimize_from_compiled_code
 
-UNIMPLEMENTED art_quick_indexof
-UNIMPLEMENTED art_quick_string_compareto
+  .set push
+  .set noat
+/* java.lang.String.compareTo(String anotherString) */
+ENTRY_NO_GP art_quick_string_compareto
+/* $a0 holds address of "this" */
+/* $a1 holds address of "anotherString" */
+  beq    $a0,$a1,9f      # this and anotherString are the same object
+  move   $v0,$zero
+
+  lw     $a2,MIRROR_STRING_COUNT_OFFSET($a0)   # this.length()
+  lw     $a3,MIRROR_STRING_COUNT_OFFSET($a1)   # anotherString.length()
+  sltu   $at,$a2,$a3
+  seleqz $t2,$a3,$at
+  selnez $at,$a2,$at
+  or     $t2,$t2,$at     # $t2 now holds min(this.length(),anotherString.length())
+
+  beqz   $t2,9f          # while min(this.length(),anotherString.length())-i != 0
+  subu   $v0,$a2,$a3     # if $t2==0 return
+                         #     (this.length() - anotherString.length())
+1:
+  lhu    $t0,MIRROR_STRING_VALUE_OFFSET($a0)   # while this.charAt(i) == anotherString.charAt(i)
+  lhu    $t1,MIRROR_STRING_VALUE_OFFSET($a1)
+  bne    $t0,$t1,9f      # if this.charAt(i) != anotherString.charAt(i)
+  subu   $v0,$t0,$t1     #     return (this.charAt(i) - anotherString.charAt(i))
+  daddiu $a0,$a0,2       # point at this.charAt(i++)
+  subu   $t2,$t2,1       # new value of
+                         # min(this.length(),anotherString.length())-i
+  bnez   $t2,1b
+  daddiu $a1,$a1,2       # point at anotherString.charAt(i++)
+  subu   $v0,$a2,$a3
+
+9:
+  j      $ra
+  nop
+END art_quick_string_compareto
+
+/* java.lang.String.indexOf(int ch, int fromIndex=0) */
+ENTRY_NO_GP art_quick_indexof
+/* $a0 holds address of "this" */
+/* $a1 holds "ch" */
+/* $a2 holds "fromIndex" */
+  lw    $t0,MIRROR_STRING_COUNT_OFFSET($a0)    # this.length()
+  subu  $t0,$t0,$a2      # this.length() - offset
+  blez  $t0,6f           # if this.length()-offset <= 0
+  li    $v0,-1           #     return -1;
+
+  sll   $v0,$a2,1        # $a0 += $a2 * 2
+  daddu $a0,$a0,$v0      #  "  "   "  " "
+  move  $v0,$a2          # Set i to offset.
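+                         # Editor's note: the loop below is, in effect,
+                         # this Java-like sketch (illustrative names only):
+                         #   while (length - i > 0) {
+                         #     if (value[i] == ch) return i;
+                         #     i++;
+                         #   }
+                         #   return -1;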
+
+1:
+  lhu   $t3,MIRROR_STRING_VALUE_OFFSET($a0)    # if this.charAt(i) == ch
+  beq   $t3,$a1,6f                             #     return i;
+  daddu $a0,$a0,2        # i++
+  subu  $t0,$t0,1        # this.length() - i
+  bnez  $t0,1b           # while this.length() - i > 0
+  addu  $v0,$v0,1        # i++
+
+  li    $v0,-1           # if this.length() - i <= 0
+                         #     return -1;
+
+6:
+  j     $ra
+  nop
+END art_quick_indexof
+
+  .set pop
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index ef5edbb227..fbacdbc930 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -113,7 +113,8 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMal
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 4a106e44c6..2f485ae644 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -788,6 +788,7 @@ END_MACRO
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
 
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 5c413d20f8..95f0ccb419 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -809,6 +809,7 @@ END_MACRO
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index d98fc5179f..69f6fe96ff 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -19,6 +19,7 @@
 
 #if defined(__cplusplus)
 #include "art_method.h"
+#include "gc/allocator/rosalloc.h"
 #include "lock_word.h"
 #include "mirror/class.h"
 #include "mirror/string.h"
@@ -53,6 +54,14 @@ static inline void CheckAsmSupportOffsetsAndSizes() {
 #define ADD_TEST_EQ(x, y)
 #endif
 
+#if defined(__LP64__)
+#define POINTER_SIZE_SHIFT 3
+#else
+#define POINTER_SIZE_SHIFT 2
+#endif
+ADD_TEST_EQ(static_cast<size_t>(1U << POINTER_SIZE_SHIFT),
+            static_cast<size_t>(__SIZEOF_POINTER__))
+
 // Size of references to the heap on the stack.
#define STACK_REFERENCE_SIZE 4
ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReference<art::mirror::Object>))
@@ -62,6 +71,10 @@ ADD_TEST_EQ(static_cast<size_t>(STACK_REFERENCE_SIZE), sizeof(art::StackReferenc
 ADD_TEST_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE),
             sizeof(art::mirror::CompressedReference<art::mirror::Object>))
 
+#define COMPRESSED_REFERENCE_SIZE_SHIFT 2
+ADD_TEST_EQ(static_cast<size_t>(1U << COMPRESSED_REFERENCE_SIZE_SHIFT),
+            static_cast<size_t>(COMPRESSED_REFERENCE_SIZE))
+
 // Note: these callee save method loads require read barriers.
 // Offset of field Runtime::callee_save_methods_[kSaveAll]
 #define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
@@ -120,6 +133,18 @@ ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
 #define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.rosalloc_runs.
+#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_ROSALLOC_RUNS_OFFSET,
+            art::Thread::RosAllocRunsOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.thread_local_alloc_stack_top.
+#define THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 34 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET,
+            art::Thread::ThreadLocalAllocStackTopOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.thread_local_alloc_stack_end.
+#define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 35 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
+            art::Thread::ThreadLocalAllocStackEndOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offsets within java.lang.Object.
#define MIRROR_OBJECT_CLASS_OFFSET 0
@@ -236,6 +261,44 @@ ADD_TEST_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), art::kObjectAlignment -
 ADD_TEST_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED),
             ~static_cast<uint32_t>(art::kObjectAlignment - 1))
 
+#define ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE 128
+ADD_TEST_EQ(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize))
+
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 4
+ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSizeShift))
+
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK 15
+ADD_TEST_EQ(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
+
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32 0xfffffff0
+ADD_TEST_EQ(static_cast<uint32_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED32),
+            ~static_cast<uint32_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
+
+#define ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64 0xfffffffffffffff0
+ADD_TEST_EQ(static_cast<uint64_t>(ROSALLOC_BRACKET_QUANTUM_SIZE_MASK_TOGGLED64),
+            ~static_cast<uint64_t>(art::gc::allocator::RosAlloc::kBracketQuantumSize - 1))
+
+#define ROSALLOC_RUN_FREE_LIST_OFFSET 8
+ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_OFFSET,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListOffset()))
+
+#define ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET 0
+ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListHeadOffset()))
+
+#define ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET 16
+ADD_TEST_EQ(ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunFreeListSizeOffset()))
+
+#define ROSALLOC_SLOT_NEXT_OFFSET 0
+ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET,
+            static_cast<int32_t>(art::gc::allocator::RosAlloc::RunSlotNextOffset()))
+// Assert this so that we can avoid zeroing the next field by installing the class pointer.
+ADD_TEST_EQ(ROSALLOC_SLOT_NEXT_OFFSET, MIRROR_OBJECT_CLASS_OFFSET)
+
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
 #endif
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index f58aaa6c0d..81622e14ed 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1208,7 +1208,7 @@ mirror::DexCache* ClassLinker::AllocDexCache(Thread* self,
   uint8_t* raw_arrays = nullptr;
   if (dex_file.GetOatDexFile() != nullptr &&
       dex_file.GetOatDexFile()->GetDexCacheArrays() != nullptr) {
-    raw_arrays = const_cast<uint8_t*>(dex_file.GetOatDexFile()->GetDexCacheArrays());
+    raw_arrays = dex_file.GetOatDexFile()->GetDexCacheArrays();
  } else if (dex_file.NumStringIds() != 0u || dex_file.NumTypeIds() != 0u ||
             dex_file.NumMethodIds() != 0u || dex_file.NumFieldIds() != 0u) {
    // NOTE: We "leak" the raw_arrays because we never destroy the dex cache.
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 87f1392920..3ce3d634f5 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -131,6 +131,7 @@ class RosAlloc {
 
    private:
     Slot* next_;  // Next slot in the list.
+    friend class RosAlloc;
   };
 
   // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to
@@ -302,6 +303,7 @@ class RosAlloc {
     // free without traversing the whole free list.
     uint32_t size_;
     uint32_t padding_ ATTRIBUTE_UNUSED;
+    friend class RosAlloc;
   };
 
   // Represents a run of memory slots of the same size.
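Editor's note: the ROSALLOC_* constants above exist so the hand-written assembly fast paths can map an allocation size to a thread-local rosalloc run without calling into C++. A minimal standalone sketch of that mapping (illustrative code, not part of ART; the names mirror the constants asserted above):

    #include <cassert>
    #include <cstddef>

    constexpr size_t kBracketQuantumSizeShift = 4;     // Log2 of the 16-byte bracket quantum.
    constexpr size_t kMaxThreadLocalBracketSize = 128;

    // Round the size up to the quantum, divide by the quantum size, and subtract 1,
    // folded into a single subtract-and-shift -- the same computation as the ARM
    // sequence "sub r3, r3, #1; lsr r3, r3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT".
    size_t BracketIndexForSize(size_t size) {
      assert(size != 0 && size <= kMaxThreadLocalBracketSize);
      return (size - 1) >> kBracketQuantumSizeShift;
    }

Sizes 1..16 map to bracket 0, 17..32 to bracket 1, and so on up to 113..128 mapping to bracket 7, which is why kNumThreadLocalSizeBrackets == 8 covers kMaxThreadLocalBracketSize.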
@@ -482,7 +484,7 @@ class RosAlloc {
   static constexpr uint8_t kMagicNumFree = 43;
   // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
   static constexpr size_t kNumOfSizeBrackets = kNumRosAllocThreadLocalSizeBrackets;
-  // The number of smaller size brackets that are 16 bytes apart.
+  // The number of smaller size brackets that are the quantum size apart.
   static constexpr size_t kNumOfQuantumSizeBrackets = 32;
   // The sizes (the slot sizes, in bytes) of the size brackets.
   static size_t bracketSizes[kNumOfSizeBrackets];
@@ -520,9 +522,7 @@ class RosAlloc {
   }
   // Returns true if the given allocation size is for a thread local allocation.
   static bool IsSizeForThreadLocal(size_t size) {
-    DCHECK_GT(kNumThreadLocalSizeBrackets, 0U);
-    size_t max_thread_local_bracket_idx = kNumThreadLocalSizeBrackets - 1;
-    bool is_size_for_thread_local = size <= bracketSizes[max_thread_local_bracket_idx];
+    bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
     DCHECK(size > kLargeSizeThreshold ||
            (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
     return is_size_for_thread_local;
@@ -634,6 +634,16 @@ class RosAlloc {
   // are less than this index. We use shared (current) runs for the rest.
   static const size_t kNumThreadLocalSizeBrackets = 8;
 
+  // The size of the largest bracket we use thread-local runs for.
+  // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
+  static const size_t kMaxThreadLocalBracketSize = 128;
+
+  // The bracket size increment for the brackets of size <= 512 bytes.
+  static constexpr size_t kBracketQuantumSize = 16;
+
+  // Equal to Log2(kBracketQuantumSize).
+  static constexpr size_t kBracketQuantumSizeShift = 4;
+
  private:
   // The base address of the memory region that's managed by this allocator.
   uint8_t* base_;
@@ -770,6 +780,19 @@ class RosAlloc {
            size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
   ~RosAlloc();
 
+  static size_t RunFreeListOffset() {
+    return OFFSETOF_MEMBER(Run, free_list_);
+  }
+  static size_t RunFreeListHeadOffset() {
+    return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
+  }
+  static size_t RunFreeListSizeOffset() {
+    return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
+  }
+  static size_t RunSlotNextOffset() {
+    return OFFSETOF_MEMBER(Slot, next_);
+  }
+
   // If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization.
   // If used, this may cause race conditions if multiple threads are allocating at the same time.
   template<bool kThreadSafe = true>
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index e861921130..a162a4ea72 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -18,6 +18,7 @@
 
 #include <dlfcn.h>
 #include <string.h>
+#include <type_traits>
 #include <unistd.h>
 
 #include <cstdlib>
@@ -388,13 +389,13 @@ bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file
   // Readjust to be non-inclusive upper bound.
   end_ += sizeof(uint32_t);
 
-  bss_begin_ = elf_file_->FindDynamicSymbolAddress("oatbss");
+  bss_begin_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbss"));
   if (bss_begin_ == nullptr) {
     // No .bss section. Clear dlerror().
bss_end_ = nullptr;
     dlerror();
   } else {
-    bss_end_ = elf_file_->FindDynamicSymbolAddress("oatbsslastword");
+    bss_end_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbsslastword"));
     if (bss_end_ == nullptr) {
       *error_msg = StringPrintf("Failed to find oatbsslastword symbol in '%s'",
                                 file->GetPath().c_str());
@@ -407,10 +408,31 @@ bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file
   return Setup(abs_dex_location, error_msg);
 }
 
+// Read an unaligned entry from the OatDexFile data in OatFile and advance the read
+// position by the number of bytes read, i.e. sizeof(T).
+// Return true on success, false if the read would go beyond the end of the OatFile.
+template <typename T>
+inline static bool ReadOatDexFileData(const OatFile& oat_file,
+                                      /*inout*/const uint8_t** oat,
+                                      /*out*/T* value) {
+  DCHECK(oat != nullptr);
+  DCHECK(value != nullptr);
+  DCHECK_LE(*oat, oat_file.End());
+  if (UNLIKELY(static_cast<size_t>(oat_file.End() - *oat) < sizeof(T))) {
+    return false;
+  }
+  static_assert(std::is_trivial<T>::value, "T must be a trivial type");
+  typedef __attribute__((__aligned__(1))) T unaligned_type;
+  *value = *reinterpret_cast<const unaligned_type*>(*oat);
+  *oat += sizeof(T);
+  return true;
+}
+
 bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
   if (!GetOatHeader().IsValid()) {
     std::string cause = GetOatHeader().GetValidationErrorMessage();
-    *error_msg = StringPrintf("Invalid oat header for '%s': %s", GetLocation().c_str(),
+    *error_msg = StringPrintf("Invalid oat header for '%s': %s",
+                              GetLocation().c_str(),
                               cause.c_str());
     return false;
   }
@@ -424,35 +446,42 @@ bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
   oat += GetOatHeader().GetKeyValueStoreSize();
   if (oat > End()) {
     *error_msg = StringPrintf("In oat file '%s' found truncated variable-size data: "
-                              "%p + %zd + %ud <= %p", GetLocation().c_str(),
-                              Begin(), sizeof(OatHeader), GetOatHeader().GetKeyValueStoreSize(),
+                              "%p + %zu + %u <= %p",
+                              GetLocation().c_str(),
+                              Begin(),
+                              sizeof(OatHeader),
+                              GetOatHeader().GetKeyValueStoreSize(),
                               End());
     return false;
   }
 
   size_t pointer_size = GetInstructionSetPointerSize(GetOatHeader().GetInstructionSet());
-  const uint8_t* dex_cache_arrays = bss_begin_;
+  uint8_t* dex_cache_arrays = bss_begin_;
   uint32_t dex_file_count = GetOatHeader().GetDexFileCount();
   oat_dex_files_storage_.reserve(dex_file_count);
   for (size_t i = 0; i < dex_file_count; i++) {
-    uint32_t dex_file_location_size = *reinterpret_cast<const uint32_t*>(oat);
-    if (UNLIKELY(dex_file_location_size == 0U)) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd with empty location name",
-                                GetLocation().c_str(), i);
+    uint32_t dex_file_location_size;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &dex_file_location_size))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu truncated after dex file "
+                                "location size",
+                                GetLocation().c_str(),
+                                i);
       return false;
     }
-    oat += sizeof(dex_file_location_size);
-    if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd truncated after dex file "
-                                "location size", GetLocation().c_str(), i);
+    if (UNLIKELY(dex_file_location_size == 0U)) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu with empty location name",
+                                GetLocation().c_str(),
+                                i);
       return false;
     }
     const char* dex_file_location_data = reinterpret_cast<const char*>(oat);
     oat += dex_file_location_size;
     if (UNLIKELY(oat > End())) {
-      *error_msg =
-          StringPrintf("In oat file '%s' found OatDexFile #%zd with truncated dex file "
-                       "location", GetLocation().c_str(), i);
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu with truncated dex file "
+                                "location",
+                                GetLocation().c_str(),
+                                i);
       return false;
     }
@@ -460,46 +489,61 @@ bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
         abs_dex_location,
         std::string(dex_file_location_data, dex_file_location_size));
 
-    uint32_t dex_file_checksum = *reinterpret_cast<const uint32_t*>(oat);
-    oat += sizeof(dex_file_checksum);
-    if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' truncated after "
-                                "dex file checksum", GetLocation().c_str(), i,
+    uint32_t dex_file_checksum;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &dex_file_checksum))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated after "
+                                "dex file checksum",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
 
-    uint32_t dex_file_offset = *reinterpret_cast<const uint32_t*>(oat);
-    if (UNLIKELY(dex_file_offset == 0U)) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with zero dex "
-                                "file offset", GetLocation().c_str(), i, dex_file_location.c_str());
+    uint32_t dex_file_offset;
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &dex_file_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated "
+                                "after dex file offsets",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str());
       return false;
     }
-    if (UNLIKELY(dex_file_offset > Size())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with dex file "
-                                "offset %ud > %zd", GetLocation().c_str(), i,
-                                dex_file_location.c_str(), dex_file_offset, Size());
+    if (UNLIKELY(dex_file_offset == 0U)) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with zero dex "
+                                "file offset",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str());
       return false;
     }
-    oat += sizeof(dex_file_offset);
-    if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' truncated "
-                                "after dex file offsets", GetLocation().c_str(), i,
-                                dex_file_location.c_str());
+    if (UNLIKELY(dex_file_offset > Size())) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with dex file "
+                                "offset %u > %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_offset,
+                                Size());
      return false;
     }
 
     const uint8_t* dex_file_pointer = Begin() + dex_file_offset;
     if (UNLIKELY(!DexFile::IsMagicValid(dex_file_pointer))) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with invalid "
-                                "dex file magic '%s'", GetLocation().c_str(), i,
-                                dex_file_location.c_str(), dex_file_pointer);
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with invalid "
+                                "dex file magic '%s'",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_pointer);
       return false;
     }
     if (UNLIKELY(!DexFile::IsVersionValid(dex_file_pointer))) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with invalid "
-                                "dex file version '%s'", GetLocation().c_str(), i,
-                                dex_file_location.c_str(), dex_file_pointer);
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with invalid "
+                                "dex file version '%s'",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                dex_file_pointer);
       return false;
     }
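    // Editor's note: the ReadOatDexFileData() calls above read unaligned values via
    // the __attribute__((__aligned__(1))) typedef; for a trivial T this is
    // equivalent to the portable memcpy idiom (sketch, not ART code):
    //   T value;
    //   memcpy(&value, *oat, sizeof(T));  // safe for any alignment
    //   *oat += sizeof(T);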
const DexFile::Header* header = reinterpret_cast<const DexFile::Header*>(dex_file_pointer);
@@ -507,21 +551,26 @@ bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
     oat += (sizeof(*methods_offsets_pointer) * header->class_defs_size_);
     if (UNLIKELY(oat > End())) {
-      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with truncated "
-                                "method offsets", GetLocation().c_str(), i,
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with truncated "
+                                "method offsets",
+                                GetLocation().c_str(),
+                                i,
                                 dex_file_location.c_str());
       return false;
     }
 
-    const uint8_t* current_dex_cache_arrays = nullptr;
+    uint8_t* current_dex_cache_arrays = nullptr;
     if (dex_cache_arrays != nullptr) {
       DexCacheArraysLayout layout(pointer_size, *header);
       if (layout.Size() != 0u) {
         if (static_cast<size_t>(bss_end_ - dex_cache_arrays) < layout.Size()) {
-          *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd for '%s' with "
-                                    "truncated dex cache arrays, %zd < %zd.",
-                                    GetLocation().c_str(), i, dex_file_location.c_str(),
-                                    static_cast<size_t>(bss_end_ - dex_cache_arrays), layout.Size());
+          *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with "
+                                    "truncated dex cache arrays, %zu < %zu.",
+                                    GetLocation().c_str(),
+                                    i,
+                                    dex_file_location.c_str(),
+                                    static_cast<size_t>(bss_end_ - dex_cache_arrays),
+                                    layout.Size());
           return false;
         }
         current_dex_cache_arrays = dex_cache_arrays;
@@ -553,7 +602,7 @@ bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
   if (dex_cache_arrays != bss_end_) {
     // We expect the bss section to be either empty (dex_cache_arrays and bss_end_
     // both null) or contain just the dex cache arrays and nothing else.
-    *error_msg = StringPrintf("In oat file '%s' found unexpected bss size bigger by %zd bytes.",
+    *error_msg = StringPrintf("In oat file '%s' found unexpected bss size bigger by %zu bytes.",
                               GetLocation().c_str(),
                               static_cast<size_t>(bss_end_ - dex_cache_arrays));
     return false;
@@ -661,7 +710,7 @@ OatFile::OatDexFile::OatDexFile(const OatFile* oat_file,
                                 uint32_t dex_file_location_checksum,
                                 const uint8_t* dex_file_pointer,
                                 const uint32_t* oat_class_offsets_pointer,
-                                const uint8_t* dex_cache_arrays)
+                                uint8_t* dex_cache_arrays)
     : oat_file_(oat_file),
       dex_file_location_(dex_file_location),
       canonical_dex_file_location_(canonical_dex_file_location),
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 34f014123b..6acdf86208 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -301,10 +301,10 @@ class OatFile FINAL {
   const uint8_t* end_;
 
   // Pointer to the .bss section, if present, otherwise null.
-  const uint8_t* bss_begin_;
+  uint8_t* bss_begin_;
 
   // Pointer to the end of the .bss section, if present, otherwise null.
-  const uint8_t* bss_end_;
+  uint8_t* bss_end_;
 
   // Was this oat_file loaded executable?
   const bool is_executable_;
@@ -396,7 +396,7 @@ class OatDexFile FINAL {
   // Returns the offset to the OatClass information. Most callers should use GetOatClass.
  uint32_t GetOatClassOffset(uint16_t class_def_index) const;
 
-  const uint8_t* GetDexCacheArrays() const {
+  uint8_t* GetDexCacheArrays() const {
     return dex_cache_arrays_;
   }
 
@@ -409,7 +409,7 @@ class OatDexFile FINAL {
              uint32_t dex_file_checksum,
              const uint8_t* dex_file_pointer,
              const uint32_t* oat_class_offsets_pointer,
-             const uint8_t* dex_cache_arrays);
+             uint8_t* dex_cache_arrays);
 
   const OatFile* const oat_file_;
   const std::string dex_file_location_;
@@ -417,7 +417,7 @@ class OatDexFile FINAL {
   const uint32_t dex_file_location_checksum_;
   const uint8_t* const dex_file_pointer_;
   const uint32_t* const oat_class_offsets_pointer_;
-  const uint8_t* const dex_cache_arrays_;
+  uint8_t* const dex_cache_arrays_;
 
   friend class OatFile;
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
diff --git a/runtime/thread.h b/runtime/thread.h
index 8cea10c844..8f3461acdf 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -626,6 +626,24 @@ class Thread {
     return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
                                                                 thread_local_objects));
   }
 
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> RosAllocRunsOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                rosalloc_runs));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalAllocStackTopOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_alloc_stack_top));
+  }
+
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> ThreadLocalAllocStackEndOffset() {
+    return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values,
+                                                                thread_local_alloc_stack_end));
+  }
+
   // Size of stack less any space reserved for stack overflow
   size_t GetStackSize() const {
     return tlsPtr_.stack_size - (tlsPtr_.stack_end - tlsPtr_.stack_begin);
diff --git a/test/540-checker-rtp-bug/expected.txt b/test/540-checker-rtp-bug/expected.txt
new file mode 100644
index 0000000000..2cf2842aa5
--- /dev/null
+++ b/test/540-checker-rtp-bug/expected.txt
@@ -0,0 +1 @@
+instanceof failed
diff --git a/test/540-checker-rtp-bug/info.txt b/test/540-checker-rtp-bug/info.txt
new file mode 100644
index 0000000000..852cd7c1b4
--- /dev/null
+++ b/test/540-checker-rtp-bug/info.txt
@@ -0,0 +1 @@
+Test that we set the proper types for objects (b/25008765).
diff --git a/test/540-checker-rtp-bug/src/Main.java b/test/540-checker-rtp-bug/src/Main.java
new file mode 100644
index 0000000000..e9f16c04d9
--- /dev/null
+++ b/test/540-checker-rtp-bug/src/Main.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+final class Final {
+  public String toString() {
+    return "final";
+  }
+}
+
+public class Main {
+  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK: <<Phi:l\d+>> Phi klass:java.lang.Object
+  /// CHECK: <<Class:l\d+>> LoadClass
+  /// CHECK: CheckCast [<<Phi>>,<<Class>>]
+  /// CHECK: <<Ret:l\d+>> BoundType [<<Phi>>] klass:Final
+  /// CHECK: Return [<<Ret>>]
+
+  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) instruction_simplifier_after_types (after)
+  /// CHECK: <<Phi:l\d+>> Phi
+  /// CHECK: <<Class:l\d+>> LoadClass
+  /// CHECK: CheckCast [<<Phi>>,<<Class>>]
+  /// CHECK: <<Ret:l\d+>> BoundType [<<Phi>>]
+  /// CHECK: Return [<<Ret>>]
+  public static Final testKeepCheckCast(Object o, boolean cond) {
+    Object x = new Final();
+    while (cond) {
+      x = o;
+      cond = false;
+    }
+    return (Final) x;
+  }
+
+  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK: <<Phi:l\d+>> Phi klass:java.lang.Object
+  /// CHECK: <<Class:l\d+>> LoadClass
+  /// CHECK: InstanceOf [<<Phi>>,<<Class>>]
+
+  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) dead_code_elimination (after)
+  /// CHECK: <<Phi:l\d+>> Phi
+  /// CHECK: <<Class:l\d+>> LoadClass
+  /// CHECK: InstanceOf [<<Phi>>,<<Class>>]
+  public static void testKeepInstanceOf(Object o, boolean cond) {
+    Object x = new Final();
+    while (cond) {
+      x = o;
+      cond = false;
+    }
+    if (x instanceof Final) {
+      System.out.println("instanceof succeed");
+    } else {
+      System.out.println("instanceof failed");
+    }
+  }
+
+  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) reference_type_propagation (after)
+  /// CHECK: <<Phi:l\d+>> Phi klass:java.lang.Object
+  /// CHECK: <<NC:l\d+>> NullCheck [<<Phi>>]
+  /// CHECK: <<Ret:l\d+>> InvokeVirtual [<<NC>>] method_name:java.lang.Object.toString
+  /// CHECK: Return [<<Ret>>]
+
+  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) inliner (after)
+  /// CHECK: <<Phi:l\d+>> Phi
+  /// CHECK: <<NC:l\d+>> NullCheck [<<Phi>>]
+  /// CHECK: <<Ret:l\d+>> InvokeVirtual [<<NC>>] method_name:java.lang.Object.toString
+  /// CHECK: Return [<<Ret>>]
+  public static String testNoInline(Object o, boolean cond) {
+    Object x = new Final();
+    while (cond) {
+      x = o;
+      cond = false;
+    }
+    return x.toString();
+  }
+
+  public static void main(String[] args) {
+    try {
+      testKeepCheckCast(new Object(), true);
+      throw new Error("Expected check cast exception");
+    } catch (ClassCastException e) {
+      // expected
+    }
+
+    testKeepInstanceOf(new Object(), true);
+
+    if ("final".equals(testNoInline(new Object(), true))) {
+      throw new Error("Bad inlining");
+    }
+  }
+}
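A closing note on the ADD_TEST_EQ pattern used by the asm_support.h changes above: each #define hands assembly a literal constant, and when the header is compiled as C++ the macro cross-checks that constant against the real C++ expression inside CheckAsmSupportOffsetsAndSizes(). A self-contained sketch of the same guarantee using a compile-time check (illustrative only; the type and constant here are hypothetical, and ART's own macro performs a runtime CHECK rather than a static_assert):

    #include <cstddef>

    struct Slot {
      Slot* next_;
    };

    #define SLOT_NEXT_OFFSET 0
    // Fails to compile if the hand-maintained assembly constant drifts from the
    // actual C++ layout, giving the same protection at build time.
    static_assert(SLOT_NEXT_OFFSET == offsetof(Slot, next_),
                  "assembly constant out of sync with C++ layout");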