35 files changed, 879 insertions(+), 281 deletions(-)
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 85216b7610..bc51ed6e6a 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -22,6 +22,7 @@ #include "base/stringpiece.h" #include "base/time_utils.h" #include "base/timing_logger.h" +#include "base/unix_file/fd_file.h" #include "compiler_callbacks.h" #include "dex/pass_manager.h" #include "dex/quick_compiler_callbacks.h" @@ -42,11 +43,12 @@ JitCompiler* JitCompiler::Create() { return new JitCompiler(); } -extern "C" void* jit_load(CompilerCallbacks** callbacks) { +extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) { VLOG(jit) << "loading jit compiler"; auto* const jit_compiler = JitCompiler::Create(); CHECK(jit_compiler != nullptr); *callbacks = jit_compiler->GetCompilerCallbacks(); + *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo(); VLOG(jit) << "Done loading jit compiler"; return jit_compiler; } @@ -160,9 +162,28 @@ JitCompiler::JitCompiler() : total_time_(0) { // Disable dedupe so we can remove compiled methods. compiler_driver_->SetDedupeEnabled(false); compiler_driver_->SetSupportBootImageFixup(false); + + if (compiler_options_->GetGenerateDebugInfo()) { +#ifdef __ANDROID__ + const char* prefix = GetAndroidData(); +#else + const char* prefix = "/tmp"; +#endif + DCHECK_EQ(compiler_driver_->GetThreadCount(), 1u) + << "Generating debug info only works with one compiler thread"; + std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map"; + perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str())); + if (perf_file_ == nullptr) { + LOG(FATAL) << "Could not create perf file at " << perf_filename; + } + } } JitCompiler::~JitCompiler() { + if (perf_file_ != nullptr) { + UNUSED(perf_file_->Flush()); + UNUSED(perf_file_->Close()); + } } bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { @@ -188,6 +209,20 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*)); JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache(); success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile); + if (success && compiler_options_->GetGenerateDebugInfo()) { + const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode(); + std::ostringstream stream; + stream << std::hex + << reinterpret_cast<uintptr_t>(ptr) + << " " + << code_cache->GetMemorySizeOfCodePointer(ptr) + << " " + << PrettyMethod(method_to_compile) + << std::endl; + std::string str = stream.str(); + bool res = perf_file_->WriteFully(str.c_str(), str.size()); + CHECK(res); + } } // Trim maps to reduce memory usage. 
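The map written above is the plain-text format Linux perf reads from /tmp/perf-<pid>.map to symbolize JIT code: one "<start hex> <size hex> <name>" line per method. A minimal stand-alone sketch of a writer in that format (not ART code; the struct, addresses and method names are illustrative):

```cpp
// Minimal sketch of a perf map writer (not ART code; values illustrative).
#include <cstdint>
#include <cstdio>
#include <unistd.h>

struct JitEntry {  // hypothetical record for one compiled method
  uint64_t code_start;
  uint64_t code_size;
  const char* pretty_name;
};

int main() {
  char path[64];
  std::snprintf(path, sizeof(path), "/tmp/perf-%d.map", static_cast<int>(getpid()));
  std::FILE* f = std::fopen(path, "w");
  if (f == nullptr) return 1;
  const JitEntry entries[] = {
    {0x7f0000001000, 0x80, "void Foo.bar()"},
    {0x7f0000002000, 0x140, "int Foo.baz(int)"},
  };
  for (const JitEntry& e : entries) {
    // Same "<hex addr> <hex size> <pretty method>" shape as the
    // ostringstream code in JitCompiler::CompileMethod above.
    std::fprintf(f, "%llx %llx %s\n",
                 static_cast<unsigned long long>(e.code_start),
                 static_cast<unsigned long long>(e.code_size),
                 e.pretty_name);
  }
  return std::fclose(f) == 0 ? 0 : 1;
}
```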
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h index 913a6d00ae..037a18ac7a 100644 --- a/compiler/jit/jit_compiler.h +++ b/compiler/jit/jit_compiler.h @@ -43,6 +43,9 @@ class JitCompiler { size_t GetTotalCompileTime() const { return total_time_; } + CompilerOptions* GetCompilerOptions() const { + return compiler_options_.get(); + } private: uint64_t total_time_; @@ -53,6 +56,7 @@ class JitCompiler { std::unique_ptr<CompilerCallbacks> callbacks_; std::unique_ptr<CompilerDriver> compiler_driver_; std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; + std::unique_ptr<File> perf_file_; JitCompiler(); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 0156187765..322912976e 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1539,8 +1539,10 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { bool use_imm = rhs_location.IsConstant(); Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>(); int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0; - uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue; - uint32_t shift_value = rhs_imm & shift_mask; + const uint32_t shift_mask = (type == Primitive::kPrimInt) + ? kMaxIntShiftValue + : kMaxLongShiftValue; + const uint32_t shift_value = rhs_imm & shift_mask; // Are the INS (Insert Bit Field) and ROTR instructions supported? bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); @@ -1580,6 +1582,11 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Rotrv(dst, lhs, rhs_reg); } else { __ Subu(TMP, ZERO, rhs_reg); + // 32-bit shift instructions use the 5 least significant bits of the shift count, so + // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case + // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out + // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`, + // IOW, the OR'd values are equal. __ Sllv(TMP, lhs, TMP); __ Srlv(dst, lhs, rhs_reg); __ Or(dst, dst, TMP); @@ -1643,33 +1650,33 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { } } } else { - shift_value -= kMipsBitsPerWord; + const uint32_t shift_value_high = shift_value - kMipsBitsPerWord; if (instr->IsShl()) { - __ Sll(dst_high, lhs_low, shift_value); + __ Sll(dst_high, lhs_low, shift_value_high); __ Move(dst_low, ZERO); } else if (instr->IsShr()) { - __ Sra(dst_low, lhs_high, shift_value); + __ Sra(dst_low, lhs_high, shift_value_high); __ Sra(dst_high, dst_low, kMipsBitsPerWord - 1); } else if (instr->IsUShr()) { - __ Srl(dst_low, lhs_high, shift_value); + __ Srl(dst_low, lhs_high, shift_value_high); __ Move(dst_high, ZERO); } else { - if (shift_value == 0) { + if (shift_value == kMipsBitsPerWord) { // 64-bit rotation by 32 is just a swap. 
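The `Subu/Sllv/Srlv/Or` sequence commented above is the classic rotate-right expansion. A quick host-side sketch (plain C++, not the generated code) of why shifting left by `(32 - n) & 31` works, including the `n & 31 == 0` corner case the new comment calls out:

```cpp
#include <cassert>
#include <cstdint>

// Rotate right using only shifts and OR, mirroring the Subu/Sllv/Srlv/Or
// sequence. 32-bit shift instructions (and the C++ shifts here, after
// masking) use only the low 5 bits of the count, so the left-shift amount
// (32 - n) & 31 equals -n & 31.
uint32_t RotateRight(uint32_t lhs, uint32_t n) {
  uint32_t right = lhs >> (n & 31);
  uint32_t left = lhs << (-n & 31);  // (32 - n) & 31
  // When n & 31 == 0 both halves are lhs >> 0 and lhs << 0, so OR'ing the
  // two equal values still yields the correct result.
  return right | left;
}

int main() {
  assert(RotateRight(0x80000001u, 1) == 0xC0000000u);
  assert(RotateRight(0x12345678u, 0) == 0x12345678u);
  assert(RotateRight(0x12345678u, 32) == 0x12345678u);
  return 0;
}
```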
__ Move(dst_low, lhs_high); __ Move(dst_high, lhs_low); } else { if (has_ins_rotr) { - __ Srl(dst_low, lhs_high, shift_value); - __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value, shift_value); - __ Srl(dst_high, lhs_low, shift_value); - __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value, shift_value); + __ Srl(dst_low, lhs_high, shift_value_high); + __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high); + __ Srl(dst_high, lhs_low, shift_value_high); + __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high); } else { - __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value); - __ Srl(dst_low, lhs_high, shift_value); + __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high); + __ Srl(dst_low, lhs_high, shift_value_high); __ Or(dst_low, dst_low, TMP); - __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value); - __ Srl(dst_high, lhs_low, shift_value); + __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high); + __ Srl(dst_high, lhs_low, shift_value_high); __ Or(dst_high, dst_high, TMP); } } @@ -4797,6 +4804,7 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { Primitive::Type input_type = conversion->GetInputType(); Primitive::Type result_type = conversion->GetResultType(); DCHECK_NE(input_type, result_type); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) || (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) { @@ -4804,8 +4812,9 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { } LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { + if (!isR6 && + ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || + (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) { call_kind = LocationSummary::kCall; } @@ -4843,6 +4852,8 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); DCHECK_NE(input_type, result_type); @@ -4888,7 +4899,37 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi << " to " << result_type; } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { - if (input_type != Primitive::kPrimLong) { + if (input_type == Primitive::kPrimLong) { + if (isR6) { + // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary + // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. 
+ Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register src_low = locations->InAt(0).AsRegisterPairLow<Register>(); + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + __ Mtc1(src_low, FTMP); + __ Mthc1(src_high, FTMP); + if (result_type == Primitive::kPrimFloat) { + __ Cvtsl(dst, FTMP); + } else { + __ Cvtdl(dst, FTMP); + } + } else { + int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) + : QUICK_ENTRY_POINT(pL2d); + bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f) + : IsDirectEntrypoint(kQuickL2d); + codegen_->InvokeRuntime(entry_offset, + conversion, + conversion->GetDexPc(), + nullptr, + direct); + if (result_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + } else { + CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + } + } + } else { Register src = locations->InAt(0).AsRegister<Register>(); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); __ Mtc1(src, FTMP); @@ -4897,54 +4938,168 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi } else { __ Cvtdw(dst, FTMP); } - } else { - int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) - : QUICK_ENTRY_POINT(pL2d); - bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f) - : IsDirectEntrypoint(kQuickL2d); - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr, - direct); - if (result_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); - } else { - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); - } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); - int32_t entry_offset; - bool direct; - if (result_type != Primitive::kPrimLong) { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz) - : QUICK_ENTRY_POINT(pD2iz); - direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz) - : IsDirectEntrypoint(kQuickD2iz); + if (result_type == Primitive::kPrimLong) { + if (isR6) { + // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary + // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); + Register dst_low = locations->Out().AsRegisterPairLow<Register>(); + MipsLabel truncate; + MipsLabel done; + + // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive + // value when the input is either a NaN or is outside of the range of the output type + // after the truncation. IOW, the three special cases (NaN, too small, too big) produce + // the same result. + // + // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum + // value of the output type if the input is outside of the range after the truncation or + // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct + // results. This matches the desired float/double-to-int/long conversion exactly. + // + // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction. 
+ // + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // The code takes care of the different behaviors by first comparing the input to the + // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int). + // If the input is greater than or equal to the minimum, it proceeds to the truncate + // instruction, which will handle such an input the same way irrespective of NAN2008. + // Otherwise the input is compared to itself to determine whether it is a NaN or not + // in order to return either zero or the minimum value. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6. + if (input_type == Primitive::kPrimFloat) { + uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); + __ CmpLeS(FTMP, FTMP, src); + } else { + uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min()); + __ LoadConst32(TMP, High32Bits(min_val)); + __ Mtc1(ZERO, FTMP); + __ Mthc1(TMP, FTMP); + __ CmpLeD(FTMP, FTMP, src); + } + + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + __ Move(dst_low, ZERO); + __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min()); + __ Mfc1(TMP, FTMP); + __ And(dst_high, dst_high, TMP); + + __ B(&done); + + __ Bind(&truncate); + + if (input_type == Primitive::kPrimFloat) { + __ TruncLS(FTMP, src); + } else { + __ TruncLD(FTMP, src); + } + __ Mfc1(dst_low, FTMP); + __ Mfhc1(dst_high, FTMP); + + __ Bind(&done); + } else { + int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) + : QUICK_ENTRY_POINT(pD2l); + bool direct = (input_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l) + : IsDirectEntrypoint(kQuickD2l); + codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct); + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + } + } } else { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) - : QUICK_ENTRY_POINT(pD2l); - direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l) - : IsDirectEntrypoint(kQuickD2l); - } - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr, - direct); - if (result_type != Primitive::kPrimLong) { + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + Register dst = locations->Out().AsRegister<Register>(); + MipsLabel truncate; + MipsLabel done; + + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // For details see the large comment above for the truncation of float/double to long on R6. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6.
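For reference, the net semantics all of these compare-then-truncate sequences implement is the Java float/double-to-integral rule. A small host-side sketch, assuming only standard C++ (this is the contract, not the emitted code); the NaN/minimum split comes for free in the generated code because the R6 CMP writes an all-ones or all-zeros mask, so ANDing it with the minimum constant yields either the minimum or zero:

```cpp
#include <cmath>
#include <cstdint>
#include <limits>

// Java semantics for (long) f: NaN -> 0, out-of-range values clamp to the
// type's min/max, everything else truncates toward zero.
int64_t FloatToLong(float f) {
  if (std::isnan(f)) {
    return 0;
  }
  if (f >= static_cast<float>(std::numeric_limits<int64_t>::max())) {
    return std::numeric_limits<int64_t>::max();
  }
  if (f <= static_cast<float>(std::numeric_limits<int64_t>::min())) {
    // The generated code tests "input >= minimum" first; anything below the
    // minimum that is not a NaN produces the minimum.
    return std::numeric_limits<int64_t>::min();
  }
  return static_cast<int64_t>(f);  // in range: truncate toward zero
}
```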
if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); } else { - CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, High32Bits(min_val)); + __ Mtc1(ZERO, FTMP); + if (fpu_32bit) { + __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1)); + } else { + __ Mthc1(TMP, FTMP); + } } - } else { + + if (isR6) { + if (input_type == Primitive::kPrimFloat) { + __ CmpLeS(FTMP, FTMP, src); + } else { + __ CmpLeD(FTMP, FTMP, src); + } + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + __ Mfc1(TMP, FTMP); + __ And(dst, dst, TMP); + } else { + if (input_type == Primitive::kPrimFloat) { + __ ColeS(0, FTMP, src); + } else { + __ ColeD(0, FTMP, src); + } + __ Bc1t(0, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CeqS(0, src, src); + } else { + __ CeqD(0, src, src); + } + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + __ Movf(dst, ZERO, 0); + } + + __ B(&done); + + __ Bind(&truncate); + if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + __ TruncWS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + __ TruncWD(FTMP, src); } + __ Mfc1(dst, FTMP); + + __ Bind(&done); } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index abfaae4b50..38c32cad06 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -3932,36 +3932,18 @@ void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) { LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; } - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { - call_kind = LocationSummary::kCall; - } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion); - if (call_kind == LocationSummary::kNoCall) { - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - } - - if (Primitive::IsFloatingPointType(result_type)) { - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } + if (Primitive::IsFloatingPointType(input_type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); } else { - InvokeRuntimeCallingConvention calling_convention; - - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - } else { - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, 
Location::RequiresRegister()); + } - locations->SetOut(calling_convention.GetReturnLocation(result_type)); + if (Primitive::IsFloatingPointType(result_type)) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } @@ -4006,55 +3988,107 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver << " to " << result_type; } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { - if (input_type != Primitive::kPrimLong) { - FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); - GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); - __ Mtc1(src, FTMP); + FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); + GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); + if (input_type == Primitive::kPrimLong) { + __ Dmtc1(src, FTMP); if (result_type == Primitive::kPrimFloat) { - __ Cvtsw(dst, FTMP); + __ Cvtsl(dst, FTMP); } else { - __ Cvtdw(dst, FTMP); + __ Cvtdl(dst, FTMP); } } else { - int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) - : QUICK_ENTRY_POINT(pL2d); - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr); + __ Mtc1(src, FTMP); if (result_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + __ Cvtsw(dst, FTMP); } else { - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + __ Cvtdw(dst, FTMP); } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); - int32_t entry_offset; - if (result_type != Primitive::kPrimLong) { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz) - : QUICK_ENTRY_POINT(pD2iz); + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); + Mips64Label truncate; + Mips64Label done; + + // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive + // value when the input is either a NaN or is outside of the range of the output type + // after the truncation. IOW, the three special cases (NaN, too small, too big) produce + // the same result. + // + // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum + // value of the output type if the input is outside of the range after the truncation or + // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct + // results. This matches the desired float/double-to-int/long conversion exactly. + // + // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction. + // + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // The code takes care of the different behaviors by first comparing the input to the + // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int). + // If the input is greater than or equal to the minimum, it proceeds to the truncate + // instruction, which will handle such an input the same way irrespective of NAN2008.
+ // Otherwise the input is compared to itself to determine whether it is a NaN or not + // in order to return either zero or the minimum value. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6. + if (input_type == Primitive::kPrimFloat) { + uint32_t min_val = (result_type == Primitive::kPrimLong) + ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min()) + : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); + __ CmpLeS(FTMP, FTMP, src); } else { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) - : QUICK_ENTRY_POINT(pD2l); + uint64_t min_val = (result_type == Primitive::kPrimLong) + ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min()) + : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); + __ LoadConst64(TMP, min_val); + __ Dmtc1(TMP, FTMP); + __ CmpLeD(FTMP, FTMP, src); } - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr); - if (result_type != Primitive::kPrimLong) { + + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + if (result_type == Primitive::kPrimLong) { + __ LoadConst64(dst, std::numeric_limits<int64_t>::min()); + } else { + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + } + __ Mfc1(TMP, FTMP); + __ And(dst, dst, TMP); + + __ Bc(&done); + + __ Bind(&truncate); + + if (result_type == Primitive::kPrimLong) { if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + __ TruncLS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + __ TruncLD(FTMP, src); } + __ Dmfc1(dst, FTMP); } else { if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + __ TruncWS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + __ TruncWD(FTMP, src); } + __ Mfc1(dst, FTMP); } + + __ Bind(&done); } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index c60a4eacaa..4784de1380 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -270,7 +270,7 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask); } - if (entry.num_dex_registers == 0) { + if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) { // No dex map available. 
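Restated as a predicate, the stack_map_stream.cc change that follows below omits the dex register map both when an entry declares no registers and when it declares some but none is live. A tiny sketch, with std::bitset standing in for ART's BitVector:

```cpp
#include <bitset>
#include <cstddef>

// Mirrors the new condition in StackMapStream::FillIn: emit a dex register
// map only if registers are declared AND at least one live bit is set. The
// new stack_map_test entry (number_of_dex_registers = 1, empty live mask)
// exercises exactly the second branch.
template <size_t kNumRegs>
bool NeedsDexRegisterMap(size_t num_dex_registers,
                         const std::bitset<kNumRegs>& live_mask) {
  return num_dex_registers != 0 && live_mask.count() != 0;
}
```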
stack_map.SetDexRegisterMapOffset(stack_map_encoding_, StackMap::kNoDexRegisterMap); } else { diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 560502fde6..604787fd92 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -614,6 +614,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) { stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.EndStackMapEntry(); + number_of_dex_registers = 1; + stream.BeginStackMapEntry(1, 67, 0x4, &sp_mask, number_of_dex_registers, 0); + stream.EndStackMapEntry(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -622,7 +626,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { CodeInfo code_info(region); StackMapEncoding encoding = code_info.ExtractEncoding(); ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(0u, number_of_location_catalog_entries); @@ -638,6 +642,16 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); + + stack_map = code_info.GetStackMapAt(1, encoding); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding))); + ASSERT_EQ(1u, stack_map.GetDexPc(encoding)); + ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding)); + ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding)); + + ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); } TEST(StackMapTest, InlineTest) { diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 8c462436a7..ac9c097892 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -1035,6 +1035,22 @@ void MipsAssembler::Movt(Register rd, Register rs, int cc) { EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01); } +void MipsAssembler::TruncLS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09); +} + +void MipsAssembler::TruncLD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09); +} + +void MipsAssembler::TruncWS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D); +} + +void MipsAssembler::TruncWD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D); +} + void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) { EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20); } @@ -1051,6 +1067,14 @@ void MipsAssembler::Cvtds(FRegister fd, FRegister fs) { EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21); } +void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20); +} + +void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21); +} + void MipsAssembler::Mfc1(Register rt, FRegister fs) { EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); } diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 9aed3463b7..01c6490f88 100644 --- 
a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -265,10 +265,16 @@ class MipsAssembler FINAL : public Assembler { void Movf(Register rd, Register rs, int cc); // R2 void Movt(Register rd, Register rs, int cc); // R2 + void TruncLS(FRegister fd, FRegister fs); // R2+, FR=1 + void TruncLD(FRegister fd, FRegister fs); // R2+, FR=1 + void TruncWS(FRegister fd, FRegister fs); + void TruncWD(FRegister fd, FRegister fs); void Cvtsw(FRegister fd, FRegister fs); void Cvtdw(FRegister fd, FRegister fs); void Cvtsd(FRegister fd, FRegister fs); void Cvtds(FRegister fd, FRegister fs); + void Cvtsl(FRegister fd, FRegister fs); // R2+, FR=1 + void Cvtdl(FRegister fd, FRegister fs); // R2+, FR=1 void Mfc1(Register rt, FRegister fs); void Mtc1(Register rt, FRegister fs); diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index 4361843c54..5fc3deebd3 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -599,6 +599,14 @@ TEST_F(AssemblerMIPSTest, CvtDW) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "CvtDW"); } +TEST_F(AssemblerMIPSTest, CvtSL) { + DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsl, "cvt.s.l ${reg1}, ${reg2}"), "CvtSL"); +} + +TEST_F(AssemblerMIPSTest, CvtDL) { + DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "CvtDL"); +} + TEST_F(AssemblerMIPSTest, CvtSD) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "CvtSD"); } @@ -607,6 +615,22 @@ TEST_F(AssemblerMIPSTest, CvtDS) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "CvtDS"); } +TEST_F(AssemblerMIPSTest, TruncWS) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "TruncWS"); +} + +TEST_F(AssemblerMIPSTest, TruncWD) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "TruncWD"); +} + +TEST_F(AssemblerMIPSTest, TruncLS) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "TruncLS"); +} + +TEST_F(AssemblerMIPSTest, TruncLD) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "TruncLD"); +} + TEST_F(AssemblerMIPSTest, Mfc1) { DriverStr(RepeatRF(&mips::MipsAssembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1"); } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index cfd8421e93..f9ff2df8bb 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -771,6 +771,22 @@ void Mips64Assembler::RoundWD(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xc); } +void Mips64Assembler::TruncLS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x9); +} + +void Mips64Assembler::TruncLD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x9); +} + +void Mips64Assembler::TruncWS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xd); +} + +void Mips64Assembler::TruncWD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xd); +} + void Mips64Assembler::CeilLS(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xa); } diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 883f013f87..3262640ce7 
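All of the new Trunc*/Cvt* emitters above are instances of the same COP1 FR-format word. A stand-alone sketch of that bit layout (field widths per the MIPS architecture manuals; EncodeFR is a hypothetical helper, not the assembler's API):

```cpp
#include <cassert>
#include <cstdint>

// COP1 FR-format word, matching the EmitFR calls above:
//   opcode(6) | fmt(5) | ft(5) | fs(5) | fd(5) | funct(6)
uint32_t EncodeFR(uint32_t opcode, uint32_t fmt, uint32_t ft,
                  uint32_t fs, uint32_t fd, uint32_t funct) {
  return (opcode << 26) | (fmt << 21) | (ft << 16) | (fs << 11) |
         (fd << 6) | funct;
}

int main() {
  // trunc.w.s f4, f2: COP1 (0x11), single fmt (0x10), funct 0x0D.
  assert(EncodeFR(0x11, 0x10, 0, 2, 4, 0x0D) == 0x4600110Du);
  // cvt.d.l f6, f8: long fixed-point fmt (0x15), funct 0x21 (R2+, FR=1).
  assert(EncodeFR(0x11, 0x15, 0, 8, 6, 0x21) == 0x46A041A1u);
  return 0;
}
```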
100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -250,6 +250,10 @@ class Mips64Assembler FINAL : public Assembler { void RoundLD(FpuRegister fd, FpuRegister fs); void RoundWS(FpuRegister fd, FpuRegister fs); void RoundWD(FpuRegister fd, FpuRegister fs); + void TruncLS(FpuRegister fd, FpuRegister fs); + void TruncLD(FpuRegister fd, FpuRegister fs); + void TruncWS(FpuRegister fd, FpuRegister fs); + void TruncWD(FpuRegister fd, FpuRegister fs); void CeilLS(FpuRegister fd, FpuRegister fs); void CeilLD(FpuRegister fd, FpuRegister fs); void CeilWS(FpuRegister fd, FpuRegister fs); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index bac4375b35..7d79be2731 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -527,6 +527,22 @@ TEST_F(AssemblerMIPS64Test, CvtSW) { DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "cvt.s.w"); } +TEST_F(AssemblerMIPS64Test, TruncWS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "trunc.w.s"); +} + +TEST_F(AssemblerMIPS64Test, TruncWD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "trunc.w.d"); +} + +TEST_F(AssemblerMIPS64Test, TruncLS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "trunc.l.s"); +} + +TEST_F(AssemblerMIPS64Test, TruncLD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "trunc.l.d"); +} + //////////////// // CALL / JMP // //////////////// diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index 0691f2a620..699ab3e65a 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -1312,7 +1312,114 @@ END \name .endm // Generate the allocation entrypoints for each allocator. -GENERATE_ALL_ALLOC_ENTRYPOINTS +GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). +ENTRY art_quick_alloc_object_rosalloc + + # Fast path rosalloc allocation + # a0: type_idx + # a1: ArtMethod* + # s1: Thread::Current + # ----------------------------- + # t0: class + # t1: object size + # t2: rosalloc run + # t3: thread stack top offset + # t4: thread stack bottom offset + # v0: free list head + # + # t5, t6 : temps + + lw $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_32($a1) # Load dex cache resolved types + # array. + + sll $t5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT # Shift the value. + addu $t5, $t0, $t5 # Compute the index. + lw $t0, 0($t5) # Load class (t0). + beqz $t0, .Lart_quick_alloc_object_rosalloc_slow_path + + li $t6, MIRROR_CLASS_STATUS_INITIALIZED + lw $t5, MIRROR_CLASS_STATUS_OFFSET($t0) # Check class status. + bne $t5, $t6, .Lart_quick_alloc_object_rosalloc_slow_path + + # Add a fake dependence from the following access flag and size loads to the status load. This + # is to prevent those loads from being reordered above the status load and reading wrong values. + xor $t5, $t5, $t5 + addu $t0, $t0, $t5 + + lw $t5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0) # Check if access flags has + li $t6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE # kAccClassIsFinalizable. 
+ and $t6, $t5, $t6 + bnez $t6, .Lart_quick_alloc_object_rosalloc_slow_path + + lw $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) # Check if thread local allocation + lw $t4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1) # stack has any room left. + bgeu $t3, $t4, .Lart_quick_alloc_object_rosalloc_slow_path + + lw $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0) # Load object size (t1). + li $t5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE # Check if size is for a thread local + # allocation. + bgtu $t1, $t5, .Lart_quick_alloc_object_rosalloc_slow_path + + # Compute the rosalloc bracket index from the size. Align the size up to the rosalloc bracket + # quantum size, divide by the quantum size and subtract 1. + + addiu $t1, $t1, -1 # Decrease obj size and shift right + srl $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT # by quantum. + + sll $t2, $t1, POINTER_SIZE_SHIFT + addu $t2, $t2, $s1 + lw $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2) # Load rosalloc run (t2). + + # Load the free list head (v0). + # NOTE: this will be the return val. + + lw $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) + beqz $v0, .Lart_quick_alloc_object_rosalloc_slow_path + nop + + # Load the next pointer of the head and update the list head with the next pointer. + + lw $t5, ROSALLOC_SLOT_NEXT_OFFSET($v0) + sw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) + + # Store the class pointer in the header. This also overwrites the first pointer. The offsets are + # asserted to match. + +#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET +#error "Class pointer needs to overwrite next pointer." +#endif + + POISON_HEAP_REF $t0 + sw $t0, MIRROR_OBJECT_CLASS_OFFSET($v0) + + # Push the new object onto the thread local allocation stack and increment the thread local + # allocation stack top. + + sw $v0, 0($t3) + addiu $t3, $t3, COMPRESSED_REFERENCE_SIZE + sw $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) + + # Decrement the size of the free list. + + lw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) + addiu $t5, $t5, -1 + sw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) + + sync # Fence. + + jalr $zero, $ra + nop + + .Lart_quick_alloc_object_rosalloc_slow_path: + + SETUP_REFS_ONLY_CALLEE_SAVE_FRAME + jal artAllocObjectFromCodeRosAlloc + move $a2, $s1 # Pass self as argument. + RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + +END art_quick_alloc_object_rosalloc /* * Entry from managed code to resolve a string, this stub will allocate a String and deliver an diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S index 75e0037145..617f572c0f 100644 --- a/runtime/interpreter/mterp/arm/footer.S +++ b/runtime/interpreter/mterp/arm/footer.S @@ -128,9 +128,11 @@ MterpCheckSuspendAndContinue: */ MterpFallback: EXPORT_PC +#if MTERP_LOGGING mov r0, rSELF add r1, rFP, #OFF_FP_SHADOWFRAME bl MterpLogFallback +#endif MterpCommonFallback: mov r0, #0 @ signal retry with reference interpreter. b MterpDone @@ -144,9 +146,6 @@ MterpCommonFallback: * uint32_t* rFP (should still be live, pointer to base of vregs) */ MterpExceptionReturn: - ldr r2, [rFP, #OFF_FP_RESULT_REGISTER] - str r0, [r2] - str r1, [r2, #4] mov r0, #1 @ signal return to caller.
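The fast path in art_quick_alloc_object_rosalloc above boils down to a bracket-index computation plus a per-thread free-list pop. A compact C++ sketch of those two steps (illustrative types and quantum, not the runtime's):

```cpp
#include <cstddef>
#include <cstdint>

// Illustrative stand-ins for the RosAlloc structures the assembly walks.
struct Slot { Slot* next; };
struct FreeList { Slot* head; uint32_t size; };

constexpr size_t kQuantumShift = 4;  // assumed 16-byte bracket quantum

// "addiu $t1, $t1, -1; srl $t1, $t1, shift": round the size up to a quantum
// multiple, divide by the quantum, and subtract 1, in two instructions.
size_t BracketIndex(size_t obj_size) {
  return (obj_size - 1) >> kQuantumShift;
}

// The lw/sw pairs above, in C++: pop the head slot, advance the head to its
// next pointer, and shrink the list. An empty list takes the slow path.
void* PopSlot(FreeList* run) {
  Slot* slot = run->head;
  if (slot == nullptr) {
    return nullptr;  // beqz $v0, ...slow_path
  }
  run->head = slot->next;
  run->size--;
  // The caller then stores the class pointer over slot->next, which is why
  // ROSALLOC_SLOT_NEXT_OFFSET must equal MIRROR_OBJECT_CLASS_OFFSET.
  return slot;
}
```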
b MterpDone MterpReturn: diff --git a/runtime/interpreter/mterp/arm/op_aget.S b/runtime/interpreter/mterp/arm/op_aget.S index 2cc4d66565..11f7079c3f 100644 --- a/runtime/interpreter/mterp/arm/op_aget.S +++ b/runtime/interpreter/mterp/arm/op_aget.S @@ -1,11 +1,11 @@ -%default { "load":"ldr", "shift":"2", "is_object":"0", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" } +%default { "load":"ldr", "shift":"2", "data_offset":"MIRROR_INT_ARRAY_DATA_OFFSET" } /* * Array get, 32 bits or less. vAA <- vBB[vCC]. * * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. @@ -25,9 +25,5 @@ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST $load r2, [r0, #$data_offset] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if $is_object - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction diff --git a/runtime/interpreter/mterp/arm/op_iget_object_quick.S b/runtime/interpreter/mterp/arm/op_iget_object_quick.S index 1f8dc5afb2..fe29106d47 100644 --- a/runtime/interpreter/mterp/arm/op_iget_object_quick.S +++ b/runtime/interpreter/mterp/arm/op_iget_object_quick.S @@ -1 +1,17 @@ -%include "arm/op_iget_quick.S" {"is_object":"1"} + /* For: iget-object-quick */ + /* op vA, vB, offset@CCCC */ + mov r2, rINST, lsr #12 @ r2<- B + FETCH r1, 1 @ r1<- field byte offset + GET_VREG r0, r2 @ r0<- object we're operating on + cmp r0, #0 @ check object for null + beq common_errNullObject @ object was null + bl artIGetObjectFromMterp @ (obj, offset) + ldr r3, [rSELF, #THREAD_EXCEPTION_OFFSET] + ubfx r2, rINST, #8, #4 @ r2<- A + PREFETCH_INST 2 + cmp r3, #0 + bne MterpPossibleException @ bail out + SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 + ADVANCE 2 @ advance rPC + GET_INST_OPCODE ip @ extract opcode from rINST + GOTO_OPCODE ip @ jump to next instruction diff --git a/runtime/interpreter/mterp/arm/op_iget_quick.S b/runtime/interpreter/mterp/arm/op_iget_quick.S index 9229afcd1e..0eaf364f6b 100644 --- a/runtime/interpreter/mterp/arm/op_iget_quick.S +++ b/runtime/interpreter/mterp/arm/op_iget_quick.S @@ -1,5 +1,5 @@ -%default { "load":"ldr", "is_object":"0" } - /* For: iget-quick, iget-object-quick */ +%default { "load":"ldr" } + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -9,10 +9,6 @@ beq common_errNullObject @ object was null $load r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if $is_object - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index 060fe76aab..9975458b85 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -607,5 +607,14 @@ extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t i } } +extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t field_offset) + SHARED_REQUIRES(Locks::mutator_lock_) { + if (UNLIKELY(obj == nullptr)) { + 
ThrowNullPointerExceptionFromInterpreter(); + return nullptr; + } + return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset)); +} + } // namespace interpreter } // namespace art diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S index 33036e6cd7..2d6f057aa7 100644 --- a/runtime/interpreter/mterp/out/mterp_arm.S +++ b/runtime/interpreter/mterp/out/mterp_arm.S @@ -2013,7 +2013,7 @@ artMterpAsmInstructionStart = .L_op_nop * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. @@ -2033,11 +2033,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH_ADVANCE_INST 2 @ advance rPC, load rINST ldr r2, [r0, #MIRROR_INT_ARRAY_DATA_OFFSET] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if 0 - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction /* ------------------------------ */ @@ -2106,7 +2102,7 @@ artMterpAsmInstructionStart = .L_op_nop * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. @@ -2126,11 +2122,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH_ADVANCE_INST 2 @ advance rPC, load rINST ldrb r2, [r0, #MIRROR_BOOLEAN_ARRAY_DATA_OFFSET] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if 0 - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction @@ -2145,7 +2137,7 @@ artMterpAsmInstructionStart = .L_op_nop * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. @@ -2165,11 +2157,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH_ADVANCE_INST 2 @ advance rPC, load rINST ldrsb r2, [r0, #MIRROR_BYTE_ARRAY_DATA_OFFSET] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if 0 - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction @@ -2184,7 +2172,7 @@ artMterpAsmInstructionStart = .L_op_nop * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. 
@@ -2204,11 +2192,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH_ADVANCE_INST 2 @ advance rPC, load rINST ldrh r2, [r0, #MIRROR_CHAR_ARRAY_DATA_OFFSET] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if 0 - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction @@ -2223,7 +2207,7 @@ artMterpAsmInstructionStart = .L_op_nop * Note: using the usual FETCH/and/shift stuff, this fits in exactly 17 * instructions. We use a pair of FETCH_Bs instead. * - * for: aget, aget-object, aget-boolean, aget-byte, aget-char, aget-short + * for: aget, aget-boolean, aget-byte, aget-char, aget-short * * NOTE: assumes data offset for arrays is the same for all non-wide types. * If this changes, specialize. @@ -2243,11 +2227,7 @@ artMterpAsmInstructionStart = .L_op_nop FETCH_ADVANCE_INST 2 @ advance rPC, load rINST ldrsh r2, [r0, #MIRROR_SHORT_ARRAY_DATA_OFFSET] @ r2<- vBB[vCC] GET_INST_OPCODE ip @ extract opcode from rINST - .if 0 - SET_VREG_OBJECT r2, r9 @ vAA<- r2 - .else SET_VREG r2, r9 @ vAA<- r2 - .endif GOTO_OPCODE ip @ jump to next instruction @@ -7127,7 +7107,7 @@ constvalop_long_to_double: .balign 128 .L_op_iget_quick: /* 0xe3 */ /* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -7137,11 +7117,7 @@ constvalop_long_to_double: beq common_errNullObject @ object was null ldr r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if 0 - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -7167,26 +7143,24 @@ constvalop_long_to_double: .balign 128 .L_op_iget_object_quick: /* 0xe5 */ /* File: arm/op_iget_object_quick.S */ -/* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-object-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset - GET_VREG r3, r2 @ r3<- object we're operating on - ubfx r2, rINST, #8, #4 @ r2<- A - cmp r3, #0 @ check object for null + GET_VREG r0, r2 @ r0<- object we're operating on + cmp r0, #0 @ check object for null beq common_errNullObject @ object was null - ldr r0, [r3, r1] @ r0<- obj.field - FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if 1 + bl artIGetObjectFromMterp @ (obj, offset) + ldr r3, [rSELF, #THREAD_EXCEPTION_OFFSET] + ubfx r2, rINST, #8, #4 @ r2<- A + PREFETCH_INST 2 + cmp r3, #0 + bne MterpPossibleException @ bail out SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else - SET_VREG r0, r2 @ fp[A]<- r0 - .endif + ADVANCE 2 @ advance rPC GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction - /* ------------------------------ */ .balign 128 .L_op_iput_quick: /* 0xe6 */ @@ -7373,7 +7347,7 @@ constvalop_long_to_double: .L_op_iget_boolean_quick: /* 0xef */ /* File: arm/op_iget_boolean_quick.S */ /* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -7383,11 +7357,7 @@ constvalop_long_to_double: beq common_errNullObject @ object was null ldrb r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ 
advance rPC, load rINST - .if 0 - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -7397,7 +7367,7 @@ constvalop_long_to_double: .L_op_iget_byte_quick: /* 0xf0 */ /* File: arm/op_iget_byte_quick.S */ /* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -7407,11 +7377,7 @@ constvalop_long_to_double: beq common_errNullObject @ object was null ldrsb r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if 0 - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -7421,7 +7387,7 @@ constvalop_long_to_double: .L_op_iget_char_quick: /* 0xf1 */ /* File: arm/op_iget_char_quick.S */ /* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -7431,11 +7397,7 @@ constvalop_long_to_double: beq common_errNullObject @ object was null ldrh r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if 0 - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -7445,7 +7407,7 @@ constvalop_long_to_double: .L_op_iget_short_quick: /* 0xf2 */ /* File: arm/op_iget_short_quick.S */ /* File: arm/op_iget_quick.S */ - /* For: iget-quick, iget-object-quick */ + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */ /* op vA, vB, offset@CCCC */ mov r2, rINST, lsr #12 @ r2<- B FETCH r1, 1 @ r1<- field byte offset @@ -7455,11 +7417,7 @@ constvalop_long_to_double: beq common_errNullObject @ object was null ldrsh r0, [r3, r1] @ r0<- obj.field FETCH_ADVANCE_INST 2 @ advance rPC, load rINST - .if 0 - SET_VREG_OBJECT r0, r2 @ fp[A]<- r0 - .else SET_VREG r0, r2 @ fp[A]<- r0 - .endif GET_INST_OPCODE ip @ extract opcode from rINST GOTO_OPCODE ip @ jump to next instruction @@ -12204,9 +12162,11 @@ MterpCheckSuspendAndContinue: */ MterpFallback: EXPORT_PC +#if MTERP_LOGGING mov r0, rSELF add r1, rFP, #OFF_FP_SHADOWFRAME bl MterpLogFallback +#endif MterpCommonFallback: mov r0, #0 @ signal retry with reference interpreter. b MterpDone @@ -12220,9 +12180,6 @@ MterpCommonFallback: * uint32_t* rFP (should still be live, pointer to base of vregs) */ MterpExceptionReturn: - ldr r2, [rFP, #OFF_FP_RESULT_REGISTER] - str r0, [r2] - str r1, [r2, #4] mov r0, #1 @ signal return to caller. 
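Restated in C++, the rewritten op_iget_object_quick handler above performs a null check, a call into the C++ helper, a pending-exception check, and a reference-aware register store. A self-contained sketch of that control flow (all types here are simplified stand-ins for ART's ShadowFrame, Thread and mirror::Object):

```cpp
#include <cstdint>

// Simplified stand-ins for ART's mirror::Object, ShadowFrame and Thread.
struct Object { Object* field; };
struct Frame { Object* regs[16]; };
struct ThreadState { bool pending_exception = false; };

// Control flow of the rewritten handler: GET_VREG + null check, the call to
// artIGetObjectFromMterp, the THREAD_EXCEPTION_OFFSET check, then
// SET_VREG_OBJECT (a reference-aware store, unlike plain SET_VREG).
bool HandleIGetObjectQuick(Frame& frame, uint16_t inst, uint16_t offset,
                           ThreadState* self) {
  uint32_t vB = inst >> 12;         // object register (B)
  uint32_t vA = (inst >> 8) & 0xF;  // destination register (A)
  Object* obj = frame.regs[vB];
  if (obj == nullptr) {             // beq common_errNullObject
    self->pending_exception = true;
    return false;
  }
  (void)offset;                     // the real helper reads at a byte offset
  Object* value = obj->field;       // artIGetObjectFromMterp(obj, offset)
  if (self->pending_exception) {    // bne MterpPossibleException
    return false;
  }
  frame.regs[vA] = value;           // SET_VREG_OBJECT r0, r2
  return true;
}
```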
b MterpDone MterpReturn: diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc index ab70f4c158..05668a97b3 100644 --- a/runtime/jit/jit.cc +++ b/runtime/jit/jit.cc @@ -64,10 +64,14 @@ void Jit::AddTimingLogger(const TimingLogger& logger) { cumulative_timings_.AddLogger(logger); } -Jit::Jit() - : jit_library_handle_(nullptr), jit_compiler_handle_(nullptr), jit_load_(nullptr), - jit_compile_method_(nullptr), dump_info_on_shutdown_(false), - cumulative_timings_("JIT timings"), save_profiling_info_(false) { +Jit::Jit() : jit_library_handle_(nullptr), + jit_compiler_handle_(nullptr), + jit_load_(nullptr), + jit_compile_method_(nullptr), + dump_info_on_shutdown_(false), + cumulative_timings_("JIT timings"), + save_profiling_info_(false), + generate_debug_info_(false) { } Jit* Jit::Create(JitOptions* options, std::string* error_msg) { @@ -77,7 +81,10 @@ Jit* Jit::Create(JitOptions* options, std::string* error_msg) { return nullptr; } jit->code_cache_.reset(JitCodeCache::Create( - options->GetCodeCacheInitialCapacity(), options->GetCodeCacheMaxCapacity(), error_msg)); + options->GetCodeCacheInitialCapacity(), + options->GetCodeCacheMaxCapacity(), + jit->generate_debug_info_, + error_msg)); if (jit->GetCodeCache() == nullptr) { return nullptr; } @@ -99,7 +106,7 @@ bool Jit::LoadCompiler(std::string* error_msg) { *error_msg = oss.str(); return false; } - jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**)>( + jit_load_ = reinterpret_cast<void* (*)(CompilerCallbacks**, bool*)>( dlsym(jit_library_handle_, "jit_load")); if (jit_load_ == nullptr) { dlclose(jit_library_handle_); @@ -121,9 +128,10 @@ bool Jit::LoadCompiler(std::string* error_msg) { return false; } CompilerCallbacks* callbacks = nullptr; + bool will_generate_debug_symbols = false; VLOG(jit) << "Calling JitLoad interpreter_only=" << Runtime::Current()->GetInstrumentation()->InterpretOnly(); - jit_compiler_handle_ = (jit_load_)(&callbacks); + jit_compiler_handle_ = (jit_load_)(&callbacks, &will_generate_debug_symbols); if (jit_compiler_handle_ == nullptr) { dlclose(jit_library_handle_); *error_msg = "JIT couldn't load compiler"; @@ -136,6 +144,7 @@ bool Jit::LoadCompiler(std::string* error_msg) { return false; } compiler_callbacks_ = callbacks; + generate_debug_info_ = will_generate_debug_symbols; return true; } diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h index 0edce2fa49..42bbbe73c7 100644 --- a/runtime/jit/jit.h +++ b/runtime/jit/jit.h @@ -86,7 +86,7 @@ class Jit { // JIT compiler void* jit_library_handle_; void* jit_compiler_handle_; - void* (*jit_load_)(CompilerCallbacks**); + void* (*jit_load_)(CompilerCallbacks**, bool*); void (*jit_unload_)(void*); bool (*jit_compile_method_)(void*, ArtMethod*, Thread*); @@ -99,6 +99,7 @@ class Jit { CompilerCallbacks* compiler_callbacks_; // Owned by the jit compiler. bool save_profiling_info_; + bool generate_debug_info_; DISALLOW_COPY_AND_ASSIGN(Jit); }; diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index 1ac57b1d84..bf3bd3c4aa 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -49,8 +49,16 @@ static constexpr int kProtCode = PROT_READ | PROT_EXEC; JitCodeCache* JitCodeCache::Create(size_t initial_capacity, size_t max_capacity, + bool generate_debug_info, std::string* error_msg) { CHECK_GE(max_capacity, initial_capacity); + + // Generating debug information is mostly for using the 'perf' tool, which does + // not work with ashmem. 
+ bool use_ashmem = !generate_debug_info; + // With 'perf', we want a 1-1 mapping between an address and a method. + bool garbage_collect_code = !generate_debug_info; + // We need to have 32 bit offsets from method headers in code cache which point to things // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work. // Ensure we're below 1 GB to be safe. @@ -65,7 +73,7 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, std::string error_str; // Map name specific for android_os_Debug.cpp accounting. MemMap* data_map = MemMap::MapAnonymous( - "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str); + "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str, use_ashmem); if (data_map == nullptr) { std::ostringstream oss; oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity; @@ -84,7 +92,8 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, DCHECK_EQ(code_size + data_size, max_capacity); uint8_t* divider = data_map->Begin() + data_size; - MemMap* code_map = data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str); + MemMap* code_map = + data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str, use_ashmem); if (code_map == nullptr) { std::ostringstream oss; oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity; @@ -95,14 +104,16 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, data_size = initial_capacity / 2; code_size = initial_capacity - data_size; DCHECK_EQ(code_size + data_size, initial_capacity); - return new JitCodeCache(code_map, data_map, code_size, data_size, max_capacity); + return new JitCodeCache( + code_map, data_map, code_size, data_size, garbage_collect_code, max_capacity); } JitCodeCache::JitCodeCache(MemMap* code_map, MemMap* data_map, size_t initial_code_capacity, size_t initial_data_capacity, - size_t max_capacity) + size_t max_capacity, + bool garbage_collect_code) : lock_("Jit code cache", kJitCodeCacheLock), lock_cond_("Jit code cache variable", lock_), collection_in_progress_(false), @@ -113,7 +124,8 @@ JitCodeCache::JitCodeCache(MemMap* code_map, code_end_(initial_code_capacity), data_end_(initial_data_capacity), has_done_one_collection_(false), - last_update_time_ns_(0) { + last_update_time_ns_(0), + garbage_collect_code_(garbage_collect_code) { code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/); data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/); @@ -516,7 +528,11 @@ void JitCodeCache::GarbageCollectCache(Thread* self) { // we hold the lock. 
{ MutexLock mu(self, lock_); - if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) { + if (!garbage_collect_code_) { + IncreaseCodeCacheCapacity(); + NotifyCollectionDone(self); + return; + } else if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) { has_done_one_collection_ = false; NotifyCollectionDone(self); return; @@ -730,5 +746,10 @@ void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSE info->SetIsMethodBeingCompiled(false); } +size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) { + MutexLock mu(Thread::Current(), lock_); + return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr))); +} + } // namespace jit } // namespace art diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index 1c842e4aa7..a152bcd2d4 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -53,7 +53,10 @@ class JitCodeCache { // Create the code cache with a code + data capacity equal to "capacity", error message is passed // in the out arg error_msg. - static JitCodeCache* Create(size_t initial_capacity, size_t max_capacity, std::string* error_msg); + static JitCodeCache* Create(size_t initial_capacity, + size_t max_capacity, + bool generate_debug_info, + std::string* error_msg); // Number of bytes allocated in the code cache. size_t CodeCacheSize() REQUIRES(!lock_); @@ -159,13 +162,16 @@ class JitCodeCache { return current_capacity_; } + size_t GetMemorySizeOfCodePointer(const void* ptr) REQUIRES(!lock_); + private: // Take ownership of maps. JitCodeCache(MemMap* code_map, MemMap* data_map, size_t initial_code_capacity, size_t initial_data_capacity, - size_t max_capacity); + size_t max_capacity, + bool garbage_collect_code); // Internal version of 'CommitCode' that will not retry if the // allocation fails. Return null if the allocation fails. @@ -252,6 +258,9 @@ class JitCodeCache { // It is atomic to avoid locking when reading it. Atomic<uint64_t> last_update_time_ns_; + // Whether we can do garbage collection. + const bool garbage_collect_code_; + DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache); }; diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc index e133847b06..3571edb277 100644 --- a/runtime/mem_map.cc +++ b/runtime/mem_map.cc @@ -34,14 +34,11 @@ #include "thread-inl.h" #include "utils.h" -#define USE_ASHMEM 1 - -#ifdef USE_ASHMEM #include <cutils/ashmem.h> + #ifndef ANDROID_OS #include <sys/resource.h> #endif -#endif #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON @@ -282,7 +279,8 @@ MemMap* MemMap::MapAnonymous(const char* name, int prot, bool low_4gb, bool reuse, - std::string* error_msg) { + std::string* error_msg, + bool use_ashmem) { #ifndef __LP64__ UNUSED(low_4gb); #endif @@ -303,17 +301,17 @@ MemMap* MemMap::MapAnonymous(const char* name, ScopedFd fd(-1); -#ifdef USE_ASHMEM -#ifdef __ANDROID__ - const bool use_ashmem = true; -#else - // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't - // fail due to ulimit restrictions. If they will then use a regular mmap. - struct rlimit rlimit_fsize; - CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0); - const bool use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) || - (page_aligned_byte_count < rlimit_fsize.rlim_cur); -#endif + if (use_ashmem) { + if (!kIsTargetBuild) { + // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't + // fail due to ulimit restrictions. If they will then use a regular mmap. 
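For reference, the host-only fallback that follows is standard getrlimit(2) usage; a self-contained equivalent, with an illustrative function name:

#include <sys/resource.h>
#include <cstddef>

// True when a file of byte_count bytes can be created under the current
// RLIMIT_FSIZE, i.e. when the /tmp-backed fake-ashmem path is safe to use.
bool FitsFileSizeLimit(size_t byte_count) {
  struct rlimit rlimit_fsize;
  if (getrlimit(RLIMIT_FSIZE, &rlimit_fsize) != 0) {
    return false;  // Be conservative and fall back to a plain mmap.
  }
  return rlimit_fsize.rlim_cur == RLIM_INFINITY ||
         byte_count < rlimit_fsize.rlim_cur;
}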
+ struct rlimit rlimit_fsize; + CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlimit_fsize), 0); + use_ashmem = (rlimit_fsize.rlim_cur == RLIM_INFINITY) || + (page_aligned_byte_count < rlimit_fsize.rlim_cur); + } + } + if (use_ashmem) { // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are // prefixed "dalvik-". @@ -326,7 +324,6 @@ MemMap* MemMap::MapAnonymous(const char* name, } flags &= ~MAP_ANONYMOUS; } -#endif // We need to store and potentially set an error number for pretty printing of errors int saved_errno = 0; @@ -508,7 +505,7 @@ MemMap::MemMap(const std::string& name, uint8_t* begin, size_t size, void* base_ } MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot, - std::string* error_msg) { + std::string* error_msg, bool use_ashmem) { DCHECK_GE(new_end, Begin()); DCHECK_LE(new_end, End()); DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_); @@ -532,23 +529,22 @@ MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_pro DCHECK_EQ(tail_base_begin + tail_base_size, old_base_end); DCHECK_ALIGNED(tail_base_size, kPageSize); -#ifdef USE_ASHMEM - // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are - // prefixed "dalvik-". - std::string debug_friendly_name("dalvik-"); - debug_friendly_name += tail_name; - ScopedFd fd(ashmem_create_region(debug_friendly_name.c_str(), tail_base_size)); - int flags = MAP_PRIVATE | MAP_FIXED; - if (fd.get() == -1) { - *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", - tail_name, strerror(errno)); - return nullptr; - } -#else - ScopedFd fd(-1); + int int_fd = -1; int flags = MAP_PRIVATE | MAP_ANONYMOUS; -#endif - + if (use_ashmem) { + // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are + // prefixed "dalvik-". + std::string debug_friendly_name("dalvik-"); + debug_friendly_name += tail_name; + int_fd = ashmem_create_region(debug_friendly_name.c_str(), tail_base_size); + flags = MAP_PRIVATE | MAP_FIXED; + if (int_fd == -1) { + *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", + tail_name, strerror(errno)); + return nullptr; + } + } + ScopedFd fd(int_fd); MEMORY_TOOL_MAKE_UNDEFINED(tail_base_begin, tail_base_size); // Unmap/map the tail region. diff --git a/runtime/mem_map.h b/runtime/mem_map.h index efce09ae94..ed213659e3 100644 --- a/runtime/mem_map.h +++ b/runtime/mem_map.h @@ -57,17 +57,18 @@ class MemMap { // "reuse" allows re-mapping an address range from an existing mapping. // // The word "anonymous" in this context means "not backed by a file". The supplied - // 'ashmem_name' will be used -- on systems that support it -- to give the mapping + // 'name' will be used -- on systems that support it -- to give the mapping // a name. // // On success, returns a MemMap instance. On failure, returns null. - static MemMap* MapAnonymous(const char* ashmem_name, + static MemMap* MapAnonymous(const char* name, uint8_t* addr, size_t byte_count, int prot, bool low_4gb, bool reuse, - std::string* error_msg); + std::string* error_msg, + bool use_ashmem = true); // Create placeholder for a region allocated by direct call to mmap.
// This is useful when we do not have control over the code calling mmap, @@ -168,7 +169,8 @@ class MemMap { MemMap* RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot, - std::string* error_msg); + std::string* error_msg, + bool use_ashmem = true); static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map) REQUIRES(!Locks::mem_maps_lock_); diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 9cb37eed58..786cf06e2d 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -221,18 +221,22 @@ void QuickExceptionHandler::SetCatchEnvironmentForOptimizedHandler(StackVisitor* CodeInfo code_info = handler_method_header_->GetOptimizedCodeInfo(); StackMapEncoding encoding = code_info.ExtractEncoding(); + // Find stack map of the catch block. + StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding); + DCHECK(catch_stack_map.IsValid()); + DexRegisterMap catch_vreg_map = + code_info.GetDexRegisterMapOf(catch_stack_map, encoding, number_of_vregs); + if (!catch_vreg_map.IsValid()) { + return; + } + // Find stack map of the throwing instruction. StackMap throw_stack_map = code_info.GetStackMapForNativePcOffset(stack_visitor->GetNativePcOffset(), encoding); DCHECK(throw_stack_map.IsValid()); DexRegisterMap throw_vreg_map = code_info.GetDexRegisterMapOf(throw_stack_map, encoding, number_of_vregs); - - // Find stack map of the catch block. - StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding); - DCHECK(catch_stack_map.IsValid()); - DexRegisterMap catch_vreg_map = - code_info.GetDexRegisterMapOf(catch_stack_map, encoding, number_of_vregs); + DCHECK(throw_vreg_map.IsValid()); // Copy values between them. for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) { @@ -387,6 +391,10 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { number_of_vregs) : code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs); + if (!vreg_map.IsValid()) { + return; + } + for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) { if (updated_vregs != nullptr && updated_vregs[vreg]) { // Keep the value set by debugger. diff --git a/runtime/stack.cc b/runtime/stack.cc index 9098d38bb0..5faff93b97 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -322,6 +322,9 @@ bool StackVisitor::GetVRegFromOptimizedCode(ArtMethod* m, uint16_t vreg, VRegKin number_of_dex_registers) : code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); + if (!dex_register_map.IsValid()) { + return false; + } DexRegisterLocation::Kind location_kind = dex_register_map.GetLocationKind(vreg, number_of_dex_registers, code_info, encoding); switch (location_kind) { diff --git a/runtime/stack_map.h b/runtime/stack_map.h index a15a08180e..84185ce49f 100644 --- a/runtime/stack_map.h +++ b/runtime/stack_map.h @@ -473,6 +473,9 @@ class DexRegisterLocationCatalog { class DexRegisterMap { public: explicit DexRegisterMap(MemoryRegion region) : region_(region) {} + DexRegisterMap() {} + + bool IsValid() const { return region_.pointer() != nullptr; } // Get the surface kind of Dex register `dex_register_number`. 
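The IsValid() addition above replaces a hard DCHECK with a queryable "no dex register map recorded" state; the call sites in quick_exception_handler.cc and stack.cc above then simply bail out. A self-contained sketch of the sentinel pattern, using stand-in types rather than ART's real MemoryRegion/DexRegisterMap:

#include <cstddef>

class RegionSketch {
 public:
  RegionSketch() : pointer_(nullptr), size_(0) {}
  RegionSketch(void* pointer, size_t size) : pointer_(pointer), size_(size) {}
  void* pointer() const { return pointer_; }
 private:
  void* pointer_;
  size_t size_;
};

class MapSketch {
 public:
  MapSketch() {}  // Default construction wraps a null region.
  explicit MapSketch(RegionSketch region) : region_(region) {}
  // Valid only when backed by a real region, i.e. when the stack map
  // actually recorded dex register locations.
  bool IsValid() const { return region_.pointer() != nullptr; }
 private:
  RegionSketch region_;
};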
DexRegisterLocation::Kind GetLocationKind(uint16_t dex_register_number, @@ -1136,11 +1139,14 @@ class CodeInfo { DexRegisterMap GetDexRegisterMapOf(StackMap stack_map, const StackMapEncoding& encoding, uint32_t number_of_dex_registers) const { - DCHECK(stack_map.HasDexRegisterMap(encoding)); - uint32_t offset = GetDexRegisterMapsOffset(encoding) - + stack_map.GetDexRegisterMapOffset(encoding); - size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers); - return DexRegisterMap(region_.Subregion(offset, size)); + if (!stack_map.HasDexRegisterMap(encoding)) { + return DexRegisterMap(); + } else { + uint32_t offset = GetDexRegisterMapsOffset(encoding) + + stack_map.GetDexRegisterMapOffset(encoding); + size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers); + return DexRegisterMap(region_.Subregion(offset, size)); + } } // Return the `DexRegisterMap` pointed by `inline_info` at depth `depth`. @@ -1148,11 +1154,14 @@ class CodeInfo { InlineInfo inline_info, const StackMapEncoding& encoding, uint32_t number_of_dex_registers) const { - DCHECK(inline_info.HasDexRegisterMapAtDepth(depth)); - uint32_t offset = GetDexRegisterMapsOffset(encoding) - + inline_info.GetDexRegisterMapOffsetAtDepth(depth); - size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers); - return DexRegisterMap(region_.Subregion(offset, size)); + if (!inline_info.HasDexRegisterMapAtDepth(depth)) { + return DexRegisterMap(); + } else { + uint32_t offset = GetDexRegisterMapsOffset(encoding) + + inline_info.GetDexRegisterMapOffsetAtDepth(depth); + size_t size = ComputeDexRegisterMapSizeOf(offset, number_of_dex_registers); + return DexRegisterMap(region_.Subregion(offset, size)); + } } InlineInfo GetInlineInfoOf(StackMap stack_map, const StackMapEncoding& encoding) const { diff --git a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc new file mode 100644 index 0000000000..54879fbad9 --- /dev/null +++ b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <iostream> + +#include "base/casts.h" +#include "base/macros.h" +#include "java_vm_ext.h" +#include "jni_env_ext.h" +#include "thread-inl.h" + +namespace art { +namespace { + +static volatile std::atomic<bool> vm_was_shutdown(false); + +extern "C" JNIEXPORT void JNICALL Java_Main_waitAndCallIntoJniEnv(JNIEnv* env, jclass) { + // Wait until the runtime is shutdown. + while (!vm_was_shutdown.load()) { + usleep(1000); + } + std::cout << "About to call exception check\n"; + env->ExceptionCheck(); + LOG(ERROR) << "Should not be reached!"; +} + +// NO_RETURN does not work with extern "C" for target builds. +extern "C" JNIEXPORT void JNICALL Java_Main_destroyJavaVMAndExit(JNIEnv* env, jclass) { + // Fake up the managed stack so we can detach. 
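The function body that follows fakes up the managed stack, then detaches the thread and tears the VM down. Expressed through the documented JNI invocation API alone, the teardown sequence looks like this sketch, which omits the ART-internal cast the test actually uses:

#include <jni.h>

void DetachAndDestroy(JNIEnv* env) {
  JavaVM* vm = nullptr;
  env->GetJavaVM(&vm);        // Any JNIEnv can recover its owning JavaVM.
  vm->DetachCurrentThread();  // Release this thread's env before teardown.
  vm->DestroyJavaVM();        // Daemon threads are left running, which is
                              // exactly the situation this test exercises.
}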
+ Thread* const self = Thread::Current(); + self->SetTopOfStack(nullptr); + self->SetTopOfShadowStack(nullptr); + JavaVM* vm = down_cast<JNIEnvExt*>(env)->vm; + vm->DetachCurrentThread(); + vm->DestroyJavaVM(); + vm_was_shutdown.store(true); + // Give threads some time to get stuck in ExceptionCheck. + usleep(1000000); + if (env != nullptr) { + // Use env != nullptr to trick noreturn. + exit(0); + } +} + +} // namespace +} // namespace art diff --git a/test/136-daemon-jni-shutdown/expected.txt b/test/136-daemon-jni-shutdown/expected.txt new file mode 100644 index 0000000000..f0b6353e9f --- /dev/null +++ b/test/136-daemon-jni-shutdown/expected.txt @@ -0,0 +1,5 @@ +JNI_OnLoad called +About to call exception check +About to call exception check +About to call exception check +About to call exception check diff --git a/test/136-daemon-jni-shutdown/info.txt b/test/136-daemon-jni-shutdown/info.txt new file mode 100644 index 0000000000..06a12dff9e --- /dev/null +++ b/test/136-daemon-jni-shutdown/info.txt @@ -0,0 +1 @@ +Test that daemon threads that call into a JNI env after the runtime is shutdown do not crash.
\ No newline at end of file diff --git a/test/136-daemon-jni-shutdown/src/Main.java b/test/136-daemon-jni-shutdown/src/Main.java new file mode 100644 index 0000000000..6eceb757b1 --- /dev/null +++ b/test/136-daemon-jni-shutdown/src/Main.java @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Test that daemon threads that call into a JNI env after the runtime is shutdown do not crash. + */ +public class Main { + + public final static int THREAD_COUNT = 4; + + public static void main(String[] args) throws Exception { + System.loadLibrary(args[0]); + + for (int i = 0; i < THREAD_COUNT; i++) { + Thread t = new Thread(new DaemonRunnable()); + t.setDaemon(true); + t.start(); + } + // Give threads time to start and become stuck in waitAndCallIntoJniEnv. + Thread.sleep(1000); + destroyJavaVMAndExit(); + } + + static native void waitAndCallIntoJniEnv(); + static native void destroyJavaVMAndExit(); + + private static class DaemonRunnable implements Runnable { + public void run() { + for (;;) { + waitAndCallIntoJniEnv(); + } + } + } +} diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk index f74a516486..b922b4576e 100644 --- a/test/Android.libarttest.mk +++ b/test/Android.libarttest.mk @@ -30,6 +30,7 @@ LIBARTTEST_COMMON_SRC_FILES := \ 051-thread/thread_test.cc \ 117-nopatchoat/nopatchoat.cc \ 1337-gc-coverage/gc_coverage.cc \ + 136-daemon-jni-shutdown/daemon_jni_shutdown.cc \ 137-cfi/cfi.cc \ 139-register-natives/regnative.cc \ 141-class-unload/jni_unload.cc \ diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index ee6b7aa4f7..c9343d48cd 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -548,6 +548,11 @@ TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \ 484-checker-register-hints \ 537-checker-arraycopy +# Tests that should fail in the read barrier configuration with JIT. +# 141: Disabled because of intermittent failures on the ART Buildbot (b/25866001).
+TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS := \ + 141-class-unload + ifeq ($(ART_USE_READ_BARRIER),true) ifneq (,$(filter default,$(COMPILER_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ @@ -562,10 +567,18 @@ ifeq ($(ART_USE_READ_BARRIER),true) $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) endif + + ifneq (,$(filter jit,$(COMPILER_TYPES))) + ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ + $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \ + $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ + $(TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) + endif endif TEST_ART_BROKEN_DEFAULT_READ_BARRIER_RUN_TESTS := TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := +TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS := # Tests that should fail in the heap poisoning configuration with the default (Quick) compiler. # 137: Quick has no support for read barriers and punts to the @@ -873,20 +886,20 @@ define define-test-art-run-test ifeq ($(9),multiimage) test_groups += ART_RUN_TEST_$$(uc_host_or_target)_IMAGE_RULES run_test_options += --multi-image - ifeq ($(1),host) - prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13)) - else - prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13)) - endif + ifeq ($(1),host) + prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13)) + else + prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_no-pic_multi_$(13)) + endif else ifeq ($(9),multipicimage) test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PICIMAGE_RULES - run_test_options += --pic-image --multi-image - ifeq ($(1),host) - prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13)) - else - prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13)) - endif + run_test_options += --pic-image --multi-image + ifeq ($(1),host) + prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13)) + else + prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_pic_multi_$(13)) + endif else $$(error found $(9) expected $(IMAGE_TYPES)) endif diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt index 3d25d5f807..2cb2c50257 100644 --- a/tools/libcore_failures_concurrent_collector.txt +++ b/tools/libcore_failures_concurrent_collector.txt @@ -24,10 +24,12 @@ bug: 26155567 }, { - description: "TimeoutException on host-x86-concurrent-collector", + description: "TimeoutException on host-{x86,x86-64}-concurrent-collector", result: EXEC_FAILED, - names: ["libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries, - libcore.java.util.zip.ZipInputStreamTest#testLongMessage"], + modes: [host], + names: ["libcore.java.util.zip.GZIPOutputStreamTest#testSyncFlushEnabled", + "libcore.java.util.zip.ZipFileTest#testZipFileWithLotsOfEntries", + "libcore.java.util.zip.ZipInputStreamTest#testLongMessage"], bug: 26507762 } ]