79 files changed, 1817 insertions, 444 deletions
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 7a34683bb5..7c87a60084 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -39,6 +39,7 @@ #include "gc/accounting/card_table-inl.h" #include "gc/accounting/heap_bitmap.h" #include "gc/accounting/space_bitmap-inl.h" +#include "gc/collector/concurrent_copying.h" #include "gc/heap.h" #include "gc/space/large_object_space.h" #include "gc/space/space-inl.h" @@ -1377,6 +1378,8 @@ void ImageWriter::CalculateNewObjectOffsets() { runtime->GetCalleeSaveMethod(Runtime::kRefsOnly); image_methods_[ImageHeader::kRefsAndArgsSaveMethod] = runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs); + image_methods_[ImageHeader::kSaveEverythingMethod] = + runtime->GetCalleeSaveMethod(Runtime::kSaveEverything); // Visit image methods first to have the main runtime methods in the first image. for (auto* m : image_methods_) { CHECK(m != nullptr); @@ -1823,6 +1826,11 @@ void ImageWriter::CopyAndFixupObject(Object* obj) { const auto it = saved_hashcode_map_.find(obj); dst->SetLockWord(it != saved_hashcode_map_.end() ? LockWord::FromHashCode(it->second, 0u) : LockWord::Default(), false); + if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) { + // Treat all of the objects in the image as marked to avoid unnecessary dirty pages. This is + // safe since we mark all of the objects that may reference non immune objects as gray. + CHECK(dst->AtomicSetMarkBit(0, 1)); + } FixupObject(obj, dst); } diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 626a975aa6..7d13656b54 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -217,8 +217,7 @@ class ImageWriter FINAL { // uint32 = typeof(lockword_) // Subtract read barrier bits since we want these to remain 0, or else it may result in DCHECK // failures due to invalid read barrier bits during object field reads. - static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits - - LockWord::kReadBarrierStateSize; + static const size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits - LockWord::kGCStateSize; // 111000.....0 static const size_t kBinMask = ((static_cast<size_t>(1) << kBinBits) - 1) << kBinShift; diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc index d4dd978c5c..2471f798be 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -31,10 +31,6 @@ uint32_t ArmBaseRelativePatcher::ReserveSpace(uint32_t offset, } uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { - // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it - // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk - // of code. To avoid any alignment discrepancies for the final chunk, we always align the - // offset after reserving of writing any chunk. 
uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset, MethodReference(nullptr, 0u), @@ -46,7 +42,7 @@ uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { unprocessed_patches_.clear(); thunk_locations_.push_back(aligned_offset); - offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_); + offset = aligned_offset + thunk_code_.size(); } return offset; } @@ -65,13 +61,7 @@ uint32_t ArmBaseRelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) if (UNLIKELY(!WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk_code_)))) { return 0u; } - uint32_t thunk_end_offset = aligned_offset + thunk_code_.size(); - // Align after writing chunk, see the ReserveSpace() above. - offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_); - aligned_code_delta = offset - thunk_end_offset; - if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { - return 0u; - } + offset = aligned_offset + thunk_code_.size(); } return offset; } @@ -92,7 +82,7 @@ uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset, MethodReference method_ref, uint32_t max_extra_space) { uint32_t quick_code_size = compiled_method->GetQuickCode().size(); - uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); + uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader)); uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size); // Adjust for extra space required by the subclass. next_aligned_offset = compiled_method->AlignCode(next_aligned_offset + max_extra_space); @@ -106,9 +96,9 @@ uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset, if (needs_thunk) { // A single thunk will cover all pending patches. unprocessed_patches_.clear(); - uint32_t thunk_location = compiled_method->AlignCode(offset); + uint32_t thunk_location = CompiledMethod::AlignCode(offset, instruction_set_); thunk_locations_.push_back(thunk_location); - offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_); + offset = thunk_location + thunk_code_.size(); } } for (const LinkerPatch& patch : compiled_method->GetPatches()) { diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index a8078e3049..eace3d4326 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -48,18 +48,18 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { const ArrayRef<const LinkerPatch>& method3_patches, uint32_t distance_without_thunks) { CHECK_EQ(distance_without_thunks % kArmAlignment, 0u); - const uint32_t method1_offset = - CompiledCode::AlignCode(kTrampolineSize, kThumb2) + sizeof(OatQuickMethodHeader); + uint32_t method1_offset = + kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); // We want to put the method3 at a very precise offset. const uint32_t method3_offset = method1_offset + distance_without_thunks; - CHECK_ALIGNED(method3_offset - sizeof(OatQuickMethodHeader), kArmAlignment); + CHECK_ALIGNED(method3_offset, kArmAlignment); // Calculate size of method2 so that we put method3 at the correct place. 
+ const uint32_t method1_end = method1_offset + method1_code.size(); const uint32_t method2_offset = - CompiledCode::AlignCode(method1_offset + method1_code.size(), kThumb2) + - sizeof(OatQuickMethodHeader); + method1_end + CodeAlignmentSize(method1_end) + sizeof(OatQuickMethodHeader); const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset); std::vector<uint8_t> method2_raw_code(method2_size); ArrayRef<const uint8_t> method2_code(method2_raw_code); @@ -78,8 +78,11 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { if (result3.second == method3_offset + 1 /* thumb mode */) { return false; // No thunk. } else { - uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kThumb2); - CHECK_EQ(result3.second, method3_offset + aligned_thunk_size + 1 /* thumb mode */); + uint32_t thunk_end = + CompiledCode::AlignCode(method3_offset - sizeof(OatQuickMethodHeader), kThumb2) + + ThunkSize(); + uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end); + CHECK_EQ(result3.second, header_offset + sizeof(OatQuickMethodHeader) + 1 /* thumb mode */); return true; // Thunk present. } } @@ -352,9 +355,12 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { uint32_t method1_offset = GetMethodOffset(1u); uint32_t method3_offset = GetMethodOffset(3u); + ASSERT_TRUE(IsAligned<kArmAlignment>(method3_offset)); uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader); - ASSERT_TRUE(IsAligned<kArmAlignment>(method3_header_offset)); - uint32_t thunk_offset = method3_header_offset - CompiledCode::AlignCode(ThunkSize(), kThumb2); + uint32_t thunk_offset = + RoundDown(method3_header_offset - ThunkSize(), GetInstructionSetAlignment(kThumb2)); + DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()), + method3_header_offset); ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset)); uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */); ASSERT_EQ(diff & 1u, 0u); diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index fdd14be4c4..4c8788e30d 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -83,7 +83,7 @@ uint32_t Arm64RelativePatcher::ReserveSpace(uint32_t offset, // Now that we have the actual offset where the code will be placed, locate the ADRP insns // that actually require the thunk. 
- uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); + uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader)); ArrayRef<const uint8_t> code = compiled_method->GetQuickCode(); uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size()); DCHECK(compiled_method != nullptr); diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc index 09729fdf96..573de736c4 100644 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -67,36 +67,39 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { const ArrayRef<const LinkerPatch>& last_method_patches, uint32_t distance_without_thunks) { CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u); - const uint32_t method1_offset = - CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader); + uint32_t method1_offset = + kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); - const uint32_t gap_start = - CompiledCode::AlignCode(method1_offset + method1_code.size(), kArm64); + const uint32_t gap_start = method1_offset + method1_code.size(); // We want to put the method3 at a very precise offset. const uint32_t last_method_offset = method1_offset + distance_without_thunks; + CHECK_ALIGNED(last_method_offset, kArm64Alignment); const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader); - CHECK_ALIGNED(gap_end, kArm64Alignment); - // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB). + // Fill the gap with intermediate methods in chunks of 2MiB and the first in [2MiB, 4MiB). // (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB - // offsets by this test.) + // offsets by this test. Making the first chunk bigger makes it easy to give all intermediate + // methods the same alignment of the end, so the thunk insertion adds a predictable size as + // long as it's after the first chunk.) uint32_t method_idx = 2u; constexpr uint32_t kSmallChunkSize = 2 * MB; std::vector<uint8_t> gap_code; - size_t gap_size = gap_end - gap_start; - for (; gap_size >= 2u * kSmallChunkSize; gap_size -= kSmallChunkSize) { - uint32_t chunk_code_size = kSmallChunkSize - sizeof(OatQuickMethodHeader); + uint32_t gap_size = gap_end - gap_start; + uint32_t num_small_chunks = std::max(gap_size / kSmallChunkSize, 1u) - 1u; + uint32_t chunk_start = gap_start; + uint32_t chunk_size = gap_size - num_small_chunks * kSmallChunkSize; + for (uint32_t i = 0; i <= num_small_chunks; ++i) { // num_small_chunks+1 iterations. + uint32_t chunk_code_size = + chunk_size - CodeAlignmentSize(chunk_start) - sizeof(OatQuickMethodHeader); gap_code.resize(chunk_code_size, 0u); AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code), ArrayRef<const LinkerPatch>()); method_idx += 1u; + chunk_start += chunk_size; + chunk_size = kSmallChunkSize; // For all but the first chunk. 
+ DCHECK_EQ(CodeAlignmentSize(gap_end), CodeAlignmentSize(chunk_start)); } - uint32_t chunk_code_size = gap_size - sizeof(OatQuickMethodHeader); - gap_code.resize(chunk_code_size, 0u); - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code), - ArrayRef<const LinkerPatch>()); - method_idx += 1u; // Add the last method and link AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches); @@ -109,8 +112,9 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { // There may be a thunk before method2. if (last_result.second != last_method_offset) { // Thunk present. Check that there's only one. - uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kArm64); - CHECK_EQ(last_result.second, last_method_offset + aligned_thunk_size); + uint32_t thunk_end = CompiledCode::AlignCode(gap_end, kArm64) + ThunkSize(); + uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end); + CHECK_EQ(last_result.second, header_offset + sizeof(OatQuickMethodHeader)); } return method_idx; } @@ -341,7 +345,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { uint32_t dex_cache_arrays_begin, uint32_t element_offset) { uint32_t method1_offset = - CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader); + kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); ASSERT_LT(method1_offset, adrp_offset); CHECK_ALIGNED(adrp_offset, 4u); uint32_t num_nops = (adrp_offset - method1_offset) / 4u; @@ -391,7 +395,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { bool has_thunk, uint32_t string_offset) { uint32_t method1_offset = - CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader); + kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); ASSERT_LT(method1_offset, adrp_offset); CHECK_ALIGNED(adrp_offset, 4u); uint32_t num_nops = (adrp_offset - method1_offset) / 4u; @@ -614,10 +618,12 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) { uint32_t method1_offset = GetMethodOffset(1u); uint32_t last_method_offset = GetMethodOffset(last_method_idx); + ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_offset)); uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader); - ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_header_offset)); - uint32_t thunk_offset = last_method_header_offset - CompiledCode::AlignCode(ThunkSize(), kArm64); - ASSERT_TRUE(IsAligned<kArm64Alignment>(thunk_offset)); + uint32_t thunk_offset = + RoundDown(last_method_header_offset - ThunkSize(), GetInstructionSetAlignment(kArm64)); + DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()), + last_method_header_offset); uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1); CHECK_ALIGNED(diff, 4u); ASSERT_LT(diff, 128 * MB); diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h index ec69107d8f..d21f33e46f 100644 --- a/compiler/linker/relative_patcher_test.h +++ b/compiler/linker/relative_patcher_test.h @@ -98,6 +98,14 @@ class RelativePatcherTest : public testing::Test { patches)); } + uint32_t CodeAlignmentSize(uint32_t header_offset_to_align) { + // We want to align the code rather than the preheader. 
+ uint32_t unaligned_code_offset = header_offset_to_align + sizeof(OatQuickMethodHeader); + uint32_t aligned_code_offset = + CompiledMethod::AlignCode(unaligned_code_offset, instruction_set_); + return aligned_code_offset - unaligned_code_offset; + } + void Link() { // Reserve space. static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset."); @@ -106,9 +114,8 @@ class RelativePatcherTest : public testing::Test { for (auto& compiled_method : compiled_methods_) { offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]); - uint32_t aligned_offset = compiled_method->AlignCode(offset); - uint32_t aligned_code_delta = aligned_offset - offset; - offset += aligned_code_delta; + uint32_t alignment_size = CodeAlignmentSize(offset); + offset += alignment_size; offset += sizeof(OatQuickMethodHeader); uint32_t quick_code_offset = offset + compiled_method->CodeDelta(); @@ -136,11 +143,10 @@ class RelativePatcherTest : public testing::Test { for (auto& compiled_method : compiled_methods_) { offset = patcher_->WriteThunks(&out_, offset); - uint32_t aligned_offset = compiled_method->AlignCode(offset); - uint32_t aligned_code_delta = aligned_offset - offset; - CHECK_LE(aligned_code_delta, sizeof(kPadding)); - out_.WriteFully(kPadding, aligned_code_delta); - offset += aligned_code_delta; + uint32_t alignment_size = CodeAlignmentSize(offset); + CHECK_LE(alignment_size, sizeof(kPadding)); + out_.WriteFully(kPadding, alignment_size); + offset += alignment_size; out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader)); offset += sizeof(OatQuickMethodHeader); diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index f20c715f58..8273b15667 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -87,6 +87,13 @@ class ChecksumUpdatingOutputStream : public OutputStream { OatHeader* const oat_header_; }; +inline uint32_t CodeAlignmentSize(uint32_t header_offset, const CompiledMethod& compiled_method) { + // We want to align the code rather than the preheader. + uint32_t unaligned_code_offset = header_offset + sizeof(OatQuickMethodHeader); + uint32_t aligned_code_offset = compiled_method.AlignCode(unaligned_code_offset); + return aligned_code_offset - unaligned_code_offset; +} + } // anonymous namespace // Defines the location of the raw dex file to write. 
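For reference, here is a minimal, self-contained sketch of the padding computation that the two CodeAlignmentSize() helpers above perform. The names RoundUp, kCodeAlignment and kMethodHeaderSize below are illustrative stand-ins only (the real helpers use CompiledMethod::AlignCode() and sizeof(OatQuickMethodHeader)); the point is that the padding is chosen so that the code following the OatQuickMethodHeader lands on the alignment boundary, not the header itself.

#include <cstdint>
#include <cstdio>

// Stand-in values; in ART these come from the instruction set
// (e.g. kArm64Alignment) and sizeof(OatQuickMethodHeader).
constexpr uint32_t kCodeAlignment = 16u;
constexpr uint32_t kMethodHeaderSize = 24u;

// Round x up to the next multiple of n (n must be a power of two).
constexpr uint32_t RoundUp(uint32_t x, uint32_t n) {
  return (x + n - 1u) & ~(n - 1u);
}

// Padding needed so that the *code* placed after the method header is aligned.
constexpr uint32_t CodeAlignmentSize(uint32_t header_offset) {
  uint32_t unaligned_code_offset = header_offset + kMethodHeaderSize;
  return RoundUp(unaligned_code_offset, kCodeAlignment) - unaligned_code_offset;
}

int main() {
  uint32_t offset = 100u;                    // end of the previous method or thunk
  uint32_t pad = CodeAlignmentSize(offset);  // 100 + 24 = 124, rounded up to 128, so pad = 4
  uint32_t code_offset = offset + pad + kMethodHeaderSize;
  std::printf("pad=%u code_offset=%u aligned=%d\n",
              pad, code_offset, code_offset % kCodeAlignment == 0u);
  return 0;
}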
@@ -817,8 +824,8 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { uint32_t thumb_offset) { offset_ = writer_->relative_patcher_->ReserveSpace( offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex())); - offset_ = compiled_method->AlignCode(offset_); - DCHECK_ALIGNED_PARAM(offset_, + offset_ += CodeAlignmentSize(offset_, *compiled_method); + DCHECK_ALIGNED_PARAM(offset_ + sizeof(OatQuickMethodHeader), GetInstructionSetAlignment(compiled_method->GetInstructionSet())); return offset_ + sizeof(OatQuickMethodHeader) + thumb_offset; } @@ -1011,17 +1018,16 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { ReportWriteFailure("relative call thunk", it); return false; } - uint32_t aligned_offset = compiled_method->AlignCode(offset_); - uint32_t aligned_code_delta = aligned_offset - offset_; - if (aligned_code_delta != 0) { - if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { + uint32_t alignment_size = CodeAlignmentSize(offset_, *compiled_method); + if (alignment_size != 0) { + if (!writer_->WriteCodeAlignment(out, alignment_size)) { ReportWriteFailure("code alignment padding", it); return false; } - offset_ += aligned_code_delta; + offset_ += alignment_size; DCHECK_OFFSET_(); } - DCHECK_ALIGNED_PARAM(offset_, + DCHECK_ALIGNED_PARAM(offset_ + sizeof(OatQuickMethodHeader), GetInstructionSetAlignment(compiled_method->GetInstructionSet())); DCHECK_EQ(method_offsets.code_offset_, offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 4a4b98cc48..a5493ab9f8 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -765,16 +765,24 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, LocationSummary* locations = instruction->GetLocations(); uint32_t register_mask = locations->GetRegisterMask(); - if (locations->OnlyCallsOnSlowPath()) { - // In case of slow path, we currently set the location of caller-save registers - // to register (instead of their stack location when pushed before the slow-path - // call). Therefore register_mask contains both callee-save and caller-save - // registers that hold objects. We must remove the caller-save from the mask, since - // they will be overwritten by the callee. - register_mask &= core_callee_save_mask_; + if (instruction->IsSuspendCheck()) { + // Suspend check has special ABI that saves the caller-save registers in callee, + // so we want to emit stack maps containing the registers. + // TODO: Register allocator still reserves space for the caller-save registers. + // We should add slow-path-specific caller-save information into LocationSummary + // and refactor the code here as well as in the register allocator to use it. + } else { + if (locations->OnlyCallsOnSlowPath()) { + // In case of slow path, we currently set the location of caller-save registers + // to register (instead of their stack location when pushed before the slow-path + // call). Therefore register_mask contains both callee-save and caller-save + // registers that hold objects. We must remove the caller-save from the mask, since + // they will be overwritten by the callee. + register_mask &= core_callee_save_mask_; + } + // The register mask must be a subset of callee-save registers. + DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); } - // The register mask must be a subset of callee-save registers. 
- DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); stack_map_stream_.BeginStackMapEntry(outer_dex_pc, native_pc, register_mask, diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index c18b79394c..c105940f28 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -119,11 +119,9 @@ class SuspendCheckSlowPathARM : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ b(GetReturnLabel()); } else { @@ -1289,6 +1287,44 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } +void InstructionCodeGeneratorARM::GenerateVcmp(HInstruction* instruction) { + Primitive::Type type = instruction->InputAt(0)->GetType(); + Location lhs_loc = instruction->GetLocations()->InAt(0); + Location rhs_loc = instruction->GetLocations()->InAt(1); + if (rhs_loc.IsConstant()) { + // 0.0 is the only immediate that can be encoded directly in + // a VCMP instruction. + // + // Both the JLS (section 15.20.1) and the JVMS (section 6.5) + // specify that in a floating-point comparison, positive zero + // and negative zero are considered equal, so we can use the + // literal 0.0 for both cases here. + // + // Note however that some methods (Float.equal, Float.compare, + // Float.compareTo, Double.equal, Double.compare, + // Double.compareTo, Math.max, Math.min, StrictMath.max, + // StrictMath.min) consider 0.0 to be (strictly) greater than + // -0.0. So if we ever translate calls to these methods into a + // HCompare instruction, we must handle the -0.0 case with + // care here. + DCHECK(rhs_loc.GetConstant()->IsArithmeticZero()); + if (type == Primitive::kPrimFloat) { + __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>()); + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>())); + } + } else { + if (type == Primitive::kPrimFloat) { + __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>()); + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>())); + } + } +} + void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label ATTRIBUTE_UNUSED) { @@ -1389,22 +1425,14 @@ void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condi Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; Label* false_target = false_target_in == nullptr ? 
&fallthrough_target : false_target_in; - LocationSummary* locations = condition->GetLocations(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - Primitive::Type type = condition->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: GenerateLongComparesAndJumps(condition, true_target, false_target); break; case Primitive::kPrimFloat: - __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>()); - GenerateFPJumps(condition, true_target, false_target); - break; case Primitive::kPrimDouble: - __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()), - FromLowSToD(right.AsFpuRegisterPairLow<SRegister>())); + GenerateVcmp(condition); GenerateFPJumps(condition, true_target, false_target); break; default: @@ -1585,7 +1613,7 @@ void LocationsBuilderARM::HandleCondition(HCondition* cond) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1))); if (!cond->IsEmittedAtUseSite()) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -1632,12 +1660,8 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { GenerateLongComparesAndJumps(cond, &true_label, &false_label); break; case Primitive::kPrimFloat: - __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>()); - GenerateFPJumps(cond, &true_label, &false_label); - break; case Primitive::kPrimDouble: - __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()), - FromLowSToD(right.AsFpuRegisterPairLow<SRegister>())); + GenerateVcmp(cond); GenerateFPJumps(cond, &true_label, &false_label); break; } @@ -3654,7 +3678,7 @@ void LocationsBuilderARM::VisitCompare(HCompare* compare) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1))); locations->SetOut(Location::RequiresRegister()); break; } @@ -3699,12 +3723,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { __ LoadImmediate(out, 0); - if (type == Primitive::kPrimFloat) { - __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>()); - } else { - __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()), - FromLowSToD(right.AsFpuRegisterPairLow<SRegister>())); - } + GenerateVcmp(compare); __ vmstat(); // transfer FP status register to ARM APSR. 
less_cond = ARMFPCondition(kCondLT, compare->IsGtBias()); break; @@ -3998,6 +4017,17 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI } } +Location LocationsBuilderARM::ArithmeticZeroOrFpuRegister(HInstruction* input) { + DCHECK(input->GetType() == Primitive::kPrimDouble || input->GetType() == Primitive::kPrimFloat) + << input->GetType(); + if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) || + (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) { + return Location::ConstantLocation(input->AsConstant()); + } else { + return Location::RequiresFpuRegister(); + } +} + Location LocationsBuilderARM::ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode) { DCHECK(!Primitive::IsFloatingPointType(constant->GetType())); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index f9fcabd903..fa7709b9a3 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -180,6 +180,7 @@ class LocationsBuilderARM : public HGraphVisitor { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + Location ArithmeticZeroOrFpuRegister(HInstruction* input); Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode); bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode); bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode); @@ -281,6 +282,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, Label* false_target); + void GenerateVcmp(HInstruction* instruction); void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 115cee6492..54c9efcafe 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -398,11 +398,9 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ B(GetReturnLabel()); } else { @@ -609,6 +607,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { DCHECK_NE(obj_.reg(), LR); DCHECK_NE(obj_.reg(), WSP); DCHECK_NE(obj_.reg(), WZR); + // WIP0 is used by the slow path as a temp, it can not be the object register. + DCHECK_NE(obj_.reg(), IP0); DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg(); // "Compact" slow path, saving two moves. 
// @@ -751,10 +751,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) << instruction_->AsInvoke()->GetIntrinsic(); DCHECK_EQ(offset_, 0U); - DCHECK(index_.IsRegisterPair()); - // UnsafeGet's offset location is a register pair, the low - // part contains the correct offset. - index = index_.ToLow(); + DCHECK(index_.IsRegister()); } } @@ -1284,17 +1281,21 @@ void CodeGeneratorARM64::MoveLocation(Location destination, UseScratchRegisterScope temps(GetVIXLAssembler()); HConstant* src_cst = source.GetConstant(); CPURegister temp; - if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) { - temp = temps.AcquireW(); - } else if (src_cst->IsLongConstant()) { - temp = temps.AcquireX(); - } else if (src_cst->IsFloatConstant()) { - temp = temps.AcquireS(); + if (src_cst->IsZeroBitPattern()) { + temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant()) ? xzr : wzr; } else { - DCHECK(src_cst->IsDoubleConstant()); - temp = temps.AcquireD(); + if (src_cst->IsIntConstant()) { + temp = temps.AcquireW(); + } else if (src_cst->IsLongConstant()) { + temp = temps.AcquireX(); + } else if (src_cst->IsFloatConstant()) { + temp = temps.AcquireS(); + } else { + DCHECK(src_cst->IsDoubleConstant()); + temp = temps.AcquireD(); + } + MoveConstant(temp, src_cst); } - MoveConstant(temp, src_cst); __ Str(temp, StackOperandFrom(destination)); } else { DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 240936c176..1b5fa857e7 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -243,7 +243,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { } Arm64Assembler* GetAssembler() const { return assembler_; } - vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; } + vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, @@ -364,7 +364,7 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap { private: Arm64Assembler* GetAssembler() const; vixl::aarch64::MacroAssembler* GetVIXLAssembler() const { - return GetAssembler()->vixl_masm_; + return GetAssembler()->GetVIXLAssembler(); } CodeGeneratorARM64* const codegen_; @@ -413,7 +413,7 @@ class CodeGeneratorARM64 : public CodeGenerator { HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; } const Arm64Assembler& GetAssembler() const OVERRIDE { return assembler_; } - vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; } + vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } // Emit a write barrier. 
void MarkGCCard(vixl::aarch64::Register object, diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 8dd82ef9cb..59e103a3bd 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -351,14 +351,12 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this, IsDirectEntrypoint(kQuickTestSuspend)); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ B(GetReturnLabel()); } else { diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 3472830379..fe1fddc7bf 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -300,13 +300,11 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ Bc(GetReturnLabel()); } else { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index a2fa24542c..ade21174f4 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -192,13 +192,11 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5d5fa8504a..eadb431440 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -149,13 +149,11 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, instruction_->GetLocations()); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); - RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 0b4c569b05..89d80cc281 100644 --- 
a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -298,6 +298,12 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { stream << constant->AsIntConstant()->GetValue(); } else if (constant->IsLongConstant()) { stream << constant->AsLongConstant()->GetValue(); + } else if (constant->IsFloatConstant()) { + stream << constant->AsFloatConstant()->GetValue(); + } else if (constant->IsDoubleConstant()) { + stream << constant->AsDoubleConstant()->GetValue(); + } else if (constant->IsNullConstant()) { + stream << "null"; } } else if (location.IsInvalid()) { stream << "invalid"; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 06d1148652..e3a9d27a53 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -26,7 +26,6 @@ #include "mirror/string.h" #include "thread.h" #include "utils/arm64/assembler_arm64.h" -#include "utils/arm64/constants_arm64.h" using namespace vixl::aarch64; // NOLINT(build/namespaces) @@ -62,14 +61,14 @@ ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_ } // namespace MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() { - return codegen_->GetAssembler()->vixl_masm_; + return codegen_->GetVIXLAssembler(); } ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() { return codegen_->GetGraph()->GetArena(); } -#define __ codegen->GetAssembler()->vixl_masm_-> +#define __ codegen->GetVIXLAssembler()-> static void MoveFromReturnRegister(Location trg, Primitive::Type type, @@ -782,7 +781,7 @@ static void GenUnsafeGet(HInvoke* invoke, DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong) || (type == Primitive::kPrimNot)); - MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_; + MacroAssembler* masm = codegen->GetVIXLAssembler(); Location base_loc = locations->InAt(1); Register base = WRegisterFrom(base_loc); // Object pointer. Location offset_loc = locations->InAt(2); @@ -916,7 +915,7 @@ static void GenUnsafePut(LocationSummary* locations, bool is_volatile, bool is_ordered, CodeGeneratorARM64* codegen) { - MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_; + MacroAssembler* masm = codegen->GetVIXLAssembler(); Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. @@ -1035,7 +1034,7 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, } static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) { - MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_; + MacroAssembler* masm = codegen->GetVIXLAssembler(); Register out = WRegisterFrom(locations->Out()); // Boolean result. diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index d82caf57e3..dc1f24a152 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -28,7 +28,7 @@ namespace arm64 { #ifdef ___ #error "ARM64 Assembler macro already defined." #else -#define ___ vixl_masm_-> +#define ___ vixl_masm_. 
#endif void Arm64Assembler::FinalizeCode() { @@ -39,16 +39,16 @@ void Arm64Assembler::FinalizeCode() { } size_t Arm64Assembler::CodeSize() const { - return vixl_masm_->GetBufferCapacity() - vixl_masm_->GetRemainingBufferSpace(); + return vixl_masm_.GetBufferCapacity() - vixl_masm_.GetRemainingBufferSpace(); } const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const { - return vixl_masm_->GetStartAddress<uint8_t*>(); + return vixl_masm_.GetStartAddress<uint8_t*>(); } void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) { // Copy the instructions from the buffer. - MemoryRegion from(vixl_masm_->GetStartAddress<void*>(), CodeSize()); + MemoryRegion from(vixl_masm_.GetStartAddress<void*>(), CodeSize()); region.CopyFrom(0, from); } @@ -86,7 +86,7 @@ void Arm64Assembler::AddConstant(XRegister rd, XRegister rn, int32_t value, } else { // temp = rd + value // rd = cond ? temp : rn - UseScratchRegisterScope temps(vixl_masm_); + UseScratchRegisterScope temps(&vixl_masm_); temps.Exclude(reg_x(rd), reg_x(rn)); Register temp = temps.AcquireX(); ___ Add(temp, reg_x(rn), value); @@ -183,7 +183,7 @@ void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset64 tr_offs, } void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset64 tr_offs) { - UseScratchRegisterScope temps(vixl_masm_); + UseScratchRegisterScope temps(&vixl_masm_); Register temp = temps.AcquireX(); ___ Mov(temp, reg_x(SP)); ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value())); @@ -207,7 +207,7 @@ void Arm64Assembler::LoadImmediate(XRegister dest, int32_t value, // temp = value // rd = cond ? temp : rd if (value != 0) { - UseScratchRegisterScope temps(vixl_masm_); + UseScratchRegisterScope temps(&vixl_masm_); temps.Exclude(reg_x(dest)); Register temp = temps.AcquireX(); ___ Mov(temp, value); @@ -314,7 +314,7 @@ void Arm64Assembler::LoadRawPtr(ManagedRegister m_dst, ManagedRegister m_base, O Arm64ManagedRegister base = m_base.AsArm64(); CHECK(dst.IsXRegister() && base.IsXRegister()); // Remove dst and base form the temp list - higher level API uses IP1, IP0. - UseScratchRegisterScope temps(vixl_masm_); + UseScratchRegisterScope temps(&vixl_masm_); temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister())); ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value())); } @@ -528,7 +528,7 @@ void Arm64Assembler::JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister CHECK(base.IsXRegister()) << base; CHECK(scratch.IsXRegister()) << scratch; // Remove base and scratch form the temp list - higher level API uses IP1, IP0. 
- UseScratchRegisterScope temps(vixl_masm_); + UseScratchRegisterScope temps(&vixl_masm_); temps.Exclude(reg_x(base.AsXRegister()), reg_x(scratch.AsXRegister())); ___ Ldr(reg_x(scratch.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value())); ___ Br(reg_x(scratch.AsXRegister())); @@ -621,7 +621,7 @@ void Arm64Assembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjus } void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) { - UseScratchRegisterScope temps(vixl_masm_); + UseScratchRegisterScope temps(&vixl_masm_); temps.Exclude(reg_x(exception->scratch_.AsXRegister())); Register temp = temps.AcquireX(); @@ -653,7 +653,7 @@ static inline dwarf::Reg DWARFReg(CPURegister reg) { void Arm64Assembler::SpillRegisters(CPURegList registers, int offset) { int size = registers.GetRegisterSizeInBytes(); - const Register sp = vixl_masm_->StackPointer(); + const Register sp = vixl_masm_.StackPointer(); // Since we are operating on register pairs, we would like to align on // double the standard size; on the other hand, we don't want to insert // an extra store, which will happen if the number of registers is even. @@ -681,7 +681,7 @@ void Arm64Assembler::SpillRegisters(CPURegList registers, int offset) { void Arm64Assembler::UnspillRegisters(CPURegList registers, int offset) { int size = registers.GetRegisterSizeInBytes(); - const Register sp = vixl_masm_->StackPointer(); + const Register sp = vixl_masm_.StackPointer(); // Be consistent with the logic for spilling registers. if (!IsAlignedParam(offset, 2 * size) && registers.GetCount() % 2 != 0) { const CPURegister& dst0 = registers.PopLowestIndex(); diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index 24b798201a..b8434b9263 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -23,7 +23,6 @@ #include "base/arena_containers.h" #include "base/logging.h" -#include "constants_arm64.h" #include "utils/arm64/managed_register_arm64.h" #include "utils/assembler.h" #include "offsets.h" @@ -84,16 +83,13 @@ class Arm64Exception { class Arm64Assembler FINAL : public Assembler { public: - // We indicate the size of the initial code generation buffer to the VIXL - // assembler. From there we it will automatically manage the buffer. explicit Arm64Assembler(ArenaAllocator* arena) : Assembler(arena), - exception_blocks_(arena->Adapter(kArenaAllocAssembler)), - vixl_masm_(new vixl::aarch64::MacroAssembler(kArm64BaseBufferSize)) {} + exception_blocks_(arena->Adapter(kArenaAllocAssembler)) {} - virtual ~Arm64Assembler() { - delete vixl_masm_; - } + virtual ~Arm64Assembler() {} + + vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return &vixl_masm_; } // Finalize the code. void FinalizeCode() OVERRIDE; @@ -287,9 +283,8 @@ class Arm64Assembler FINAL : public Assembler { // List of exception blocks to generate at the end of the code cache. ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_; - public: - // Vixl assembler. - vixl::aarch64::MacroAssembler* const vixl_masm_; + // VIXL assembler. + vixl::aarch64::MacroAssembler vixl_masm_; // Used for testing. 
friend class Arm64ManagedRegister_VixlRegisters_Test; diff --git a/compiler/utils/arm64/constants_arm64.h b/compiler/utils/arm64/constants_arm64.h deleted file mode 100644 index 01e8be9de6..0000000000 --- a/compiler/utils/arm64/constants_arm64.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_ -#define ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_ - -#include <stdint.h> -#include <iosfwd> -#include "arch/arm64/registers_arm64.h" -#include "base/casts.h" -#include "base/logging.h" -#include "globals.h" - -// TODO: Extend this file by adding missing functionality. - -namespace art { -namespace arm64 { - -constexpr size_t kArm64BaseBufferSize = 4096; - -} // namespace arm64 -} // namespace art - -#endif // ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_ diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h index f7d74d2af4..7378a0a081 100644 --- a/compiler/utils/arm64/managed_register_arm64.h +++ b/compiler/utils/arm64/managed_register_arm64.h @@ -17,8 +17,8 @@ #ifndef ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_ #define ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_ +#include "arch/arm64/registers_arm64.h" #include "base/logging.h" -#include "constants_arm64.h" #include "debug/dwarf/register.h" #include "utils/managed_register.h" diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index 6c43e863e1..4f0e144aa8 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -941,17 +941,11 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) opcode << (op != 0 ? "vsqrt" : "vneg") << (S != 0 ? ".f64" : ".f32"); args << d << ", " << m; } else if (op5 == 4) { - opcode << "vcmp" << (S != 0 ? ".f64" : ".f32"); + opcode << "vcmp" << ((op != 0) ? "e" : "") << (S != 0 ? ".f64" : ".f32"); args << d << ", " << m; - if (op != 0) { - args << " (quiet nan)"; - } } else if (op5 == 5) { - opcode << "vcmpe" << (S != 0 ? ".f64" : ".f32"); + opcode << "vcmp" << ((op != 0) ? "e" : "") << (S != 0 ? 
".f64" : ".f32"); args << d << ", #0.0"; - if (op != 0) { - args << " (quiet nan)"; - } if ((instr & 0x2f) != 0) { args << " (UNPREDICTABLE)"; } diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 8c3c5e580f..a0def615b0 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -76,6 +76,7 @@ const char* image_methods_descriptions_[] = { "kCalleeSaveMethod", "kRefsOnlySaveMethod", "kRefsAndArgsSaveMethod", + "kSaveEverythingMethod", }; const char* image_roots_descriptions_[] = { diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc index ee31c58979..6d80eb6198 100644 --- a/runtime/arch/arch_test.cc +++ b/runtime/arch/arch_test.cc @@ -69,7 +69,9 @@ static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALL #undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE; #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE -} +static constexpr size_t kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE; +#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE +} // namespace arm namespace arm64 { #include "arch/arm64/asm_support_arm64.h" @@ -79,7 +81,9 @@ static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALL #undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE; #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE -} +static constexpr size_t kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE; +#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE +} // namespace arm64 namespace mips { #include "arch/mips/asm_support_mips.h" @@ -89,7 +93,9 @@ static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALL #undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE; #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE -} +static constexpr size_t kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE; +#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE +} // namespace mips namespace mips64 { #include "arch/mips64/asm_support_mips64.h" @@ -99,7 +105,9 @@ static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALL #undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE; #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE -} +static constexpr size_t kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE; +#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE +} // namespace mips64 namespace x86 { #include "arch/x86/asm_support_x86.h" @@ -109,7 +117,9 @@ static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALL #undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE; #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE -} +static constexpr size_t kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE; +#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE +} // namespace x86 namespace x86_64 { #include "arch/x86_64/asm_support_x86_64.h" @@ -119,13 +129,18 @@ static constexpr size_t kFrameSizeRefsOnlyCalleeSave = FRAME_SIZE_REFS_ONLY_CALL #undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE static constexpr size_t kFrameSizeRefsAndArgsCalleeSave = FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE; #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE -} +static constexpr size_t 
kFrameSizeSaveEverythingCalleeSave = FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE; +#undef FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE +} // namespace x86_64 // Check architecture specific constants are sound. TEST_F(ArchTest, ARM) { CheckFrameSize(InstructionSet::kArm, Runtime::kSaveAll, arm::kFrameSizeSaveAllCalleeSave); CheckFrameSize(InstructionSet::kArm, Runtime::kRefsOnly, arm::kFrameSizeRefsOnlyCalleeSave); CheckFrameSize(InstructionSet::kArm, Runtime::kRefsAndArgs, arm::kFrameSizeRefsAndArgsCalleeSave); + CheckFrameSize(InstructionSet::kArm, + Runtime::kSaveEverything, + arm::kFrameSizeSaveEverythingCalleeSave); } @@ -134,33 +149,51 @@ TEST_F(ArchTest, ARM64) { CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsOnly, arm64::kFrameSizeRefsOnlyCalleeSave); CheckFrameSize(InstructionSet::kArm64, Runtime::kRefsAndArgs, arm64::kFrameSizeRefsAndArgsCalleeSave); + CheckFrameSize(InstructionSet::kArm64, + Runtime::kSaveEverything, + arm64::kFrameSizeSaveEverythingCalleeSave); } TEST_F(ArchTest, MIPS) { CheckFrameSize(InstructionSet::kMips, Runtime::kSaveAll, mips::kFrameSizeSaveAllCalleeSave); CheckFrameSize(InstructionSet::kMips, Runtime::kRefsOnly, mips::kFrameSizeRefsOnlyCalleeSave); - CheckFrameSize(InstructionSet::kMips, Runtime::kRefsAndArgs, + CheckFrameSize(InstructionSet::kMips, + Runtime::kRefsAndArgs, mips::kFrameSizeRefsAndArgsCalleeSave); + CheckFrameSize(InstructionSet::kMips, + Runtime::kSaveEverything, + mips::kFrameSizeSaveEverythingCalleeSave); } TEST_F(ArchTest, MIPS64) { CheckFrameSize(InstructionSet::kMips64, Runtime::kSaveAll, mips64::kFrameSizeSaveAllCalleeSave); CheckFrameSize(InstructionSet::kMips64, Runtime::kRefsOnly, mips64::kFrameSizeRefsOnlyCalleeSave); - CheckFrameSize(InstructionSet::kMips64, Runtime::kRefsAndArgs, + CheckFrameSize(InstructionSet::kMips64, + Runtime::kRefsAndArgs, mips64::kFrameSizeRefsAndArgsCalleeSave); + CheckFrameSize(InstructionSet::kMips64, + Runtime::kSaveEverything, + mips64::kFrameSizeSaveEverythingCalleeSave); } TEST_F(ArchTest, X86) { CheckFrameSize(InstructionSet::kX86, Runtime::kSaveAll, x86::kFrameSizeSaveAllCalleeSave); CheckFrameSize(InstructionSet::kX86, Runtime::kRefsOnly, x86::kFrameSizeRefsOnlyCalleeSave); CheckFrameSize(InstructionSet::kX86, Runtime::kRefsAndArgs, x86::kFrameSizeRefsAndArgsCalleeSave); + CheckFrameSize(InstructionSet::kX86, + Runtime::kSaveEverything, + x86::kFrameSizeSaveEverythingCalleeSave); } TEST_F(ArchTest, X86_64) { CheckFrameSize(InstructionSet::kX86_64, Runtime::kSaveAll, x86_64::kFrameSizeSaveAllCalleeSave); CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsOnly, x86_64::kFrameSizeRefsOnlyCalleeSave); - CheckFrameSize(InstructionSet::kX86_64, Runtime::kRefsAndArgs, + CheckFrameSize(InstructionSet::kX86_64, + Runtime::kRefsAndArgs, x86_64::kFrameSizeRefsAndArgsCalleeSave); + CheckFrameSize(InstructionSet::kX86_64, + Runtime::kSaveEverything, + x86_64::kFrameSizeSaveEverythingCalleeSave); } } // namespace art diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h index 1fa566bb5c..67f6f7a4c4 100644 --- a/runtime/arch/arm/asm_support_arm.h +++ b/runtime/arch/arm/asm_support_arm.h @@ -22,6 +22,7 @@ #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 112 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 112 +#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE 192 // Flag for enabling R4 optimization in arm runtime // #define ARM_R4_SUSPEND_FLAG diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 
34d3158c62..42418ad2ff 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -168,6 +168,65 @@ .cfi_adjust_cfa_offset -40 .endm + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveEverything) + */ +.macro SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME rTemp + push {r0-r12, lr} @ 14 words of callee saves and args. + .cfi_adjust_cfa_offset 56 + .cfi_rel_offset r0, 0 + .cfi_rel_offset r1, 4 + .cfi_rel_offset r2, 8 + .cfi_rel_offset r3, 12 + .cfi_rel_offset r4, 16 + .cfi_rel_offset r5, 20 + .cfi_rel_offset r6, 24 + .cfi_rel_offset r7, 28 + .cfi_rel_offset r8, 32 + .cfi_rel_offset r9, 36 + .cfi_rel_offset r10, 40 + .cfi_rel_offset r11, 44 + .cfi_rel_offset ip, 48 + .cfi_rel_offset lr, 52 + vpush {s0-s31} @ 32 words of float args. + .cfi_adjust_cfa_offset 128 + sub sp, #8 @ 2 words of space, alignment padding and Method* + .cfi_adjust_cfa_offset 8 + RUNTIME_CURRENT1 \rTemp @ Load Runtime::Current into rTemp. + @ Load kSaveEverything Method* to rTemp. + ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET] + str \rTemp, [sp, #0] @ Store kSaveEverything Method* to the bottom of the stack. + str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame. + + // Ugly compile-time check, but we only have the preprocessor. +#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 56 + 128 + 8) +#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(ARM) size not as expected." +#endif +.endm + +.macro RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME + add sp, #8 @ rewind sp + .cfi_adjust_cfa_offset -8 + vpop {s0-s31} + .cfi_adjust_cfa_offset -128 + pop {r0-r12, lr} @ 14 words of callee saves + .cfi_restore r0 + .cfi_restore r1 + .cfi_restore r2 + .cfi_restore r3 + .cfi_restore r5 + .cfi_restore r6 + .cfi_restore r7 + .cfi_restore r8 + .cfi_restore r9 + .cfi_restore r10 + .cfi_restore r11 + .cfi_restore r12 + .cfi_restore lr + .cfi_adjust_cfa_offset -56 +.endm + .macro RETURN_IF_RESULT_IS_ZERO cbnz r0, 1f @ result non-zero branch over bx lr @ return @@ -520,7 +579,7 @@ ENTRY art_quick_lock_object ldr r2, [r9, #THREAD_ID_OFFSET] ldrex r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] mov r3, r1 - and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits + and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits cbnz r3, .Lnot_unlocked @ already thin locked @ unlocked case - r1: original lock word that's zero except for the read barrier bits. orr r2, r1, r2 @ r2 holds thread id with count of 0 with preserved read barrier bits @@ -536,9 +595,9 @@ ENTRY art_quick_lock_object cbnz r2, .Lslow_lock @ lock word and self thread id's match -> recursive lock @ else contention, go to slow path mov r3, r1 @ copy the lock word to check count overflow. - and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits. + and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits. add r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count in lock word placing in r2 to check overflow - lsr r3, r2, LOCK_WORD_READ_BARRIER_STATE_SHIFT @ if either of the upper two bits (28-29) are set, we overflowed. + lsr r3, r2, #LOCK_WORD_GC_STATE_SHIFT @ if the first gc state bit is set, we overflowed. 
cbnz r3, .Lslow_lock @ if we overflow the count go slow path add r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count for real strex r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits @@ -581,17 +640,17 @@ ENTRY art_quick_unlock_object cbnz r2, .Lslow_unlock @ if either of the top two bits are set, go slow path ldr r2, [r9, #THREAD_ID_OFFSET] mov r3, r1 @ copy lock word to check thread id equality - and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits + and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits eor r3, r3, r2 @ lock_word.ThreadId() ^ self->ThreadId() uxth r3, r3 @ zero top 16 bits cbnz r3, .Lslow_unlock @ do lock word and self thread id's match? mov r3, r1 @ copy lock word to detect transition to unlocked - and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED @ zero the read barrier bits + and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits cmp r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE bpl .Lrecursive_thin_unlock @ transition to unlocked mov r3, r1 - and r3, #LOCK_WORD_READ_BARRIER_STATE_MASK @ r3: zero except for the preserved read barrier bits + and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED @ r3: zero except for the preserved gc bits dmb ish @ full (LoadStore|StoreStore) memory barrier #ifndef USE_READ_BARRIER str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @@ -1212,17 +1271,18 @@ END art_quick_alloc_object_region_tlab .extern artTestSuspendFromCode ENTRY art_quick_test_suspend #ifdef ARM_R4_SUSPEND_FLAG - ldrh r0, [rSELF, #THREAD_FLAGS_OFFSET] - mov rSUSPEND, #SUSPEND_CHECK_INTERVAL @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL - cbnz r0, 1f @ check Thread::Current()->suspend_count_ == 0 - bx lr @ return if suspend_count_ == 0 + ldrh rSUSPEND, [rSELF, #THREAD_FLAGS_OFFSET] + cbnz rSUSPEND, 1f @ check Thread::Current()->suspend_count_ == 0 + mov rSUSPEND, #SUSPEND_CHECK_INTERVAL @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL + bx lr @ return if suspend_count_ == 0 1: + mov rSUSPEND, #SUSPEND_CHECK_INTERVAL @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL #endif + SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME r0 @ save everything for GC stack crawl mov r0, rSELF - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1 @ save callee saves for GC stack crawl - @ TODO: save FPRs to enable access in the debugger? - bl artTestSuspendFromCode @ (Thread*) - RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN + bl artTestSuspendFromCode @ (Thread*) + RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME + bx lr END art_quick_test_suspend ENTRY art_quick_implicit_suspend @@ -1772,6 +1832,20 @@ END art_quick_l2f */ .macro READ_BARRIER_MARK_REG name, reg ENTRY \name + // Null check so that we can load the lock word. + cmp \reg, #0 + beq .Lret_rb_\name + // Check lock word for mark bit, if marked return. + push {r0} + ldr r0, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET] + and r0, #LOCK_WORD_MARK_BIT_MASK_SHIFTED + cbz r0, .Lslow_rb_\name + // Restore LR and return. 
+ pop {r0} + bx lr + +.Lslow_rb_\name: + pop {r0} push {r0-r4, r9, r12, lr} @ save return address and core caller-save registers .cfi_adjust_cfa_offset 32 .cfi_rel_offset r0, 0 @@ -1831,6 +1905,8 @@ ENTRY \name .endif .endif pop {r0-r4, r9, r12, pc} @ restore caller-save registers and return +.Lret_rb_\name: + bx lr END \name .endm diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h index 0fb8a6317b..c474d2ee1c 100644 --- a/runtime/arch/arm/quick_method_frame_info_arm.h +++ b/runtime/arch/arm/quick_method_frame_info_arm.h @@ -34,6 +34,9 @@ static constexpr uint32_t kArmCalleeSaveArgSpills = (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3); static constexpr uint32_t kArmCalleeSaveAllSpills = (1 << art::arm::R4) | (1 << art::arm::R9); +static constexpr uint32_t kArmCalleeSaveEverythingSpills = + (1 << art::arm::R0) | (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3) | + (1 << art::arm::R4) | (1 << art::arm::R9) | (1 << art::arm::R12); static constexpr uint32_t kArmCalleeSaveFpAlwaysSpills = 0; static constexpr uint32_t kArmCalleeSaveFpRefSpills = 0; @@ -47,17 +50,21 @@ static constexpr uint32_t kArmCalleeSaveFpAllSpills = (1 << art::arm::S20) | (1 << art::arm::S21) | (1 << art::arm::S22) | (1 << art::arm::S23) | (1 << art::arm::S24) | (1 << art::arm::S25) | (1 << art::arm::S26) | (1 << art::arm::S27) | (1 << art::arm::S28) | (1 << art::arm::S29) | (1 << art::arm::S30) | (1 << art::arm::S31); +static constexpr uint32_t kArmCalleeSaveFpEverythingSpills = + kArmCalleeSaveFpArgSpills | kArmCalleeSaveFpAllSpills; constexpr uint32_t ArmCalleeSaveCoreSpills(Runtime::CalleeSaveType type) { return kArmCalleeSaveAlwaysSpills | kArmCalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) | - (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0); + (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0) | + (type == Runtime::kSaveEverything ? kArmCalleeSaveEverythingSpills : 0); } constexpr uint32_t ArmCalleeSaveFpSpills(Runtime::CalleeSaveType type) { return kArmCalleeSaveFpAlwaysSpills | kArmCalleeSaveFpRefSpills | (type == Runtime::kRefsAndArgs ? kArmCalleeSaveFpArgSpills: 0) | - (type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0); + (type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0) | + (type == Runtime::kSaveEverything ? 
kArmCalleeSaveFpEverythingSpills : 0); } constexpr uint32_t ArmCalleeSaveFrameSize(Runtime::CalleeSaveType type) { diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h index 989ecc6c5b..68d12e9306 100644 --- a/runtime/arch/arm64/asm_support_arm64.h +++ b/runtime/arch/arm64/asm_support_arm64.h @@ -22,5 +22,6 @@ #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 96 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 224 +#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE 512 #endif // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_ diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index a5be52d8a0..415bb71ba0 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -316,6 +316,204 @@ .cfi_adjust_cfa_offset -224 .endm + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveEverything) + */ +.macro SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME + sub sp, sp, #512 + .cfi_adjust_cfa_offset 512 + + // Ugly compile-time check, but we only have the preprocessor. +#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 512) +#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(ARM64) size not as expected." +#endif + + // Save FP registers. + stp d0, d1, [sp, #8] + stp d2, d3, [sp, #24] + stp d4, d5, [sp, #40] + stp d6, d7, [sp, #56] + stp d8, d9, [sp, #72] + stp d10, d11, [sp, #88] + stp d12, d13, [sp, #104] + stp d14, d15, [sp, #120] + stp d16, d17, [sp, #136] + stp d18, d19, [sp, #152] + stp d20, d21, [sp, #168] + stp d22, d23, [sp, #184] + stp d24, d25, [sp, #200] + stp d26, d27, [sp, #216] + stp d28, d29, [sp, #232] + stp d30, d31, [sp, #248] + + // Save core registers. + str x0, [sp, #264] + .cfi_rel_offset x0, 264 + + stp x1, x2, [sp, #272] + .cfi_rel_offset x1, 272 + .cfi_rel_offset x2, 280 + + stp x3, x4, [sp, #288] + .cfi_rel_offset x3, 288 + .cfi_rel_offset x4, 296 + + stp x5, x6, [sp, #304] + .cfi_rel_offset x5, 304 + .cfi_rel_offset x6, 312 + + stp x7, x8, [sp, #320] + .cfi_rel_offset x7, 320 + .cfi_rel_offset x8, 328 + + stp x9, x10, [sp, #336] + .cfi_rel_offset x9, 336 + .cfi_rel_offset x10, 344 + + stp x11, x12, [sp, #352] + .cfi_rel_offset x11, 352 + .cfi_rel_offset x12, 360 + + stp x13, x14, [sp, #368] + .cfi_rel_offset x13, 368 + .cfi_rel_offset x14, 376 + + stp x15, x16, [sp, #384] + .cfi_rel_offset x15, 384 + .cfi_rel_offset x16, 392 + + stp x17, x18, [sp, #400] + .cfi_rel_offset x17, 400 + .cfi_rel_offset x18, 408 + + stp x19, x20, [sp, #416] + .cfi_rel_offset x19, 416 + .cfi_rel_offset x20, 424 + + stp x21, x22, [sp, #432] + .cfi_rel_offset x21, 432 + .cfi_rel_offset x22, 440 + + stp x23, x24, [sp, #448] + .cfi_rel_offset x23, 448 + .cfi_rel_offset x24, 456 + + stp x25, x26, [sp, #464] + .cfi_rel_offset x25, 464 + .cfi_rel_offset x26, 472 + + stp x27, x28, [sp, #480] + .cfi_rel_offset x27, 480 + .cfi_rel_offset x28, 488 + + stp x29, xLR, [sp, #496] + .cfi_rel_offset x29, 496 + .cfi_rel_offset x30, 504 + + adrp xIP0, :got:_ZN3art7Runtime9instance_E + ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E] + + ldr xIP0, [xIP0] // xIP0 = & (art::Runtime * art::Runtime.instance_) . + + // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kSaveEverything] . + // Loads appropriate callee-save-method. + ldr xIP0, [xIP0, RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET ] + + // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything]. 
+ str xIP0, [sp] + // Place sp in Thread::Current()->top_quick_frame. + mov xIP0, sp + str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET] +.endm + +.macro RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME + // Restore FP registers. + ldp d0, d1, [sp, #8] + ldp d2, d3, [sp, #24] + ldp d4, d5, [sp, #40] + ldp d6, d7, [sp, #56] + ldp d8, d9, [sp, #72] + ldp d10, d11, [sp, #88] + ldp d12, d13, [sp, #104] + ldp d14, d15, [sp, #120] + ldp d16, d17, [sp, #136] + ldp d18, d19, [sp, #152] + ldp d20, d21, [sp, #168] + ldp d22, d23, [sp, #184] + ldp d24, d25, [sp, #200] + ldp d26, d27, [sp, #216] + ldp d28, d29, [sp, #232] + ldp d30, d31, [sp, #248] + + // Restore core registers. + ldr x0, [sp, #264] + .cfi_restore x0 + + ldp x1, x2, [sp, #272] + .cfi_restore x1 + .cfi_restore x2 + + ldp x3, x4, [sp, #288] + .cfi_restore x3 + .cfi_restore x4 + + ldp x5, x6, [sp, #304] + .cfi_restore x5 + .cfi_restore x6 + + ldp x7, x8, [sp, #320] + .cfi_restore x7 + .cfi_restore x8 + + ldp x9, x10, [sp, #336] + .cfi_restore x9 + .cfi_restore x10 + + ldp x11, x12, [sp, #352] + .cfi_restore x11 + .cfi_restore x12 + + ldp x13, x14, [sp, #368] + .cfi_restore x13 + .cfi_restore x14 + + ldp x15, x16, [sp, #384] + .cfi_restore x15 + .cfi_restore x16 + + ldp x17, x18, [sp, #400] + .cfi_restore x17 + .cfi_restore x18 + + ldp x19, x20, [sp, #416] + .cfi_restore x19 + .cfi_restore x20 + + ldp x21, x22, [sp, #432] + .cfi_restore x21 + .cfi_restore x22 + + ldp x23, x24, [sp, #448] + .cfi_restore x23 + .cfi_restore x24 + + ldp x25, x26, [sp, #464] + .cfi_restore x25 + .cfi_restore x26 + + ldp x27, x28, [sp, #480] + .cfi_restore x27 + .cfi_restore x28 + + ldp x29, xLR, [sp, #496] + .cfi_restore x29 + .cfi_restore x30 + + add sp, sp, #512 + .cfi_adjust_cfa_offset -512 +.endm + .macro RETURN_IF_RESULT_IS_ZERO cbnz x0, 1f // result non-zero branch over ret // return @@ -1090,7 +1288,7 @@ ENTRY art_quick_lock_object ldr w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop? ldxr w1, [x4] mov x3, x1 - and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits + and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits cbnz w3, .Lnot_unlocked // already thin locked // unlocked case - x1: original lock word that's zero except for the read barrier bits. orr x2, x1, x2 // x2 holds thread id with count of 0 with preserved read barrier bits @@ -1106,9 +1304,9 @@ ENTRY art_quick_lock_object cbnz w2, .Lslow_lock // lock word and self thread id's match -> recursive lock // else contention, go to slow path mov x3, x1 // copy the lock word to check count overflow. - and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits. + and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits. add w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE // increment count in lock word placing in w2 to check overflow - lsr w3, w2, LOCK_WORD_READ_BARRIER_STATE_SHIFT // if either of the upper two bits (28-29) are set, we overflowed. + lsr w3, w2, #LOCK_WORD_GC_STATE_SHIFT // if the first gc state bit is set, we overflowed. 
cbnz w3, .Lslow_lock // if we overflow the count go slow path add w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE // increment count for real stxr w3, w2, [x4] @@ -1152,17 +1350,17 @@ ENTRY art_quick_unlock_object cbnz w2, .Lslow_unlock // if either of the top two bits are set, go slow path ldr w2, [xSELF, #THREAD_ID_OFFSET] mov x3, x1 // copy lock word to check thread id equality - and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits + and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits eor w3, w3, w2 // lock_word.ThreadId() ^ self->ThreadId() uxth w3, w3 // zero top 16 bits cbnz w3, .Lslow_unlock // do lock word and self thread id's match? mov x3, x1 // copy lock word to detect transition to unlocked - and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED // zero the read barrier bits + and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // zero the gc bits cmp w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE bpl .Lrecursive_thin_unlock // transition to unlocked mov x3, x1 - and w3, w3, #LOCK_WORD_READ_BARRIER_STATE_MASK // w3: zero except for the preserved read barrier bits + and w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED // w3: zero except for the preserved read barrier bits dmb ish // full (LoadStore|StoreStore) memory barrier #ifndef USE_READ_BARRIER str w3, [x4] @@ -1791,12 +1989,20 @@ ENTRY art_quick_alloc_object_region_tlab ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array // Load the class (x2) ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - // Read barrier for class load. + + // Most common case: GC is not marking. ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET] - cbnz x3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: + cbnz x3, .Lart_quick_alloc_object_region_tlab_marking +.Lart_quick_alloc_object_region_tlab_do_allocation: ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: +.Lart_quick_alloc_object_region_tlab_marking: + // GC is marking, check the lock word of the class for the mark bit. + // If the class is null, go slow path. The check is required to read the lock word. + cbz w2, .Lart_quick_alloc_object_region_tlab_slow_path + // Class is not null, check mark bit in lock word. + ldr w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + // If the bit is not zero, do the allocation. + tbnz w3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_alloc_object_region_tlab_do_allocation // The read barrier slow path. Mark // the class. stp x0, x1, [sp, #-32]! // Save registers (x0, x1, lr). @@ -1807,7 +2013,7 @@ ENTRY art_quick_alloc_object_region_tlab ldp x0, x1, [sp, #0] // Restore registers. ldr xLR, [sp, #16] add sp, sp, #32 - b .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit + b .Lart_quick_alloc_object_region_tlab_do_allocation .Lart_quick_alloc_object_region_tlab_slow_path: SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // Save callee saves in case of GC. mov x2, xSELF // Pass Thread::Current. 
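For readers following the lock-word changes in these fast paths: the masks and shifts above assume the word now keeps two "gc" bits (a read barrier bit and the new mark bit) between the thin-lock count and the two lock-state bits, which is why the code masks with LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED and detects count overflow by shifting by LOCK_WORD_GC_STATE_SHIFT. Below is a minimal, self-contained C++ sketch of that decomposition and of the overflow test; the exact field widths and bit positions are assumptions made for illustration, not copied from ART's lock_word.h.

#include <cstdint>
#include <cstdio>

// Assumed layout (illustrative, chosen to match the shifts used above):
//   [31:30] lock state   [29] mark bit   [28] read barrier bit
//   [27:16] thin lock count   [15:0] owner thread id
constexpr uint32_t kOwnerBits = 16;
constexpr uint32_t kCountBits = 12;
constexpr uint32_t kGcStateShift = kOwnerBits + kCountBits;      // stands in for LOCK_WORD_GC_STATE_SHIFT (28)
constexpr uint32_t kCountOne = 1u << kOwnerBits;                 // stands in for LOCK_WORD_THIN_LOCK_COUNT_ONE
constexpr uint32_t kGcStateMaskShifted = 0x3u << kGcStateShift;  // read barrier bit | mark bit

int main() {
  // Thin lock held by thread 42 with the recursion count already at its maximum.
  uint32_t lw = (((1u << kCountBits) - 1u) << kOwnerBits) | 42u;
  uint32_t count = (lw >> kOwnerBits) & ((1u << kCountBits) - 1u);
  // The lock fast path first clears the gc bits, then adds one to the count;
  // a carry into bit 28 makes (word >> kGcStateShift) non-zero, which is the
  // overflow test performed with LOCK_WORD_GC_STATE_SHIFT above.
  uint32_t no_gc_bits = lw & ~kGcStateMaskShifted;
  uint32_t overflowed = (no_gc_bits + kCountOne) >> kGcStateShift;
  std::printf("count=%u overflowed=%u\n",
              static_cast<unsigned>(count), static_cast<unsigned>(overflowed));
  return 0;
}

Clearing the gc bits before the increment keeps the overflow test independent of whether the object currently has its read barrier or mark bit set.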
@@ -1821,14 +2027,11 @@ END art_quick_alloc_object_region_tlab */ .extern artTestSuspendFromCode ENTRY art_quick_test_suspend - ldrh w0, [xSELF, #THREAD_FLAGS_OFFSET] // get xSELF->state_and_flags.as_struct.flags - cbnz w0, .Lneed_suspend // check flags == 0 - ret // return if flags == 0 -.Lneed_suspend: + SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME // save callee saves for stack crawl mov x0, xSELF - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves for stack crawl bl artTestSuspendFromCode // (Thread*) - RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN + RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME + ret END art_quick_test_suspend ENTRY art_quick_implicit_suspend @@ -2265,6 +2468,8 @@ END art_quick_indexof */ .macro READ_BARRIER_MARK_REG name, wreg, xreg ENTRY \name + // Reference is null, no work to do at all. + cbz \wreg, .Lret_rb_\name /* * Allocate 46 stack slots * 8 = 368 bytes: * - 20 slots for core registers X0-X19 @@ -2272,6 +2477,11 @@ ENTRY \name * - 1 slot for return address register XLR * - 1 padding slot for 16-byte stack alignment */ + // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler. + ldr wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + tbz wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name + ret +.Lslow_path_rb_\name: // Save all potentially live caller-save core registers. stp x0, x1, [sp, #-368]! .cfi_adjust_cfa_offset 368 @@ -2360,6 +2570,7 @@ ENTRY \name .cfi_restore x30 add sp, sp, #368 .cfi_adjust_cfa_offset -368 +.Lret_rb_\name: ret END \name .endm diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h index b3d250b1fb..188e46e72d 100644 --- a/runtime/arch/arm64/quick_method_frame_info_arm64.h +++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h @@ -29,7 +29,7 @@ namespace arm64 { static constexpr uint32_t kArm64CalleeSaveAlwaysSpills = // Note: ArtMethod::GetReturnPcOffsetInBytes() rely on the assumption that // LR is always saved on the top of the frame for all targets. - // That is, lr = *(sp + framesize - pointsize). + // That is, lr = *(sp + framesize - pointer_size). 
(1 << art::arm64::LR); // Callee saved registers static constexpr uint32_t kArm64CalleeSaveRefSpills = @@ -44,6 +44,14 @@ static constexpr uint32_t kArm64CalleeSaveArgSpills = (1 << art::arm64::X7); static constexpr uint32_t kArm64CalleeSaveAllSpills = (1 << art::arm64::X19); +static constexpr uint32_t kArm64CalleeSaveEverythingSpills = + (1 << art::arm64::X0) | (1 << art::arm64::X1) | (1 << art::arm64::X2) | + (1 << art::arm64::X3) | (1 << art::arm64::X4) | (1 << art::arm64::X5) | + (1 << art::arm64::X6) | (1 << art::arm64::X7) | (1 << art::arm64::X8) | + (1 << art::arm64::X9) | (1 << art::arm64::X10) | (1 << art::arm64::X11) | + (1 << art::arm64::X12) | (1 << art::arm64::X13) | (1 << art::arm64::X14) | + (1 << art::arm64::X15) | (1 << art::arm64::X16) | (1 << art::arm64::X17) | + (1 << art::arm64::X18) | (1 << art::arm64::X19); static constexpr uint32_t kArm64CalleeSaveFpAlwaysSpills = 0; static constexpr uint32_t kArm64CalleeSaveFpRefSpills = 0; @@ -55,17 +63,31 @@ static constexpr uint32_t kArm64CalleeSaveFpAllSpills = (1 << art::arm64::D8) | (1 << art::arm64::D9) | (1 << art::arm64::D10) | (1 << art::arm64::D11) | (1 << art::arm64::D12) | (1 << art::arm64::D13) | (1 << art::arm64::D14) | (1 << art::arm64::D15); +static constexpr uint32_t kArm64CalleeSaveFpEverythingSpills = + (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) | + (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) | + (1 << art::arm64::D6) | (1 << art::arm64::D7) | (1 << art::arm64::D8) | + (1 << art::arm64::D9) | (1 << art::arm64::D10) | (1 << art::arm64::D11) | + (1 << art::arm64::D12) | (1 << art::arm64::D13) | (1 << art::arm64::D14) | + (1 << art::arm64::D15) | (1 << art::arm64::D16) | (1 << art::arm64::D17) | + (1 << art::arm64::D18) | (1 << art::arm64::D19) | (1 << art::arm64::D20) | + (1 << art::arm64::D21) | (1 << art::arm64::D22) | (1 << art::arm64::D23) | + (1 << art::arm64::D24) | (1 << art::arm64::D25) | (1 << art::arm64::D26) | + (1 << art::arm64::D27) | (1 << art::arm64::D28) | (1 << art::arm64::D29) | + (1 << art::arm64::D30) | (1 << art::arm64::D31); constexpr uint32_t Arm64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) { return kArm64CalleeSaveAlwaysSpills | kArm64CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) | - (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0); + (type == Runtime::kSaveAll ? kArm64CalleeSaveAllSpills : 0) | + (type == Runtime::kSaveEverything ? kArm64CalleeSaveEverythingSpills : 0); } constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) { return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills | (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) | - (type == Runtime::kSaveAll ? kArm64CalleeSaveFpAllSpills : 0); + (type == Runtime::kSaveAll ? kArm64CalleeSaveFpAllSpills : 0) | + (type == Runtime::kSaveEverything ? 
kArm64CalleeSaveFpEverythingSpills : 0); } constexpr uint32_t Arm64CalleeSaveFrameSize(Runtime::CalleeSaveType type) { diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h index 453056d7bf..2ef45f5679 100644 --- a/runtime/arch/mips/asm_support_mips.h +++ b/runtime/arch/mips/asm_support_mips.h @@ -22,5 +22,6 @@ #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 96 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 48 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 80 +#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE 256 #endif // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_ diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index c1b8044be9..b926bdfb9f 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -277,6 +277,197 @@ .endm /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveEverything). + * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp $ra, $f0-$f31; + * 28(GPR)+ 32(FPR) + 3 words for padding and 1 word for Method* + * Clobbers $t0 and $t1. + * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots. + * Reserves FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE + ARG_SLOT_SIZE bytes on the stack. + * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP. + */ +.macro SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME + addiu $sp, $sp, -256 + .cfi_adjust_cfa_offset 256 + + // Ugly compile-time check, but we only have the preprocessor. +#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 256) +#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(MIPS) size not as expected." +#endif + + sw $ra, 252($sp) + .cfi_rel_offset 31, 252 + sw $fp, 248($sp) + .cfi_rel_offset 30, 248 + sw $gp, 244($sp) + .cfi_rel_offset 28, 244 + sw $t9, 240($sp) + .cfi_rel_offset 25, 240 + sw $t8, 236($sp) + .cfi_rel_offset 24, 236 + sw $s7, 232($sp) + .cfi_rel_offset 23, 232 + sw $s6, 228($sp) + .cfi_rel_offset 22, 228 + sw $s5, 224($sp) + .cfi_rel_offset 21, 224 + sw $s4, 220($sp) + .cfi_rel_offset 20, 220 + sw $s3, 216($sp) + .cfi_rel_offset 19, 216 + sw $s2, 212($sp) + .cfi_rel_offset 18, 212 + sw $s1, 208($sp) + .cfi_rel_offset 17, 208 + sw $s0, 204($sp) + .cfi_rel_offset 16, 204 + sw $t7, 200($sp) + .cfi_rel_offset 15, 200 + sw $t6, 196($sp) + .cfi_rel_offset 14, 196 + sw $t5, 192($sp) + .cfi_rel_offset 13, 192 + sw $t4, 188($sp) + .cfi_rel_offset 12, 188 + sw $t3, 184($sp) + .cfi_rel_offset 11, 184 + sw $t2, 180($sp) + .cfi_rel_offset 10, 180 + sw $t1, 176($sp) + .cfi_rel_offset 9, 176 + sw $t0, 172($sp) + .cfi_rel_offset 8, 172 + sw $a3, 168($sp) + .cfi_rel_offset 7, 168 + sw $a2, 164($sp) + .cfi_rel_offset 6, 164 + sw $a1, 160($sp) + .cfi_rel_offset 5, 160 + sw $a0, 156($sp) + .cfi_rel_offset 4, 156 + sw $v1, 152($sp) + .cfi_rel_offset 3, 152 + sw $v0, 148($sp) + .cfi_rel_offset 2, 148 + + // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction. 
+ bal 1f + sw $at, 144($sp) + .cfi_rel_offset 1, 144 +1: + .cpload $ra + + SDu $f30, $f31, 136, $sp, $t1 + SDu $f28, $f29, 128, $sp, $t1 + SDu $f26, $f27, 120, $sp, $t1 + SDu $f24, $f25, 112, $sp, $t1 + SDu $f22, $f23, 104, $sp, $t1 + SDu $f20, $f21, 96, $sp, $t1 + SDu $f18, $f19, 88, $sp, $t1 + SDu $f16, $f17, 80, $sp, $t1 + SDu $f14, $f15, 72, $sp, $t1 + SDu $f12, $f13, 64, $sp, $t1 + SDu $f10, $f11, 56, $sp, $t1 + SDu $f8, $f9, 48, $sp, $t1 + SDu $f6, $f7, 40, $sp, $t1 + SDu $f4, $f5, 32, $sp, $t1 + SDu $f2, $f3, 24, $sp, $t1 + SDu $f0, $f1, 16, $sp, $t1 + + # 3 words padding and 1 word for holding Method* + + lw $t0, %got(_ZN3art7Runtime9instance_E)($gp) + lw $t0, 0($t0) + lw $t0, RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET($t0) + sw $t0, 0($sp) # Place Method* at bottom of stack. + sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. + addiu $sp, $sp, -ARG_SLOT_SIZE # reserve argument slots on the stack + .cfi_adjust_cfa_offset ARG_SLOT_SIZE +.endm + +.macro RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME + addiu $sp, $sp, ARG_SLOT_SIZE # remove argument slots on the stack + .cfi_adjust_cfa_offset -ARG_SLOT_SIZE + + LDu $f30, $f31, 136, $sp, $t1 + LDu $f28, $f29, 128, $sp, $t1 + LDu $f26, $f27, 120, $sp, $t1 + LDu $f24, $f25, 112, $sp, $t1 + LDu $f22, $f23, 104, $sp, $t1 + LDu $f20, $f21, 96, $sp, $t1 + LDu $f18, $f19, 88, $sp, $t1 + LDu $f16, $f17, 80, $sp, $t1 + LDu $f14, $f15, 72, $sp, $t1 + LDu $f12, $f13, 64, $sp, $t1 + LDu $f10, $f11, 56, $sp, $t1 + LDu $f8, $f9, 48, $sp, $t1 + LDu $f6, $f7, 40, $sp, $t1 + LDu $f4, $f5, 32, $sp, $t1 + LDu $f2, $f3, 24, $sp, $t1 + LDu $f0, $f1, 16, $sp, $t1 + + lw $ra, 252($sp) + .cfi_restore 31 + lw $fp, 248($sp) + .cfi_restore 30 + lw $gp, 244($sp) + .cfi_restore 28 + lw $t9, 240($sp) + .cfi_restore 25 + lw $t8, 236($sp) + .cfi_restore 24 + lw $s7, 232($sp) + .cfi_restore 23 + lw $s6, 228($sp) + .cfi_restore 22 + lw $s5, 224($sp) + .cfi_restore 21 + lw $s4, 220($sp) + .cfi_restore 20 + lw $s3, 216($sp) + .cfi_restore 19 + lw $s2, 212($sp) + .cfi_restore 18 + lw $s1, 208($sp) + .cfi_restore 17 + lw $s0, 204($sp) + .cfi_restore 16 + lw $t7, 200($sp) + .cfi_restore 15 + lw $t6, 196($sp) + .cfi_restore 14 + lw $t5, 192($sp) + .cfi_restore 13 + lw $t4, 188($sp) + .cfi_restore 12 + lw $t3, 184($sp) + .cfi_restore 11 + lw $t2, 180($sp) + .cfi_restore 10 + lw $t1, 176($sp) + .cfi_restore 9 + lw $t0, 172($sp) + .cfi_restore 8 + lw $a3, 168($sp) + .cfi_restore 7 + lw $a2, 164($sp) + .cfi_restore 6 + lw $a1, 160($sp) + .cfi_restore 5 + lw $a0, 156($sp) + .cfi_restore 4 + lw $v1, 152($sp) + .cfi_restore 3 + lw $v0, 148($sp) + .cfi_restore 2 + lw $at, 144($sp) + .cfi_restore 1 + + addiu $sp, $sp, 256 # pop frame + .cfi_adjust_cfa_offset -256 +.endm + + /* * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending * exception is Thread::Current()->exception_ */ @@ -1652,18 +1843,20 @@ ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeA * Called by managed code when the value in rSUSPEND has been decremented to 0. 
*/ .extern artTestSuspendFromCode -ENTRY art_quick_test_suspend - lh $a0, THREAD_FLAGS_OFFSET(rSELF) - bnez $a0, 1f +ENTRY_NO_GP art_quick_test_suspend + lh rSUSPEND, THREAD_FLAGS_OFFSET(rSELF) + bnez rSUSPEND, 1f addiu rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL # reset rSUSPEND to SUSPEND_CHECK_INTERVAL jalr $zero, $ra nop 1: - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves for stack crawl + SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME # save everything for stack crawl la $t9, artTestSuspendFromCode - jalr $t9 # (Thread*) + jalr $t9 # (Thread*) move $a0, rSELF - RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN + RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME + jalr $zero, $ra + nop END art_quick_test_suspend /* diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h index 7b0623b2cc..170513d174 100644 --- a/runtime/arch/mips/quick_method_frame_info_mips.h +++ b/runtime/arch/mips/quick_method_frame_info_mips.h @@ -34,6 +34,12 @@ static constexpr uint32_t kMipsCalleeSaveArgSpills = (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3); static constexpr uint32_t kMipsCalleeSaveAllSpills = (1 << art::mips::S0) | (1 << art::mips::S1); +static constexpr uint32_t kMipsCalleeSaveEverythingSpills = + (1 << art::mips::AT) | (1 << art::mips::V0) | (1 << art::mips::V1) | + (1 << art::mips::A0) | (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3) | + (1 << art::mips::T0) | (1 << art::mips::T1) | (1 << art::mips::T2) | (1 << art::mips::T3) | + (1 << art::mips::T4) | (1 << art::mips::T5) | (1 << art::mips::T6) | (1 << art::mips::T7) | + (1 << art::mips::S0) | (1 << art::mips::S1) | (1 << art::mips::T8) | (1 << art::mips::T9); static constexpr uint32_t kMipsCalleeSaveFpAlwaysSpills = 0; static constexpr uint32_t kMipsCalleeSaveFpRefSpills = 0; @@ -43,17 +49,28 @@ static constexpr uint32_t kMipsCalleeSaveAllFPSpills = (1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) | (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) | (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31); +static constexpr uint32_t kMipsCalleeSaveFpEverythingSpills = + (1 << art::mips::F0) | (1 << art::mips::F1) | (1 << art::mips::F2) | (1 << art::mips::F3) | + (1 << art::mips::F4) | (1 << art::mips::F5) | (1 << art::mips::F6) | (1 << art::mips::F7) | + (1 << art::mips::F8) | (1 << art::mips::F9) | (1 << art::mips::F10) | (1 << art::mips::F11) | + (1 << art::mips::F12) | (1 << art::mips::F13) | (1 << art::mips::F14) | (1 << art::mips::F15) | + (1 << art::mips::F16) | (1 << art::mips::F17) | (1 << art::mips::F18) | (1 << art::mips::F19) | + (1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) | + (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) | + (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31); constexpr uint32_t MipsCalleeSaveCoreSpills(Runtime::CalleeSaveType type) { return kMipsCalleeSaveAlwaysSpills | kMipsCalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) | - (type == Runtime::kSaveAll ? kMipsCalleeSaveAllSpills : 0); + (type == Runtime::kSaveAll ? kMipsCalleeSaveAllSpills : 0) | + (type == Runtime::kSaveEverything ? 
kMipsCalleeSaveEverythingSpills : 0); } constexpr uint32_t MipsCalleeSaveFPSpills(Runtime::CalleeSaveType type) { return kMipsCalleeSaveFpAlwaysSpills | kMipsCalleeSaveFpRefSpills | (type == Runtime::kRefsAndArgs ? kMipsCalleeSaveFpArgSpills : 0) | - (type == Runtime::kSaveAll ? kMipsCalleeSaveAllFPSpills : 0); + (type == Runtime::kSaveAll ? kMipsCalleeSaveAllFPSpills : 0) | + (type == Runtime::kSaveEverything ? kMipsCalleeSaveFpEverythingSpills : 0); } constexpr uint32_t MipsCalleeSaveFrameSize(Runtime::CalleeSaveType type) { diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc index 06d6211c88..0a9ab7aacd 100644 --- a/runtime/arch/mips/thread_mips.cc +++ b/runtime/arch/mips/thread_mips.cc @@ -25,7 +25,7 @@ namespace art { void Thread::InitCpu() { CHECK_EQ(THREAD_FLAGS_OFFSET, ThreadFlagsOffset<PointerSize::k32>().Int32Value()); CHECK_EQ(THREAD_CARD_TABLE_OFFSET, CardTableOffset<PointerSize::k32>().Int32Value()); - CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k64>().Int32Value()); + CHECK_EQ(THREAD_EXCEPTION_OFFSET, ExceptionOffset<PointerSize::k32>().Int32Value()); } void Thread::CleanupCpu() { diff --git a/runtime/arch/mips64/asm_support_mips64.h b/runtime/arch/mips64/asm_support_mips64.h index 995fcf37bf..2c16c2532d 100644 --- a/runtime/arch/mips64/asm_support_mips64.h +++ b/runtime/arch/mips64/asm_support_mips64.h @@ -25,5 +25,7 @@ #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 80 // $f12-$f19, $a1-$a7, $s2-$s7 + $gp + $s8 + $ra, 16 total + 1x8 bytes padding + method* #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 208 +// $f0-$f31, $at, $v0-$v1, $a0-$a7, $t0-$t3, $s0-$s7, $t8-$t9, $gp, $s8, $ra + padding + method* +#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE 496 #endif // ART_RUNTIME_ARCH_MIPS64_ASM_SUPPORT_MIPS64_H_ diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index ae6962076b..0a379098f5 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -314,6 +314,227 @@ .endm /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveEverything). + * callee-save: $at + $v0-$v1 + $a0-$a7 + $t0-$t3 + $s0-$s7 + $t8-$t9 + $gp + $s8 + $ra, + * $f0-$f31; 28(GPR)+ 32(FPR) + 1x8 bytes padding + method* + * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP. + */ +.macro SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME + daddiu $sp, $sp, -496 + .cfi_adjust_cfa_offset 496 + + // Ugly compile-time check, but we only have the preprocessor. +#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 496) +#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(MIPS64) size not as expected." +#endif + + // Save core registers.
+ sd $ra, 488($sp) + .cfi_rel_offset 31, 488 + sd $s8, 480($sp) + .cfi_rel_offset 30, 480 + sd $gp, 472($sp) + .cfi_rel_offset 28, 472 + sd $t9, 464($sp) + .cfi_rel_offset 25, 464 + sd $t8, 456($sp) + .cfi_rel_offset 24, 456 + sd $s7, 448($sp) + .cfi_rel_offset 23, 448 + sd $s6, 440($sp) + .cfi_rel_offset 22, 440 + sd $s5, 432($sp) + .cfi_rel_offset 21, 432 + sd $s4, 424($sp) + .cfi_rel_offset 20, 424 + sd $s3, 416($sp) + .cfi_rel_offset 19, 416 + sd $s2, 408($sp) + .cfi_rel_offset 18, 408 + sd $s1, 400($sp) + .cfi_rel_offset 17, 400 + sd $s0, 392($sp) + .cfi_rel_offset 16, 392 + sd $t3, 384($sp) + .cfi_rel_offset 15, 384 + sd $t2, 376($sp) + .cfi_rel_offset 14, 376 + sd $t1, 368($sp) + .cfi_rel_offset 13, 368 + sd $t0, 360($sp) + .cfi_rel_offset 12, 360 + sd $a7, 352($sp) + .cfi_rel_offset 11, 352 + sd $a6, 344($sp) + .cfi_rel_offset 10, 344 + sd $a5, 336($sp) + .cfi_rel_offset 9, 336 + sd $a4, 328($sp) + .cfi_rel_offset 8, 328 + sd $a3, 320($sp) + .cfi_rel_offset 7, 320 + sd $a2, 312($sp) + .cfi_rel_offset 6, 312 + sd $a1, 304($sp) + .cfi_rel_offset 5, 304 + sd $a0, 296($sp) + .cfi_rel_offset 4, 296 + sd $v1, 288($sp) + .cfi_rel_offset 3, 288 + sd $v0, 280($sp) + .cfi_rel_offset 2, 280 + + // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction. + bal 1f + sd $at, 272($sp) + .cfi_rel_offset 1, 272 +1: + // TODO: Can we avoid the unnecessary move $t8<-$gp? + .cpsetup $ra, $t8, 1b + + // Save FP registers. + s.d $f31, 264($sp) + s.d $f30, 256($sp) + s.d $f29, 248($sp) + s.d $f28, 240($sp) + s.d $f27, 232($sp) + s.d $f26, 224($sp) + s.d $f25, 216($sp) + s.d $f24, 208($sp) + s.d $f23, 200($sp) + s.d $f22, 192($sp) + s.d $f21, 184($sp) + s.d $f20, 176($sp) + s.d $f19, 168($sp) + s.d $f18, 160($sp) + s.d $f17, 152($sp) + s.d $f16, 144($sp) + s.d $f15, 136($sp) + s.d $f14, 128($sp) + s.d $f13, 120($sp) + s.d $f12, 112($sp) + s.d $f11, 104($sp) + s.d $f10, 96($sp) + s.d $f9, 88($sp) + s.d $f8, 80($sp) + s.d $f7, 72($sp) + s.d $f6, 64($sp) + s.d $f5, 56($sp) + s.d $f4, 48($sp) + s.d $f3, 40($sp) + s.d $f2, 32($sp) + s.d $f1, 24($sp) + s.d $f0, 16($sp) + + # load appropriate callee-save-method + ld $t1, %got(_ZN3art7Runtime9instance_E)($gp) + ld $t1, 0($t1) + ld $t1, RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET($t1) + sd $t1, 0($sp) # Place ArtMethod* at bottom of stack. + # Place sp in Thread::Current()->top_quick_frame. + sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) +.endm + +.macro RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME + // Restore FP registers. + l.d $f31, 264($sp) + l.d $f30, 256($sp) + l.d $f29, 248($sp) + l.d $f28, 240($sp) + l.d $f27, 232($sp) + l.d $f26, 224($sp) + l.d $f25, 216($sp) + l.d $f24, 208($sp) + l.d $f23, 200($sp) + l.d $f22, 192($sp) + l.d $f21, 184($sp) + l.d $f20, 176($sp) + l.d $f19, 168($sp) + l.d $f18, 160($sp) + l.d $f17, 152($sp) + l.d $f16, 144($sp) + l.d $f15, 136($sp) + l.d $f14, 128($sp) + l.d $f13, 120($sp) + l.d $f12, 112($sp) + l.d $f11, 104($sp) + l.d $f10, 96($sp) + l.d $f9, 88($sp) + l.d $f8, 80($sp) + l.d $f7, 72($sp) + l.d $f6, 64($sp) + l.d $f5, 56($sp) + l.d $f4, 48($sp) + l.d $f3, 40($sp) + l.d $f2, 32($sp) + l.d $f1, 24($sp) + l.d $f0, 16($sp) + + // Restore core registers. 
+ ld $ra, 488($sp) + .cfi_restore 31 + ld $s8, 480($sp) + .cfi_restore 30 + ld $gp, 472($sp) + .cfi_restore 28 + ld $t9, 464($sp) + .cfi_restore 25 + ld $t8, 456($sp) + .cfi_restore 24 + ld $s7, 448($sp) + .cfi_restore 23 + ld $s6, 440($sp) + .cfi_restore 22 + ld $s5, 432($sp) + .cfi_restore 21 + ld $s4, 424($sp) + .cfi_restore 20 + ld $s3, 416($sp) + .cfi_restore 19 + ld $s2, 408($sp) + .cfi_restore 18 + ld $s1, 400($sp) + .cfi_restore 17 + ld $s0, 392($sp) + .cfi_restore 16 + ld $t3, 384($sp) + .cfi_restore 15 + ld $t2, 376($sp) + .cfi_restore 14 + ld $t1, 368($sp) + .cfi_restore 13 + ld $t0, 360($sp) + .cfi_restore 12 + ld $a7, 352($sp) + .cfi_restore 11 + ld $a6, 344($sp) + .cfi_restore 10 + ld $a5, 336($sp) + .cfi_restore 9 + ld $a4, 328($sp) + .cfi_restore 8 + ld $a3, 320($sp) + .cfi_restore 7 + ld $a2, 312($sp) + .cfi_restore 6 + ld $a1, 304($sp) + .cfi_restore 5 + ld $a0, 296($sp) + .cfi_restore 4 + ld $v1, 288($sp) + .cfi_restore 3 + ld $v0, 280($sp) + .cfi_restore 2 + ld $at, 272($sp) + .cfi_restore 1 + + .cpreturn + daddiu $sp, $sp, 496 + .cfi_adjust_cfa_offset -496 +.endm + + /* * Macro that set calls through to artDeliverPendingExceptionFromCode, * where the pending * exception is Thread::Current()->exception_ @@ -1673,17 +1894,19 @@ ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeA * Called by managed code when the value in rSUSPEND has been decremented to 0. */ .extern artTestSuspendFromCode -ENTRY art_quick_test_suspend - lh $a0, THREAD_FLAGS_OFFSET(rSELF) - bne $a0, $zero, 1f +ENTRY_NO_GP art_quick_test_suspend + lh rSUSPEND, THREAD_FLAGS_OFFSET(rSELF) + bne rSUSPEND, $zero, 1f daddiu rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL # reset rSUSPEND to SUSPEND_CHECK_INTERVAL jalr $zero, $ra - .cpreturn # Restore gp from t8 in branch delay slot. 
+ nop 1: - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves for stack crawl + SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME # save everything for stack crawl jal artTestSuspendFromCode # (Thread*) move $a0, rSELF - RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN + RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME + jalr $zero, $ra + nop END art_quick_test_suspend /* diff --git a/runtime/arch/mips64/quick_method_frame_info_mips64.h b/runtime/arch/mips64/quick_method_frame_info_mips64.h index b7dc57f672..d52945f5a4 100644 --- a/runtime/arch/mips64/quick_method_frame_info_mips64.h +++ b/runtime/arch/mips64/quick_method_frame_info_mips64.h @@ -25,6 +25,8 @@ namespace art { namespace mips64 { +static constexpr uint32_t kMips64CalleeSaveAlwaysSpills = + (1 << art::mips64::RA); static constexpr uint32_t kMips64CalleeSaveRefSpills = (1 << art::mips64::S2) | (1 << art::mips64::S3) | (1 << art::mips64::S4) | (1 << art::mips64::S5) | (1 << art::mips64::S6) | (1 << art::mips64::S7) | @@ -35,6 +37,14 @@ static constexpr uint32_t kMips64CalleeSaveArgSpills = (1 << art::mips64::A7); static constexpr uint32_t kMips64CalleeSaveAllSpills = (1 << art::mips64::S0) | (1 << art::mips64::S1); +static constexpr uint32_t kMips64CalleeSaveEverythingSpills = + (1 << art::mips64::AT) | (1 << art::mips64::V0) | (1 << art::mips64::V1) | + (1 << art::mips64::A0) | (1 << art::mips64::A1) | (1 << art::mips64::A2) | + (1 << art::mips64::A3) | (1 << art::mips64::A4) | (1 << art::mips64::A5) | + (1 << art::mips64::A6) | (1 << art::mips64::A7) | (1 << art::mips64::T0) | + (1 << art::mips64::T1) | (1 << art::mips64::T2) | (1 << art::mips64::T3) | + (1 << art::mips64::S0) | (1 << art::mips64::S1) | (1 << art::mips64::T8) | + (1 << art::mips64::T9); static constexpr uint32_t kMips64CalleeSaveFpRefSpills = 0; static constexpr uint32_t kMips64CalleeSaveFpArgSpills = @@ -46,17 +56,31 @@ static constexpr uint32_t kMips64CalleeSaveFpAllSpills = (1 << art::mips64::F24) | (1 << art::mips64::F25) | (1 << art::mips64::F26) | (1 << art::mips64::F27) | (1 << art::mips64::F28) | (1 << art::mips64::F29) | (1 << art::mips64::F30) | (1 << art::mips64::F31); +static constexpr uint32_t kMips64CalleeSaveFpEverythingSpills = + (1 << art::mips64::F0) | (1 << art::mips64::F1) | (1 << art::mips64::F2) | + (1 << art::mips64::F3) | (1 << art::mips64::F4) | (1 << art::mips64::F5) | + (1 << art::mips64::F6) | (1 << art::mips64::F7) | (1 << art::mips64::F8) | + (1 << art::mips64::F9) | (1 << art::mips64::F10) | (1 << art::mips64::F11) | + (1 << art::mips64::F12) | (1 << art::mips64::F13) | (1 << art::mips64::F14) | + (1 << art::mips64::F15) | (1 << art::mips64::F16) | (1 << art::mips64::F17) | + (1 << art::mips64::F18) | (1 << art::mips64::F19) | (1 << art::mips64::F20) | + (1 << art::mips64::F21) | (1 << art::mips64::F22) | (1 << art::mips64::F23) | + (1 << art::mips64::F24) | (1 << art::mips64::F25) | (1 << art::mips64::F26) | + (1 << art::mips64::F27) | (1 << art::mips64::F28) | (1 << art::mips64::F29) | + (1 << art::mips64::F30) | (1 << art::mips64::F31); constexpr uint32_t Mips64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) { - return kMips64CalleeSaveRefSpills | + return kMips64CalleeSaveAlwaysSpills | kMips64CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kMips64CalleeSaveArgSpills : 0) | - (type == Runtime::kSaveAll ? kMips64CalleeSaveAllSpills : 0) | (1 << art::mips64::RA); + (type == Runtime::kSaveAll ? kMips64CalleeSaveAllSpills : 0) | + (type == Runtime::kSaveEverything ? 
kMips64CalleeSaveEverythingSpills : 0); } constexpr uint32_t Mips64CalleeSaveFpSpills(Runtime::CalleeSaveType type) { return kMips64CalleeSaveFpRefSpills | (type == Runtime::kRefsAndArgs ? kMips64CalleeSaveFpArgSpills: 0) | - (type == Runtime::kSaveAll ? kMips64CalleeSaveFpAllSpills : 0); + (type == Runtime::kSaveAll ? kMips64CalleeSaveFpAllSpills : 0) | + (type == Runtime::kSaveEverything ? kMips64CalleeSaveFpEverythingSpills : 0); } constexpr uint32_t Mips64CalleeSaveFrameSize(Runtime::CalleeSaveType type) { diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h index b0a6017b47..ba5fd997e8 100644 --- a/runtime/arch/x86/asm_support_x86.h +++ b/runtime/arch/x86/asm_support_x86.h @@ -21,8 +21,7 @@ #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 32 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32 - -// 32 bytes for GPRs and 32 bytes for FPRs. #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (32 + 32) +#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE (48 + 64) #endif // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_ diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 77e04e7981..68ba0cf986 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -222,6 +222,74 @@ MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME_AND_JUMP) END_MACRO /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveEverything) + */ +MACRO2(SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME, got_reg, temp_reg) + // Save core registers. + PUSH edi + PUSH esi + PUSH ebp + PUSH ebx + PUSH edx + PUSH ecx + PUSH eax + // Create space for FPR registers and stack alignment padding. + subl MACRO_LITERAL(12 + 8 * 8), %esp + CFI_ADJUST_CFA_OFFSET(12 + 8 * 8) + // Save FPRs. + movsd %xmm0, 12(%esp) + movsd %xmm1, 20(%esp) + movsd %xmm2, 28(%esp) + movsd %xmm3, 36(%esp) + movsd %xmm4, 44(%esp) + movsd %xmm5, 52(%esp) + movsd %xmm6, 60(%esp) + movsd %xmm7, 68(%esp) + + SETUP_GOT_NOSAVE RAW_VAR(got_reg) + // Load Runtime::instance_ from GOT. + movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg) + movl (REG_VAR(temp_reg)), REG_VAR(temp_reg) + // Push save everything callee-save method. + pushl RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg)) + CFI_ADJUST_CFA_OFFSET(4) + // Store esp as the stop quick frame. + movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET + + // Ugly compile-time check, but we only have the preprocessor. + // Last +4: implicit return address pushed on stack when caller made call. +#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 7*4 + 8*8 + 12 + 4 + 4) +#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(X86) size not as expected." +#endif +END_MACRO + +MACRO0(RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME) + // Restore FPRs. Method and padding is still on the stack. + movsd 16(%esp), %xmm0 + movsd 24(%esp), %xmm1 + movsd 32(%esp), %xmm2 + movsd 40(%esp), %xmm3 + movsd 48(%esp), %xmm4 + movsd 56(%esp), %xmm5 + movsd 64(%esp), %xmm6 + movsd 72(%esp), %xmm7 + + // Remove save everything callee save method, stack alignment padding and FPRs. + addl MACRO_LITERAL(16 + 8 * 8), %esp + CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8)) + + // Restore core registers. + POP eax + POP ecx + POP edx + POP ebx + POP ebp + POP esi + POP edi +END_MACRO + + /* * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending * exception is Thread::Current()->exception_. 
*/ @@ -661,22 +729,6 @@ DEFINE_FUNCTION art_quick_invoke_static_stub ret END_FUNCTION art_quick_invoke_static_stub -MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro) - DEFINE_FUNCTION VAR(c_name) - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC - // Outgoing argument set up - subl MACRO_LITERAL(12), %esp // push padding - CFI_ADJUST_CFA_OFFSET(12) - pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() - CFI_ADJUST_CFA_OFFSET(4) - call CALLVAR(cxx_name) // cxx_name(Thread*) - addl MACRO_LITERAL(16), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-16) - RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address - CALL_MACRO(return_macro) // return or deliver exception - END_FUNCTION VAR(c_name) -END_MACRO - MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro) DEFINE_FUNCTION VAR(c_name) SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC @@ -1028,7 +1080,13 @@ DEFINE_FUNCTION art_quick_alloc_object_region_tlab movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx // Read barrier for class load. cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET - jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path + jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit + // Null check so that we can load the lock word. + testl %edx, %edx + jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit + // Check the mark bit, if it is 1 return. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) + jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: @@ -1065,7 +1123,7 @@ DEFINE_FUNCTION art_quick_lock_object test LITERAL(LOCK_WORD_STATE_MASK), %ecx // test the 2 high bits. jne .Lslow_lock // slow path if either of the two high bits are set. movl %ecx, %edx // save lock word (edx) to keep read barrier bits. - andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits. test %ecx, %ecx jnz .Lalready_thin // lock word contains a thin lock // unlocked case - edx: original lock word, eax: obj. @@ -1081,9 +1139,9 @@ DEFINE_FUNCTION art_quick_lock_object cmpw %cx, %dx // do we hold the lock already? jne .Lslow_lock movl %edx, %ecx // copy the lock word to check count overflow. - andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the read barrier bits. addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count for overflow check. - test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if either of the upper two bits (28-29) are set. + test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // overflowed if the first gc state bit is set. jne .Lslow_lock // count overflowed so go slow movl %eax, %ecx // save obj to use eax for cmpxchg. movl %edx, %eax // copy the lock word as the old val for cmpxchg. @@ -1137,13 +1195,13 @@ DEFINE_FUNCTION art_quick_unlock_object cmpw %cx, %dx // does the thread id match? jne .Lslow_unlock movl %ecx, %edx // copy the lock word to detect new count of 0. 
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx // zero the read barrier bits. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits. cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx jae .Lrecursive_thin_unlock // update lockword, cmpxchg necessary for read barrier bits. movl %eax, %edx // edx: obj movl %ecx, %eax // eax: old lock word. - andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // ecx: new lock word zero except original rb bits. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // ecx: new lock word zero except original rb bits. #ifndef USE_READ_BARRIER movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) #else @@ -1397,7 +1455,19 @@ DEFINE_FUNCTION art_quick_memcpy ret END_FUNCTION art_quick_memcpy -NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret +DEFINE_FUNCTION art_quick_test_suspend + SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME ebx, ebx // save everything for GC + // Outgoing argument set up + subl MACRO_LITERAL(12), %esp // push padding + CFI_ADJUST_CFA_OFFSET(12) + pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() + CFI_ADJUST_CFA_OFFSET(4) + call SYMBOL(artTestSuspendFromCode) // (Thread*) + addl MACRO_LITERAL(16), %esp // pop arguments + CFI_ADJUST_CFA_OFFSET(-16) + RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME // restore frame up to return address + ret // return +END_FUNCTION art_quick_test_suspend DEFINE_FUNCTION art_quick_d2l subl LITERAL(12), %esp // alignment padding, room for argument @@ -1923,6 +1993,14 @@ END_FUNCTION art_nested_signal_return // convention (e.g. standard callee-save registers are preserved). MACRO2(READ_BARRIER_MARK_REG, name, reg) DEFINE_FUNCTION VAR(name) + // Null check so that we can load the lock word. + test REG_VAR(reg), REG_VAR(reg) + jz .Lret_rb_\name + // Check the mark bit, if it is 1 return. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)) + jz .Lslow_rb_\name + ret +.Lslow_rb_\name: // Save all potentially live caller-save core registers. PUSH eax PUSH ecx @@ -1970,6 +2048,7 @@ MACRO2(READ_BARRIER_MARK_REG, name, reg) POP_REG_NE edx, RAW_VAR(reg) POP_REG_NE ecx, RAW_VAR(reg) POP_REG_NE eax, RAW_VAR(reg) +.Lret_rb_\name: ret END_FUNCTION VAR(name) END_MACRO diff --git a/runtime/arch/x86/quick_method_frame_info_x86.h b/runtime/arch/x86/quick_method_frame_info_x86.h index 24c671c367..a1612c38a7 100644 --- a/runtime/arch/x86/quick_method_frame_info_x86.h +++ b/runtime/arch/x86/quick_method_frame_info_x86.h @@ -36,21 +36,33 @@ enum XMM { XMM7 = 7, }; +static constexpr uint32_t kX86CalleeSaveAlwaysSpills = + (1 << art::x86::kNumberOfCpuRegisters); // Fake return address callee save. 
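The x86 kSaveEverything spill masks in this header (the fake return-address slot above plus the GPR and XMM masks just below) have to add up, together with the ArtMethod* slot and stack-alignment padding, to the FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE value of (48 + 64) declared earlier. A rough C++ sketch of that arithmetic follows; the FrameSize() formula and the mask constants are illustrative assumptions, not ART's X86CalleeSaveFrameSize().

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Portable popcount so the sketch has no compiler-specific dependencies.
constexpr uint32_t PopCount(uint32_t x) {
  uint32_t n = 0;
  for (; x != 0; x &= x - 1u) ++n;
  return n;
}

constexpr size_t kGprSlot = 4;          // one PUSHed core register
constexpr size_t kFprSlot = 8;          // one movsd-saved XMM register
constexpr size_t kStackAlignment = 16;

constexpr size_t RoundUp(size_t x, size_t n) { return (x + n - 1u) / n * n; }

// core_mask counts the fake return-address slot plus EAX, ECX, EDX, EBX, EBP,
// ESI, EDI for kSaveEverything; fp_mask counts XMM0-XMM7.
constexpr size_t FrameSize(uint32_t core_mask, uint32_t fp_mask) {
  return RoundUp(PopCount(core_mask) * kGprSlot +
                 PopCount(fp_mask) * kFprSlot +
                 kGprSlot /* ArtMethod* slot */,
                 kStackAlignment);
}

int main() {
  const uint32_t core = 0xFFu;  // 8 core slots
  const uint32_t fp = 0xFFu;    // 8 XMM slots
  // 8*4 + 8*8 + 4 = 100, rounded up to 112 == (48 + 64).
  std::printf("%zu\n", FrameSize(core, fp));
  return 0;
}

The same accounting reproduces the 7*4 + 8*8 + 12 + 4 + 4 compile-time check in SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME: 28 bytes of pushes, 64 bytes of XMM saves, 12 bytes of padding, the method slot, and the caller's return address.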
static constexpr uint32_t kX86CalleeSaveRefSpills = (1 << art::x86::EBP) | (1 << art::x86::ESI) | (1 << art::x86::EDI); static constexpr uint32_t kX86CalleeSaveArgSpills = (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX); +static constexpr uint32_t kX86CalleeSaveEverythingSpills = + (1 << art::x86::EAX) | (1 << art::x86::ECX) | (1 << art::x86::EDX) | (1 << art::x86::EBX); + static constexpr uint32_t kX86CalleeSaveFpArgSpills = (1 << art::x86::XMM0) | (1 << art::x86::XMM1) | (1 << art::x86::XMM2) | (1 << art::x86::XMM3); +static constexpr uint32_t kX86CalleeSaveFpEverythingSpills = + (1 << art::x86::XMM0) | (1 << art::x86::XMM1) | + (1 << art::x86::XMM2) | (1 << art::x86::XMM3) | + (1 << art::x86::XMM4) | (1 << art::x86::XMM5) | + (1 << art::x86::XMM6) | (1 << art::x86::XMM7); constexpr uint32_t X86CalleeSaveCoreSpills(Runtime::CalleeSaveType type) { - return kX86CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kX86CalleeSaveArgSpills : 0) | - (1 << art::x86::kNumberOfCpuRegisters); // fake return address callee save + return kX86CalleeSaveAlwaysSpills | kX86CalleeSaveRefSpills | + (type == Runtime::kRefsAndArgs ? kX86CalleeSaveArgSpills : 0) | + (type == Runtime::kSaveEverything ? kX86CalleeSaveEverythingSpills : 0); } constexpr uint32_t X86CalleeSaveFpSpills(Runtime::CalleeSaveType type) { - return type == Runtime::kRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0; + return (type == Runtime::kRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0) | + (type == Runtime::kSaveEverything ? kX86CalleeSaveFpEverythingSpills : 0); } constexpr uint32_t X86CalleeSaveFrameSize(Runtime::CalleeSaveType type) { diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h index 48bec73239..58dc2fe23a 100644 --- a/runtime/arch/x86_64/asm_support_x86_64.h +++ b/runtime/arch/x86_64/asm_support_x86_64.h @@ -21,6 +21,7 @@ #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE (64 + 4*8) #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE (64 + 4*8) -#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (176 + 4*8) +#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (112 + 12*8) +#define FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE (144 + 16*8) #endif // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_ diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 784ec394a8..3048404745 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -165,8 +165,8 @@ MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME) PUSH rdx // Quick arg 2. PUSH rcx // Quick arg 3. // Create space for FPR args and create 2 slots for ArtMethod*. - subq MACRO_LITERAL(80 + 4 * 8), %rsp - CFI_ADJUST_CFA_OFFSET(80 + 4 * 8) + subq MACRO_LITERAL(16 + 12 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(16 + 12 * 8) // R10 := ArtMethod* for ref and args callee save frame method. movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10 // Save FPRs. @@ -189,7 +189,7 @@ MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME) // Ugly compile-time check, but we only have the preprocessor. // Last +8: implicit return address pushed on stack when caller made call. -#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11 * 8 + 4 * 8 + 80 + 8) +#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11 * 8 + 12 * 8 + 16 + 8) #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected." 
#endif #endif // __APPLE__ @@ -260,6 +260,108 @@ MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME) POP r15 END_MACRO + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveEverything) + */ +MACRO0(SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME) +#if defined(__APPLE__) + int3 + int3 +#else + // Save core registers from highest to lowest to agree with core spills bitmap. + PUSH r15 + PUSH r14 + PUSH r13 + PUSH r12 + PUSH r11 + PUSH r10 + PUSH r9 + PUSH r8 + PUSH rdi + PUSH rsi + PUSH rbp + PUSH rbx + PUSH rdx + PUSH rcx + PUSH rax + // Create space for FPRs and stack alignment padding. + subq MACRO_LITERAL(8 + 16 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(8 + 16 * 8) + // R10 := Runtime::Current() + movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10 + movq (%r10), %r10 + // Save FPRs. + movq %xmm0, 8(%rsp) + movq %xmm1, 16(%rsp) + movq %xmm2, 24(%rsp) + movq %xmm3, 32(%rsp) + movq %xmm4, 40(%rsp) + movq %xmm5, 48(%rsp) + movq %xmm6, 56(%rsp) + movq %xmm7, 64(%rsp) + movq %xmm8, 72(%rsp) + movq %xmm9, 80(%rsp) + movq %xmm10, 88(%rsp) + movq %xmm11, 96(%rsp) + movq %xmm12, 104(%rsp) + movq %xmm13, 112(%rsp) + movq %xmm14, 120(%rsp) + movq %xmm15, 128(%rsp) + // Push ArtMethod* for save everything frame method. + pushq RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET(%r10) + CFI_ADJUST_CFA_OFFSET(8) + // Store rsp as the top quick frame. + movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET + + // Ugly compile-time check, but we only have the preprocessor. + // Last +8: implicit return address pushed on stack when caller made call. +#if (FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE != 15 * 8 + 16 * 8 + 16 + 8) +#error "SAVE_EVERYTHING_CALLEE_SAVE_FRAME(X86_64) size not as expected." +#endif +#endif // __APPLE__ +END_MACRO + +MACRO0(RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME) + // Restore FPRs. Method and padding is still on the stack. + movq 16(%rsp), %xmm0 + movq 24(%rsp), %xmm1 + movq 32(%rsp), %xmm2 + movq 40(%rsp), %xmm3 + movq 48(%rsp), %xmm4 + movq 56(%rsp), %xmm5 + movq 64(%rsp), %xmm6 + movq 72(%rsp), %xmm7 + movq 80(%rsp), %xmm8 + movq 88(%rsp), %xmm9 + movq 96(%rsp), %xmm10 + movq 104(%rsp), %xmm11 + movq 112(%rsp), %xmm12 + movq 120(%rsp), %xmm13 + movq 128(%rsp), %xmm14 + movq 136(%rsp), %xmm15 + + // Remove save everything callee save method, stack alignment padding and FPRs. + addq MACRO_LITERAL(16 + 16 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8)) + // Restore callee and GPR args, mixed together to agree with core spills bitmap. 
+ POP rax + POP rcx + POP rdx + POP rbx + POP rbp + POP rsi + POP rdi + POP r8 + POP r9 + POP r10 + POP r11 + POP r12 + POP r13 + POP r14 + POP r15 +END_MACRO + /* * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending @@ -702,17 +804,6 @@ DEFINE_FUNCTION art_quick_do_long_jump #endif // __APPLE__ END_FUNCTION art_quick_do_long_jump -MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro) - DEFINE_FUNCTION VAR(c_name) - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC - // Outgoing argument set up - movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current() - call VAR(cxx_name) // cxx_name(Thread*) - RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address - CALL_MACRO(return_macro) // return or deliver exception - END_FUNCTION VAR(c_name) -END_MACRO - MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro) DEFINE_FUNCTION VAR(c_name) SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC @@ -989,7 +1080,13 @@ DEFINE_FUNCTION art_quick_alloc_object_region_tlab // Load the class movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET - jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path + jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit + // Null check so that we can load the lock word. + testl %edx, %edx + jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit + // Check the mark bit, if it is 1 return. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) + jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: @@ -1022,7 +1119,7 @@ DEFINE_FUNCTION art_quick_lock_object test LITERAL(LOCK_WORD_STATE_MASK), %ecx // Test the 2 high bits. jne .Lslow_lock // Slow path if either of the two high bits are set. movl %ecx, %edx // save lock word (edx) to keep read barrier bits. - andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits. test %ecx, %ecx jnz .Lalready_thin // Lock word contains a thin lock. // unlocked case - edx: original lock word, edi: obj. @@ -1037,9 +1134,9 @@ DEFINE_FUNCTION art_quick_lock_object cmpw %cx, %dx // do we hold the lock already? jne .Lslow_lock movl %edx, %ecx // copy the lock word to check count overflow. - andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx // zero the read barrier bits. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits. addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count - test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if either of the upper two bits (28-29) are set + test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if the upper bit (28) is set jne .Lslow_lock // count overflowed so go slow movl %edx, %eax // copy the lock word as the old val for cmpxchg. addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real. @@ -1074,12 +1171,12 @@ DEFINE_FUNCTION art_quick_unlock_object cmpw %cx, %dx // does the thread id match? jne .Lslow_unlock movl %ecx, %edx // copy the lock word to detect new count of 0. 
- andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx // zero the read barrier bits. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits. cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx jae .Lrecursive_thin_unlock // update lockword, cmpxchg necessary for read barrier bits. movl %ecx, %eax // eax: old lock word. - andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // ecx: new lock word zero except original rb bits. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // ecx: new lock word zero except original gc bits. #ifndef USE_READ_BARRIER movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) #else @@ -1329,7 +1426,14 @@ DEFINE_FUNCTION art_quick_memcpy ret END_FUNCTION art_quick_memcpy -NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret +DEFINE_FUNCTION art_quick_test_suspend + SETUP_SAVE_EVERYTHING_CALLEE_SAVE_FRAME // save everything for GC + // Outgoing argument set up + movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current() + call SYMBOL(artTestSuspendFromCode) // (Thread*) + RESTORE_SAVE_EVERYTHING_CALLEE_SAVE_FRAME // restore frame up to return address + ret +END_FUNCTION art_quick_test_suspend UNIMPLEMENTED art_quick_ldiv UNIMPLEMENTED art_quick_lmod @@ -1833,6 +1937,14 @@ END_FUNCTION art_nested_signal_return // convention (e.g. standard callee-save registers are preserved). MACRO2(READ_BARRIER_MARK_REG, name, reg) DEFINE_FUNCTION VAR(name) + // Null check so that we can load the lock word. + testq REG_VAR(reg), REG_VAR(reg) + jz .Lret_rb_\name + // Check the mark bit, if it is 1 return. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)) + jz .Lslow_rb_\name + ret +.Lslow_rb_\name: // Save all potentially live caller-save core registers. PUSH rax PUSH rcx @@ -1897,6 +2009,7 @@ MACRO2(READ_BARRIER_MARK_REG, name, reg) POP_REG_NE rdx, RAW_VAR(reg) POP_REG_NE rcx, RAW_VAR(reg) POP_REG_NE rax, RAW_VAR(reg) +.Lret_rb_\name: ret END_FUNCTION VAR(name) END_MACRO diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h index 37eff831f9..aa75b56f2e 100644 --- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h +++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h @@ -25,12 +25,19 @@ namespace art { namespace x86_64 { +static constexpr uint32_t kX86_64CalleeSaveAlwaysSpills = + (1 << art::x86_64::kNumberOfCpuRegisters); // Fake return address callee save. 
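As a cross-check of the FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE expectation enforced by the #error above, the kSaveEverything frame decomposes into 15 GPR pushes, 16 XMM saves, the ArtMethod* slot plus 8 bytes of alignment padding, and the caller's return address. A standalone sketch of that arithmetic (illustrative only; the authoritative size is derived from the spill masks in this header, not from these names):

#include <cstddef>

// Illustrative recomputation of the expected x86-64 kSaveEverything frame size.
constexpr std::size_t kGprSpillBytes = 15 * 8;    // PUSH r15 .. PUSH rax
constexpr std::size_t kFprSpillBytes = 16 * 8;    // movq %xmm0 .. %xmm15 to the stack
constexpr std::size_t kMethodAndPadding = 8 + 8;  // ArtMethod* slot + stack alignment padding
constexpr std::size_t kReturnAddress = 8;         // pushed implicitly by the caller's call
static_assert(kGprSpillBytes + kFprSpillBytes + kMethodAndPadding + kReturnAddress == 272,
              "must equal FRAME_SIZE_SAVE_EVERYTHING_CALLEE_SAVE, i.e. 15 * 8 + 16 * 8 + 16 + 8");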
static constexpr uint32_t kX86_64CalleeSaveRefSpills = (1 << art::x86_64::RBX) | (1 << art::x86_64::RBP) | (1 << art::x86_64::R12) | (1 << art::x86_64::R13) | (1 << art::x86_64::R14) | (1 << art::x86_64::R15); static constexpr uint32_t kX86_64CalleeSaveArgSpills = (1 << art::x86_64::RSI) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RCX) | (1 << art::x86_64::R8) | (1 << art::x86_64::R9); +static constexpr uint32_t kX86_64CalleeSaveEverythingSpills = + (1 << art::x86_64::RAX) | (1 << art::x86_64::RCX) | (1 << art::x86_64::RDX) | + (1 << art::x86_64::RSI) | (1 << art::x86_64::RDI) | (1 << art::x86_64::R8) | + (1 << art::x86_64::R9) | (1 << art::x86_64::R10) | (1 << art::x86_64::R11); + static constexpr uint32_t kX86_64CalleeSaveFpArgSpills = (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) | (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) | @@ -38,16 +45,24 @@ static constexpr uint32_t kX86_64CalleeSaveFpArgSpills = static constexpr uint32_t kX86_64CalleeSaveFpSpills = (1 << art::x86_64::XMM12) | (1 << art::x86_64::XMM13) | (1 << art::x86_64::XMM14) | (1 << art::x86_64::XMM15); +static constexpr uint32_t kX86_64CalleeSaveFpEverythingSpills = + (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | + (1 << art::x86_64::XMM2) | (1 << art::x86_64::XMM3) | + (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) | + (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7) | + (1 << art::x86_64::XMM8) | (1 << art::x86_64::XMM9) | + (1 << art::x86_64::XMM10) | (1 << art::x86_64::XMM11); constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) { - return kX86_64CalleeSaveRefSpills | + return kX86_64CalleeSaveAlwaysSpills | kX86_64CalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) | - (1 << art::x86_64::kNumberOfCpuRegisters); // fake return address callee save; + (type == Runtime::kSaveEverything ? kX86_64CalleeSaveEverythingSpills : 0); } constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) { return kX86_64CalleeSaveFpSpills | - (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0); + (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0) | + (type == Runtime::kSaveEverything ? kX86_64CalleeSaveFpEverythingSpills : 0); } constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) { diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc index 16087a572f..a6eb5f6261 100644 --- a/runtime/dex_file.cc +++ b/runtime/dex_file.cc @@ -63,7 +63,9 @@ const uint8_t DexFile::kDexMagicVersions[DexFile::kNumDexVersions][DexFile::kDex {'0', '3', '5', '\0'}, // Dex version 036 skipped because of an old dalvik bug on some versions of android where dex // files with that version number would erroneously be accepted and run. - {'0', '3', '7', '\0'} + {'0', '3', '7', '\0'}, + // Dex version 038: Android "O" and beyond. 
+ {'0', '3', '8', '\0'} }; bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) { @@ -336,6 +338,11 @@ std::unique_ptr<const DexFile> DexFile::Open(const ZipArchive& zip_archive, *error_code = ZipOpenErrorCode::kEntryNotFound; return nullptr; } + if (zip_entry->GetUncompressedLength() == 0) { + *error_msg = StringPrintf("Dex file '%s' has zero length", location.c_str()); + *error_code = ZipOpenErrorCode::kDexFileError; + return nullptr; + } std::unique_ptr<MemMap> map(zip_entry->ExtractToMemMap(location.c_str(), entry_name, error_msg)); if (map.get() == nullptr) { *error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", entry_name, location.c_str(), @@ -433,6 +440,8 @@ std::unique_ptr<const DexFile> DexFile::OpenMemory(const uint8_t* base, MemMap* mem_map, const OatDexFile* oat_dex_file, std::string* error_msg) { + DCHECK(base != nullptr); + DCHECK_NE(size, 0U); CHECK_ALIGNED(base, 4); // various dex file structures must be word aligned std::unique_ptr<DexFile> dex_file( new DexFile(base, size, location, location_checksum, mem_map, oat_dex_file)); diff --git a/runtime/dex_file.h b/runtime/dex_file.h index 3dffe4b6f1..2eca495a13 100644 --- a/runtime/dex_file.h +++ b/runtime/dex_file.h @@ -63,7 +63,7 @@ class DexFile { static const uint32_t kClassDefinitionOrderEnforcedVersion = 37; static const uint8_t kDexMagic[]; - static constexpr size_t kNumDexVersions = 2; + static constexpr size_t kNumDexVersions = 3; static constexpr size_t kDexVersionLen = 4; static const uint8_t kDexMagicVersions[kNumDexVersions][kDexVersionLen]; diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc index 4f8e6f1fc0..2704d8a010 100644 --- a/runtime/dex_file_test.cc +++ b/runtime/dex_file_test.cc @@ -133,8 +133,46 @@ static const char kRawDex[] = "AAACAAAAQAEAAAEgAAACAAAAVAEAAAYgAAACAAAAiAEAAAEQAAABAAAAqAEAAAIgAAAPAAAArgEA" "AAMgAAACAAAAiAIAAAQgAAADAAAAlAIAAAAgAAACAAAAqwIAAAAQAAABAAAAxAIAAA=="; -static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64, - const char* location) { +// kRawDex38 and 39 are dex'ed versions of the following Java source : +// +// public class Main { +// public static void main(String[] foo) { +// } +// } +// +// The dex file was manually edited to change its dex version code to 38 +// or 39, respectively. 
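The two blobs below differ in the version bytes of the dex magic ("038" vs "039"). A minimal sketch of the table-driven acceptance rule the tests exercise, with made-up names (the real check is DexFile's magic/version validation against kDexMagicVersions, which now holds three entries):

#include <cstring>

// Hypothetical stand-in for the version table after this change; '036' stays skipped.
static const char kKnownDexVersions[3][4] = {"035", "037", "038"};

bool IsSupportedDexVersion(const char* version) {  // bytes 4..7 of the header magic, e.g. "038"
  for (const char* known : kKnownDexVersions) {
    if (std::memcmp(version, known, 4) == 0) {
      return true;   // "038" -> Version38Accepted
    }
  }
  return false;      // "039" is not listed -> Version39Rejected
}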
+static const char kRawDex38[] = + "ZGV4CjAzOAC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI" + "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB" + "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA" + "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA" + "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB" + "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW" + "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA" + "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA" + "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC" + "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA=="; + +static const char kRawDex39[] = + "ZGV4CjAzOQC4OovJlJ1089ikzK6asMf/f8qp3Kve5VsgAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAI" + "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAwAQAA8AAAACIB" + "AAAqAQAAMgEAAEYBAABRAQAAVAEAAFgBAABtAQAAAQAAAAIAAAAEAAAABgAAAAQAAAACAAAAAAAA" + "AAUAAAACAAAAHAEAAAAAAAAAAAAAAAABAAcAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAADAAAA" + "AAAAAH4BAAAAAAAAAQABAAEAAABzAQAABAAAAHAQAgAAAA4AAQABAAAAAAB4AQAAAQAAAA4AAAAB" + "AAAAAwAGPGluaXQ+AAZMTWFpbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAJTWFpbi5qYXZhAAFWAAJW" + "TAATW0xqYXZhL2xhbmcvU3RyaW5nOwAEbWFpbgABAAcOAAMBAAcOAAAAAgAAgYAE8AEBCYgCDAAA" + "AAAAAAABAAAAAAAAAAEAAAAIAAAAcAAAAAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAA" + "uAAAAAYAAAABAAAA0AAAAAEgAAACAAAA8AAAAAEQAAABAAAAHAEAAAIgAAAIAAAAIgEAAAMgAAAC" + "AAAAcwEAAAAgAAABAAAAfgEAAAAQAAABAAAAjAEAAA=="; + +static const char kRawDexZeroLength[] = + "UEsDBAoAAAAAAOhxAkkAAAAAAAAAAAAAAAALABwAY2xhc3Nlcy5kZXhVVAkAA2QNoVdnDaFXdXgL" + "AAEE5AMBAASIEwAAUEsBAh4DCgAAAAAA6HECSQAAAAAAAAAAAAAAAAsAGAAAAAAAAAAAAKCBAAAA" + "AGNsYXNzZXMuZGV4VVQFAANkDaFXdXgLAAEE5AMBAASIEwAAUEsFBgAAAAABAAEAUQAAAEUAAAAA" + "AA=="; + +static void DecodeAndWriteDexFile(const char* base64, const char* location) { // decode base64 CHECK(base64 != nullptr); size_t length; @@ -150,7 +188,11 @@ static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64, if (file->FlushCloseOrErase() != 0) { PLOG(FATAL) << "Could not flush and close test file."; } - file.reset(); +} + +static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64, + const char* location) { + DecodeAndWriteDexFile(base64, location); // read dex file ScopedObjectAccess soa(Thread::Current()); @@ -197,6 +239,39 @@ TEST_F(DexFileTest, Header) { EXPECT_EQ(header.checksum_, raw->GetLocationChecksum()); } +TEST_F(DexFileTest, Version38Accepted) { + ScratchFile tmp; + std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kRawDex38, tmp.GetFilename().c_str())); + ASSERT_TRUE(raw.get() != nullptr); + + const DexFile::Header& header = raw->GetHeader(); + EXPECT_EQ(38u, header.GetVersion()); +} + +TEST_F(DexFileTest, Version39Rejected) { + ScratchFile tmp; + const char* location = tmp.GetFilename().c_str(); + DecodeAndWriteDexFile(kRawDex39, location); + + ScopedObjectAccess soa(Thread::Current()); + static constexpr bool kVerifyChecksum = true; + std::string error_msg; + std::vector<std::unique_ptr<const DexFile>> dex_files; + ASSERT_FALSE(DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files)); +} + +TEST_F(DexFileTest, ZeroLengthDexRejected) { + ScratchFile tmp; + const char* location = tmp.GetFilename().c_str(); + DecodeAndWriteDexFile(kRawDexZeroLength, location); + + ScopedObjectAccess soa(Thread::Current()); + static constexpr 
bool kVerifyChecksum = true; + std::string error_msg; + std::vector<std::unique_ptr<const DexFile>> dex_files; + ASSERT_FALSE(DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files)); +} + TEST_F(DexFileTest, GetLocationChecksum) { ScopedObjectAccess soa(Thread::Current()); std::unique_ptr<const DexFile> raw(OpenTestDexFile("Main")); diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h index 4019a5b536..fb774a4d1e 100644 --- a/runtime/gc/collector/concurrent_copying-inl.h +++ b/runtime/gc/collector/concurrent_copying-inl.h @@ -154,11 +154,30 @@ inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { } inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) { + mirror::Object* ret; + // TODO: Delete GetMarkBit check when all of the callers properly check the bit. Remaining caller + // is array allocations. + if (from_ref == nullptr || from_ref->GetMarkBit()) { + return from_ref; + } // TODO: Consider removing this check when we are done investigating slow paths. b/30162165 if (UNLIKELY(mark_from_read_barrier_measurements_)) { - return MarkFromReadBarrierWithMeasurements(from_ref); + ret = MarkFromReadBarrierWithMeasurements(from_ref); + } else { + ret = Mark(from_ref); + } + // Only set the mark bit for baker barrier. + if (kUseBakerReadBarrier && LIKELY(!rb_mark_bit_stack_full_ && ret->AtomicSetMarkBit(0, 1))) { + // If the mark stack is full, we may temporarily go to mark and back to unmarked. Seeing both + // values are OK since the only race is doing an unnecessary Mark. + if (!rb_mark_bit_stack_->AtomicPushBack(ret)) { + // Mark stack is full, set the bit back to zero. + CHECK(ret->AtomicSetMarkBit(1, 0)); + // Set rb_mark_bit_stack_full_, this is racy but OK since AtomicPushBack is thread safe. + rb_mark_bit_stack_full_ = true; + } } - return Mark(from_ref); + return ret; } inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) { diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index d7221e4578..071537db91 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -42,9 +42,6 @@ namespace gc { namespace collector { static constexpr size_t kDefaultGcMarkStackSize = 2 * MB; -// If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty -// pages. -static constexpr bool kGrayDirtyImmuneObjects = true; // If kFilterModUnionCards then we attempt to filter cards that don't need to be dirty in the mod // union table. Disabled since it does not seem to help the pause much. static constexpr bool kFilterModUnionCards = kIsDebugBuild; @@ -52,6 +49,9 @@ static constexpr bool kFilterModUnionCards = kIsDebugBuild; // ConcurrentCopying::Scan. May be used to diagnose possibly unnecessary read barriers. // Only enabled for kIsDebugBuild to avoid performance hit. static constexpr bool kDisallowReadBarrierDuringScan = kIsDebugBuild; +// Slow path mark stack size, increase this if the stack is getting full and it is causing +// performance problems. 
+static constexpr size_t kReadBarrierMarkStackSize = 512 * KB; ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix, @@ -63,6 +63,10 @@ ConcurrentCopying::ConcurrentCopying(Heap* heap, gc_mark_stack_(accounting::ObjectStack::Create("concurrent copying gc mark stack", kDefaultGcMarkStackSize, kDefaultGcMarkStackSize)), + rb_mark_bit_stack_(accounting::ObjectStack::Create("rb copying gc mark stack", + kReadBarrierMarkStackSize, + kReadBarrierMarkStackSize)), + rb_mark_bit_stack_full_(false), mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock), thread_running_gc_(nullptr), is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false), @@ -187,6 +191,7 @@ void ConcurrentCopying::InitializePhase() { CHECK(false_gray_stack_.empty()); } + rb_mark_bit_stack_full_ = false; mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_; if (measure_read_barrier_slow_path_) { rb_slow_path_ns_.StoreRelaxed(0); @@ -914,9 +919,9 @@ class ConcurrentCopying::VerifyNoFromSpaceRefsVisitor : public SingleRootVisitor } collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref); if (kUseBakerReadBarrier) { - CHECK(ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr()) + CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << "Ref " << ref << " " << PrettyTypeOf(ref) - << " has non-white rb_ptr " << ref->GetReadBarrierPointer(); + << " has non-white rb_ptr "; } } @@ -982,7 +987,7 @@ class ConcurrentCopying::VerifyNoFromSpaceRefsObjectVisitor { VerifyNoFromSpaceRefsFieldVisitor visitor(collector); obj->VisitReferences(visitor, visitor); if (kUseBakerReadBarrier) { - CHECK(obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr()) + CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer(); } } @@ -2243,6 +2248,15 @@ void ConcurrentCopying::FinishPhase() { } } } + if (kUseBakerReadBarrier) { + TimingLogger::ScopedTiming split("EmptyRBMarkBitStack", GetTimings()); + DCHECK(rb_mark_bit_stack_.get() != nullptr); + const auto* limit = rb_mark_bit_stack_->End(); + for (StackReference<mirror::Object>* it = rb_mark_bit_stack_->Begin(); it != limit; ++it) { + CHECK(it->AsMirrorPtr()->AtomicSetMarkBit(1, 0)); + } + rb_mark_bit_stack_->Reset(); + } } if (measure_read_barrier_slow_path_) { MutexLock mu(self, rb_slow_path_histogram_lock_); diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index 72112fabc6..a862802b21 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -57,6 +57,9 @@ class ConcurrentCopying : public GarbageCollector { static constexpr bool kEnableFromSpaceAccountingCheck = kIsDebugBuild; // Enable verbose mode. static constexpr bool kVerboseMode = false; + // If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty + // pages. + static constexpr bool kGrayDirtyImmuneObjects = true; ConcurrentCopying(Heap* heap, const std::string& name_prefix = "", @@ -230,6 +233,8 @@ class ConcurrentCopying : public GarbageCollector { space::RegionSpace* region_space_; // The underlying region space. 
std::unique_ptr<Barrier> gc_barrier_; std::unique_ptr<accounting::ObjectStack> gc_mark_stack_; + std::unique_ptr<accounting::ObjectStack> rb_mark_bit_stack_; + bool rb_mark_bit_stack_full_; std::vector<mirror::Object*> false_gray_stack_ GUARDED_BY(mark_stack_lock_); Mutex mark_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; std::vector<accounting::ObjectStack*> revoked_mark_stacks_ diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index a92cb2496c..5485cd2339 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -2538,6 +2538,17 @@ void Heap::PreZygoteFork() { AddSpace(zygote_space_); non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity()); AddSpace(non_moving_space_); + if (kUseBakerReadBarrier && gc::collector::ConcurrentCopying::kGrayDirtyImmuneObjects) { + // Treat all of the objects in the zygote as marked to avoid unnecessary dirty pages. This is + // safe since we mark all of the objects that may reference non immune objects as gray. + zygote_space_->GetLiveBitmap()->VisitMarkedRange( + reinterpret_cast<uintptr_t>(zygote_space_->Begin()), + reinterpret_cast<uintptr_t>(zygote_space_->Limit()), + [](mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) { + CHECK(obj->AtomicSetMarkBit(0, 1)); + }); + } + // Create the zygote space mod union table. accounting::ModUnionTable* mod_union_table = new accounting::ModUnionTableCardCache("zygote space mod-union table", this, diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index d140b754ff..8ade18510a 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -1436,6 +1436,8 @@ ImageSpace* ImageSpace::Init(const char* image_filename, image_header->GetImageMethod(ImageHeader::kRefsOnlySaveMethod)); CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs), image_header->GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod)); + CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveEverything), + image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod)); } else if (!runtime->HasResolutionMethod()) { runtime->SetInstructionSet(space->oat_file_non_owned_->GetOatHeader().GetInstructionSet()); runtime->SetResolutionMethod(image_header->GetImageMethod(ImageHeader::kResolutionMethod)); @@ -1448,6 +1450,8 @@ ImageSpace* ImageSpace::Init(const char* image_filename, image_header->GetImageMethod(ImageHeader::kRefsOnlySaveMethod), Runtime::kRefsOnly); runtime->SetCalleeSaveMethod( image_header->GetImageMethod(ImageHeader::kRefsAndArgsSaveMethod), Runtime::kRefsAndArgs); + runtime->SetCalleeSaveMethod( + image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod), Runtime::kSaveEverything); } VLOG(image) << "ImageSpace::Init exiting " << *space.get(); diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h index 5d62b597e9..c66029d32e 100644 --- a/runtime/generated/asm_support_gen.h +++ b/runtime/generated/asm_support_gen.h @@ -32,6 +32,8 @@ DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET), DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kRefsOnly)))) #define RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 0x10 DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kRefsAndArgs)))) +#define RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET 0x18 
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_EVERYTHING_CALLEE_SAVE_FRAME_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveEverything)))) #define THREAD_FLAGS_OFFSET 0 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_FLAGS_OFFSET), (static_cast<int32_t>(art::Thread:: ThreadFlagsOffset<art::kRuntimePointerSize>().Int32Value()))) #define THREAD_ID_OFFSET 12 @@ -74,12 +76,22 @@ DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_STATE_SHIFT), (static_cast<int32_ DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kStateMaskShifted))) #define LOCK_WORD_READ_BARRIER_STATE_SHIFT 28 DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_READ_BARRIER_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kReadBarrierStateShift))) -#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x30000000 +#define LOCK_WORD_READ_BARRIER_STATE_MASK 0x10000000 DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShifted))) -#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xcfffffff +#define LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED 0xefffffff DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled))) #define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536 DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_THIN_LOCK_COUNT_ONE), (static_cast<int32_t>(art::LockWord::kThinLockCountOne))) +#define LOCK_WORD_GC_STATE_MASK_SHIFTED 0x30000000 +DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShifted))) +#define LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED 0xcfffffff +DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShiftedToggled))) +#define LOCK_WORD_GC_STATE_SHIFT 28 +DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_GC_STATE_SHIFT), (static_cast<int32_t>(art::LockWord::kGCStateShift))) +#define LOCK_WORD_MARK_BIT_SHIFT 29 +DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_MARK_BIT_SHIFT), (static_cast<int32_t>(art::LockWord::kMarkBitStateShift))) +#define LOCK_WORD_MARK_BIT_MASK_SHIFTED 0x20000000 +DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_MARK_BIT_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kMarkBitStateMaskShifted))) #define OBJECT_ALIGNMENT_MASK 0x7 DEFINE_CHECK_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), (static_cast<size_t>(art::kObjectAlignment - 1))) #define OBJECT_ALIGNMENT_MASK_TOGGLED 0xfffffff8 diff --git a/runtime/globals.h b/runtime/globals.h index 0b44c47092..9045d404e6 100644 --- a/runtime/globals.h +++ b/runtime/globals.h @@ -47,7 +47,8 @@ static inline bool CanDoImplicitNullCheckOn(uintptr_t offset) { } // Required object alignment -static constexpr size_t kObjectAlignment = 8; +static constexpr size_t kObjectAlignmentShift = 3; +static constexpr size_t kObjectAlignment = 1u << kObjectAlignmentShift; static constexpr size_t kLargeObjectAlignment = kPageSize; // Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't. diff --git a/runtime/image.h b/runtime/image.h index a98cea1bab..207a8189a2 100644 --- a/runtime/image.h +++ b/runtime/image.h @@ -186,6 +186,7 @@ class PACKED(4) ImageHeader { kCalleeSaveMethod, kRefsOnlySaveMethod, kRefsAndArgsSaveMethod, + kSaveEverythingMethod, kImageMethodsCount, // Number of elements in enum. 
}; diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h index 341501b46c..4a2a293400 100644 --- a/runtime/lock_word-inl.h +++ b/runtime/lock_word-inl.h @@ -43,17 +43,15 @@ inline Monitor* LockWord::FatLockMonitor() const { inline size_t LockWord::ForwardingAddress() const { DCHECK_EQ(GetState(), kForwardingAddress); - return value_ << kStateSize; + return value_ << kForwardingAddressShift; } inline LockWord::LockWord() : value_(0) { DCHECK_EQ(GetState(), kUnlocked); } -inline LockWord::LockWord(Monitor* mon, uint32_t rb_state) - : value_(mon->GetMonitorId() | (rb_state << kReadBarrierStateShift) | - (kStateFat << kStateShift)) { - DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U); +inline LockWord::LockWord(Monitor* mon, uint32_t gc_state) + : value_(mon->GetMonitorId() | (gc_state << kGCStateShift) | (kStateFat << kStateShift)) { #ifndef __LP64__ DCHECK_ALIGNED(mon, kMonitorIdAlignment); #endif diff --git a/runtime/lock_word.h b/runtime/lock_word.h index 5d0d204636..538b6ebbba 100644 --- a/runtime/lock_word.h +++ b/runtime/lock_word.h @@ -35,27 +35,27 @@ class Monitor; * the state. The four possible states are fat locked, thin/unlocked, hash code, and forwarding * address. When the lock word is in the "thin" state and its bits are formatted as follows: * - * |33|22|222222221111|1111110000000000| - * |10|98|765432109876|5432109876543210| - * |00|rb| lock count |thread id owner | + * |33|2|2|222222221111|1111110000000000| + * |10|9|8|765432109876|5432109876543210| + * |00|m|r| lock count |thread id owner | * * When the lock word is in the "fat" state and its bits are formatted as follows: * - * |33|22|2222222211111111110000000000| - * |10|98|7654321098765432109876543210| - * |01|rb| MonitorId | + * |33|2|2|2222222211111111110000000000| + * |10|9|8|7654321098765432109876543210| + * |01|m|r| MonitorId | * * When the lock word is in hash state and its bits are formatted as follows: * - * |33|22|2222222211111111110000000000| - * |10|98|7654321098765432109876543210| - * |10|rb| HashCode | + * |33|2|2|2222222211111111110000000000| + * |10|9|8|7654321098765432109876543210| + * |10|m|r| HashCode | * - * When the lock word is in fowarding address state and its bits are formatted as follows: + * When the lock word is in forwarding address state and its bits are formatted as follows: * - * |33|22|2222222211111111110000000000| - * |10|98|7654321098765432109876543210| - * |11| ForwardingAddress | + * |33|2|22222222211111111110000000000| + * |10|9|87654321098765432109876543210| + * |11|0| ForwardingAddress | * * The rb bits store the read barrier state. */ @@ -64,11 +64,13 @@ class LockWord { enum SizeShiftsAndMasks { // private marker to avoid generate-operator-out.py from processing. // Number of bits to encode the state, currently just fat or thin/unlocked or hash code. kStateSize = 2, - kReadBarrierStateSize = 2, + kReadBarrierStateSize = 1, + kMarkBitStateSize = 1, // Number of bits to encode the thin lock owner. kThinLockOwnerSize = 16, // Remaining bits are the recursive lock count. - kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize, + kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize - + kMarkBitStateSize, // Thin lock bits. Owner in lowest bits. kThinLockOwnerShift = 0, @@ -81,25 +83,43 @@ class LockWord { kThinLockCountOne = 1 << kThinLockCountShift, // == 65536 (0x10000) // State in the highest bits. 
- kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift, + kStateShift = kReadBarrierStateSize + kThinLockCountSize + kThinLockCountShift + + kMarkBitStateSize, kStateMask = (1 << kStateSize) - 1, kStateMaskShifted = kStateMask << kStateShift, kStateThinOrUnlocked = 0, kStateFat = 1, kStateHash = 2, kStateForwardingAddress = 3, + + // Read barrier bit. kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift, kReadBarrierStateMask = (1 << kReadBarrierStateSize) - 1, kReadBarrierStateMaskShifted = kReadBarrierStateMask << kReadBarrierStateShift, kReadBarrierStateMaskShiftedToggled = ~kReadBarrierStateMaskShifted, + // Mark bit. + kMarkBitStateShift = kReadBarrierStateSize + kReadBarrierStateShift, + kMarkBitStateMask = (1 << kMarkBitStateSize) - 1, + kMarkBitStateMaskShifted = kMarkBitStateMask << kMarkBitStateShift, + kMarkBitStateMaskShiftedToggled = ~kMarkBitStateMaskShifted, + + // GC state is mark bit and read barrier state. + kGCStateSize = kReadBarrierStateSize + kMarkBitStateSize, + kGCStateShift = kReadBarrierStateShift, + kGCStateMaskShifted = kReadBarrierStateMaskShifted | kMarkBitStateMaskShifted, + kGCStateMaskShiftedToggled = ~kGCStateMaskShifted, + // When the state is kHashCode, the non-state bits hold the hashcode. // Note Object.hashCode() has the hash code layout hardcoded. kHashShift = 0, - kHashSize = 32 - kStateSize - kReadBarrierStateSize, + kHashSize = 32 - kStateSize - kReadBarrierStateSize - kMarkBitStateSize, kHashMask = (1 << kHashSize) - 1, kMaxHash = kHashMask, + // Forwarding address shift. + kForwardingAddressShift = kObjectAlignmentShift, + kMonitorIdShift = kHashShift, kMonitorIdSize = kHashSize, kMonitorIdMask = kHashMask, @@ -108,31 +128,31 @@ class LockWord { kMaxMonitorId = kMaxHash }; - static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t rb_state) { + static LockWord FromThinLockId(uint32_t thread_id, uint32_t count, uint32_t gc_state) { CHECK_LE(thread_id, static_cast<uint32_t>(kThinLockMaxOwner)); CHECK_LE(count, static_cast<uint32_t>(kThinLockMaxCount)); - DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U); - return LockWord((thread_id << kThinLockOwnerShift) | (count << kThinLockCountShift) | - (rb_state << kReadBarrierStateShift) | + // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U); + return LockWord((thread_id << kThinLockOwnerShift) | + (count << kThinLockCountShift) | + (gc_state << kGCStateShift) | (kStateThinOrUnlocked << kStateShift)); } static LockWord FromForwardingAddress(size_t target) { DCHECK_ALIGNED(target, (1 << kStateSize)); - return LockWord((target >> kStateSize) | (kStateForwardingAddress << kStateShift)); + return LockWord((target >> kForwardingAddressShift) | (kStateForwardingAddress << kStateShift)); } - static LockWord FromHashCode(uint32_t hash_code, uint32_t rb_state) { + static LockWord FromHashCode(uint32_t hash_code, uint32_t gc_state) { CHECK_LE(hash_code, static_cast<uint32_t>(kMaxHash)); - DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U); + // DCHECK_EQ(gc_bits & kGCStateMaskToggled, 0U); return LockWord((hash_code << kHashShift) | - (rb_state << kReadBarrierStateShift) | + (gc_state << kGCStateShift) | (kStateHash << kStateShift)); } - static LockWord FromDefault(uint32_t rb_state) { - DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U); - return LockWord(rb_state << kReadBarrierStateShift); + static LockWord FromDefault(uint32_t gc_state) { + return LockWord(gc_state << kGCStateShift); } static bool IsDefault(LockWord lw) { @@ -154,7 +174,7 @@ class LockWord { 
LockState GetState() const { CheckReadBarrierState(); if ((!kUseReadBarrier && UNLIKELY(value_ == 0)) || - (kUseReadBarrier && UNLIKELY((value_ & kReadBarrierStateMaskShiftedToggled) == 0))) { + (kUseReadBarrier && UNLIKELY((value_ & kGCStateMaskShiftedToggled) == 0))) { return kUnlocked; } else { uint32_t internal_state = (value_ >> kStateShift) & kStateMask; @@ -176,6 +196,10 @@ class LockWord { return (value_ >> kReadBarrierStateShift) & kReadBarrierStateMask; } + uint32_t GCState() const { + return (value_ & kGCStateMaskShifted) >> kGCStateShift; + } + void SetReadBarrierState(uint32_t rb_state) { DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U); DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress)); @@ -184,6 +208,19 @@ class LockWord { value_ |= (rb_state & kReadBarrierStateMask) << kReadBarrierStateShift; } + + uint32_t MarkBitState() const { + return (value_ >> kMarkBitStateShift) & kMarkBitStateMask; + } + + void SetMarkBitState(uint32_t mark_bit) { + DCHECK_EQ(mark_bit & ~kMarkBitStateMask, 0U); + DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress)); + // Clear and or the bits. + value_ &= kMarkBitStateMaskShiftedToggled; + value_ |= mark_bit << kMarkBitStateShift; + } + // Return the owner thin lock thread id. uint32_t ThinLockOwner() const; @@ -197,7 +234,7 @@ class LockWord { size_t ForwardingAddress() const; // Constructor a lock word for inflation to use a Monitor. - LockWord(Monitor* mon, uint32_t rb_state); + LockWord(Monitor* mon, uint32_t gc_state); // Return the hash code stored in the lock word, must be kHashCode state. int32_t GetHashCode() const; @@ -207,7 +244,7 @@ class LockWord { if (kIncludeReadBarrierState) { return lw1.GetValue() == lw2.GetValue(); } - return lw1.GetValueWithoutReadBarrierState() == lw2.GetValueWithoutReadBarrierState(); + return lw1.GetValueWithoutGCState() == lw2.GetValueWithoutGCState(); } void Dump(std::ostream& os) { @@ -248,9 +285,9 @@ class LockWord { return value_; } - uint32_t GetValueWithoutReadBarrierState() const { + uint32_t GetValueWithoutGCState() const { CheckReadBarrierState(); - return value_ & ~(kReadBarrierStateMask << kReadBarrierStateShift); + return value_ & kGCStateMaskShiftedToggled; } // Only Object should be converting LockWords to/from uints. 
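Summing up the lock word re-layout: the old two-bit read barrier field becomes a one-bit read barrier state at bit 28 plus a new mark bit at bit 29, and the pair forms the "GC state" that FromThinLockId/FromHashCode/FromDefault now thread through unchanged; the single mark bit is what the assembly fast paths above test via LOCK_WORD_MARK_BIT_MASK_SHIFTED. A standalone recap of the resulting constants (mirroring the generated asm_support values, not the LockWord enum itself):

#include <cstdint>

constexpr uint32_t kReadBarrierBit = 1u << 28;                 // LOCK_WORD_READ_BARRIER_STATE_MASK
constexpr uint32_t kMarkBit = 1u << 29;                        // LOCK_WORD_MARK_BIT_MASK_SHIFTED
constexpr uint32_t kGCStateMask = kReadBarrierBit | kMarkBit;  // LOCK_WORD_GC_STATE_MASK_SHIFTED

static_assert(kReadBarrierBit == 0x10000000u, "read barrier state is now a single bit");
static_assert(kMarkBit == 0x20000000u, "mark bit sits directly below the two state bits");
static_assert(kGCStateMask == 0x30000000u, "GC state = mark bit | read barrier bit");
static_assert(~kGCStateMask == 0xcfffffffu, "LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED");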
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h index 0592c6c1c8..0495c957c6 100644 --- a/runtime/mirror/object-inl.h +++ b/runtime/mirror/object-inl.h @@ -147,10 +147,20 @@ inline Object* Object::GetReadBarrierPointer() { #endif } +inline uint32_t Object::GetMarkBit() { +#ifdef USE_READ_BARRIER + return GetLockWord(false).MarkBitState(); +#else + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +#endif +} + inline void Object::SetReadBarrierPointer(Object* rb_ptr) { #ifdef USE_BAKER_READ_BARRIER DCHECK(kUseBakerReadBarrier); DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U); + DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported"; LockWord lw = GetLockWord(false); lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr))); SetLockWord(lw, false); @@ -173,6 +183,8 @@ inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* DCHECK(kUseBakerReadBarrier); DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U); DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U); + DCHECK_NE(expected_rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported"; + DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported"; LockWord expected_lw; LockWord new_lw; do { @@ -216,6 +228,24 @@ inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* #endif } +inline bool Object::AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit) { + LockWord expected_lw; + LockWord new_lw; + do { + LockWord lw = GetLockWord(false); + if (UNLIKELY(lw.MarkBitState() != expected_mark_bit)) { + // Lost the race. + return false; + } + expected_lw = lw; + new_lw = lw; + new_lw.SetMarkBitState(mark_bit); + // Since this is only set from the mutator, we can use the non release Cas. + } while (!CasLockWordWeakRelaxed(expected_lw, new_lw)); + return true; +} + + inline void Object::AssertReadBarrierPointer() const { if (kUseBakerReadBarrier) { Object* obj = const_cast<Object*>(this); diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc index 701c600822..13c536e38d 100644 --- a/runtime/mirror/object.cc +++ b/runtime/mirror/object.cc @@ -163,8 +163,7 @@ int32_t Object::IdentityHashCode() const { case LockWord::kUnlocked: { // Try to compare and swap in a new hash, if we succeed we will return the hash on the next // loop iteration. 
- LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(), - lw.ReadBarrierState()); + LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(), lw.GCState()); DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode); if (const_cast<Object*>(this)->CasLockWordWeakRelaxed(lw, hash_word)) { return hash_word.GetHashCode(); diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h index a4bdbad21b..5b129bf2ba 100644 --- a/runtime/mirror/object.h +++ b/runtime/mirror/object.h @@ -93,6 +93,7 @@ class MANAGED LOCKABLE Object { template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_); + // TODO: Clean this up and change to return int32_t Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_); #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER @@ -103,6 +104,12 @@ class MANAGED LOCKABLE Object { template<bool kCasRelease = false> ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) SHARED_REQUIRES(Locks::mutator_lock_); + + ALWAYS_INLINE uint32_t GetMarkBit() SHARED_REQUIRES(Locks::mutator_lock_); + + ALWAYS_INLINE bool AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit) + SHARED_REQUIRES(Locks::mutator_lock_); + void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_); // The verifier treats all interfaces as java.lang.Object and relies on runtime checks in diff --git a/runtime/monitor.cc b/runtime/monitor.cc index bf9f931435..e863ea981d 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -155,7 +155,7 @@ bool Monitor::Install(Thread* self) { return false; } } - LockWord fat(this, lw.ReadBarrierState()); + LockWord fat(this, lw.GCState()); // Publish the updated lock word, which may race with other threads. bool success = GetObject()->CasLockWordWeakSequentiallyConsistent(lw, fat); // Lock profiling. @@ -774,20 +774,21 @@ bool Monitor::Deflate(Thread* self, mirror::Object* obj) { return false; } // Deflate to a thin lock. - LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_, - lw.ReadBarrierState()); + LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(), + monitor->lock_count_, + lw.GCState()); // Assume no concurrent read barrier state changes as mutators are suspended. obj->SetLockWord(new_lw, false); VLOG(monitor) << "Deflated " << obj << " to thin lock " << owner->GetTid() << " / " << monitor->lock_count_; } else if (monitor->HasHashCode()) { - LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.ReadBarrierState()); + LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.GCState()); // Assume no concurrent read barrier state changes as mutators are suspended. obj->SetLockWord(new_lw, false); VLOG(monitor) << "Deflated " << obj << " to hash monitor " << monitor->GetHashCode(); } else { // No lock and no hash, just put an empty lock word inside the object. - LockWord new_lw = LockWord::FromDefault(lw.ReadBarrierState()); + LockWord new_lw = LockWord::FromDefault(lw.GCState()); // Assume no concurrent read barrier state changes as mutators are suspended. 
obj->SetLockWord(new_lw, false); VLOG(monitor) << "Deflated" << obj << " to empty lock word"; @@ -876,7 +877,7 @@ mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj, bool tr LockWord lock_word = h_obj->GetLockWord(true); switch (lock_word.GetState()) { case LockWord::kUnlocked: { - LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.ReadBarrierState())); + LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.GCState())); if (h_obj->CasLockWordWeakSequentiallyConsistent(lock_word, thin_locked)) { AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */); // CasLockWord enforces more than the acquire ordering we need here. @@ -890,8 +891,9 @@ mirror::Object* Monitor::MonitorEnter(Thread* self, mirror::Object* obj, bool tr // We own the lock, increase the recursion count. uint32_t new_count = lock_word.ThinLockCount() + 1; if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) { - LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count, - lock_word.ReadBarrierState())); + LockWord thin_locked(LockWord::FromThinLockId(thread_id, + new_count, + lock_word.GCState())); if (!kUseReadBarrier) { h_obj->SetLockWord(thin_locked, true); AtraceMonitorLock(self, h_obj.Get(), false /* is_wait */); @@ -975,9 +977,9 @@ bool Monitor::MonitorExit(Thread* self, mirror::Object* obj) { LockWord new_lw = LockWord::Default(); if (lock_word.ThinLockCount() != 0) { uint32_t new_count = lock_word.ThinLockCount() - 1; - new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.ReadBarrierState()); + new_lw = LockWord::FromThinLockId(thread_id, new_count, lock_word.GCState()); } else { - new_lw = LockWord::FromDefault(lock_word.ReadBarrierState()); + new_lw = LockWord::FromDefault(lock_word.GCState()); } if (!kUseReadBarrier) { DCHECK_EQ(new_lw.ReadBarrierState(), 0U); diff --git a/runtime/oat.h b/runtime/oat.h index 2c5c3e636f..7c84fe90f9 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '8', '5', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '8', '6', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h index 42e959c2bd..5d32c098b4 100644 --- a/runtime/read_barrier.h +++ b/runtime/read_barrier.h @@ -99,8 +99,9 @@ class ReadBarrier { // Note: These couldn't be constexpr pointers as reinterpret_cast isn't compatible with them. static constexpr uintptr_t white_ptr_ = 0x0; // Not marked. static constexpr uintptr_t gray_ptr_ = 0x1; // Marked, but not marked through. On mark stack. + // TODO: black_ptr_ is unused, we should remove it. static constexpr uintptr_t black_ptr_ = 0x2; // Marked through. Used for non-moving objects. - static constexpr uintptr_t rb_ptr_mask_ = 0x3; // The low 2 bits for white|gray|black. + static constexpr uintptr_t rb_ptr_mask_ = 0x1; // The low bits for white|gray. 
}; } // namespace art diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h index bfa8c549bf..265587dc5c 100644 --- a/runtime/runtime-inl.h +++ b/runtime/runtime-inl.h @@ -45,9 +45,11 @@ inline QuickMethodFrameInfo Runtime::GetRuntimeMethodFrameInfo(ArtMethod* method return GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs); } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveAll)) { return GetCalleeSaveMethodFrameInfo(Runtime::kSaveAll); - } else { - DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(Runtime::kRefsOnly)); + } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kRefsOnly)) { return GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly); + } else { + DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(Runtime::kSaveEverything)); + return GetCalleeSaveMethodFrameInfo(Runtime::kSaveEverything); } } diff --git a/runtime/runtime.h b/runtime/runtime.h index afa8e4818b..7e269af765 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -387,9 +387,10 @@ class Runtime { // Returns a special method that describes all callee saves being spilled to the stack. enum CalleeSaveType { - kSaveAll, + kSaveAll, // All callee-save registers. kRefsOnly, kRefsAndArgs, + kSaveEverything, // Even caller-save registers. kLastCalleeSaveType // Value used for iteration }; diff --git a/test/614-checker-dump-constant-location/expected.txt b/test/614-checker-dump-constant-location/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/614-checker-dump-constant-location/expected.txt diff --git a/test/614-checker-dump-constant-location/info.txt b/test/614-checker-dump-constant-location/info.txt new file mode 100644 index 0000000000..4a94fface0 --- /dev/null +++ b/test/614-checker-dump-constant-location/info.txt @@ -0,0 +1,2 @@ +Test that the graph visualizer outputs useful information for constant +locations in parallel moves. diff --git a/test/614-checker-dump-constant-location/src/Main.java b/test/614-checker-dump-constant-location/src/Main.java new file mode 100644 index 0000000000..f6bc063d86 --- /dev/null +++ b/test/614-checker-dump-constant-location/src/Main.java @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + public static int array_int[] = { 0 }; + public static long array_long[] = { 0 }; + public static float array_float[] = { 0.0f }; + public static double array_double[] = { 0.0 }; + + // The code used to print constant locations in parallel moves is architecture + // independent. We only test for ARM and ARM64 as it is easy: 'store' + // instructions only take registers as a source. 
+ + /// CHECK-START-ARM: void Main.store_to_arrays() register (after) + /// CHECK: ParallelMove {{.*#1->.*#2->.*#3\.3->.*#4\.4->.*}} + + /// CHECK-START-ARM64: void Main.store_to_arrays() register (after) + /// CHECK: ParallelMove {{.*#1->.*#2->.*#3\.3->.*#4\.4->.*}} + + public void store_to_arrays() { + array_int[0] = 1; + array_long[0] = 2; + array_float[0] = 3.3f; + array_double[0] = 4.4; + } + + public static void main(String args[]) {} +} diff --git a/test/615-checker-arm64-zr-parallel-move/expected.txt b/test/615-checker-arm64-zr-parallel-move/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/615-checker-arm64-zr-parallel-move/expected.txt diff --git a/test/615-checker-arm64-zr-parallel-move/info.txt b/test/615-checker-arm64-zr-parallel-move/info.txt new file mode 100644 index 0000000000..199755d38d --- /dev/null +++ b/test/615-checker-arm64-zr-parallel-move/info.txt @@ -0,0 +1 @@ +Checker test to verify we correctly use wzr and xzr to synthesize zero constants. diff --git a/test/615-checker-arm64-zr-parallel-move/src/Main.java b/test/615-checker-arm64-zr-parallel-move/src/Main.java new file mode 100644 index 0000000000..5024f2881c --- /dev/null +++ b/test/615-checker-arm64-zr-parallel-move/src/Main.java @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + public static boolean doThrow = false; + + public void $noinline$foo(int in_w1, + int in_w2, + int in_w3, + int in_w4, + int in_w5, + int in_w6, + int in_w7, + int on_stack_int, + long on_stack_long, + float in_s0, + float in_s1, + float in_s2, + float in_s3, + float in_s4, + float in_s5, + float in_s6, + float in_s7, + float on_stack_float, + double on_stack_double) { + if (doThrow) throw new Error(); + } + + // We expect a parallel move that moves four times the zero constant to stack locations. + /// CHECK-START-ARM64: void Main.bar() register (after) + /// CHECK: ParallelMove {{.*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*#0->[0-9x]+\(sp\).*}} + + // Those four moves should generate four 'store' instructions using directly the zero register. + /// CHECK-START-ARM64: void Main.bar() disassembly (after) + /// CHECK-DAG: {{(str|stur)}} wzr, [sp, #{{[0-9]+}}] + /// CHECK-DAG: {{(str|stur)}} xzr, [sp, #{{[0-9]+}}] + /// CHECK-DAG: {{(str|stur)}} wzr, [sp, #{{[0-9]+}}] + /// CHECK-DAG: {{(str|stur)}} xzr, [sp, #{{[0-9]+}}] + + public void bar() { + $noinline$foo(1, 2, 3, 4, 5, 6, 7, // Integral values in registers. + 0, 0L, // Integral values on the stack. + 1, 2, 3, 4, 5, 6, 7, 8, // Floating-point values in registers. + 0.0f, 0.0); // Floating-point values on the stack. 
+ } + + public static void main(String args[]) {} +} diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index 8f8b667429..8d7d70dcbe 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -26,7 +26,8 @@ TEST_ART_RUN_TESTS := $(subst $(LOCAL_PATH)/,, $(TEST_ART_RUN_TESTS)) # The path where build only targets will be output, e.g. # out/target/product/generic_x86_64/obj/PACKAGING/art-run-tests_intermediates/DATA -art_run_tests_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA +art_run_tests_build_dir := $(call intermediates-dir-for,JAVA_LIBRARIES,art-run-tests)/DATA +art_run_tests_install_dir := $(call intermediates-dir-for,PACKAGING,art-run-tests)/DATA # A generated list of prerequisites that call 'run-test --build-only', the actual prerequisite is # an empty file touched in the intermediate directory. @@ -49,7 +50,8 @@ endif # Helper to create individual build targets for tests. Must be called with $(eval). # $(1): the test number define define-build-art-run-test - dmart_target := $(art_run_tests_dir)/art-run-tests/$(1)/touch + dmart_target := $(art_run_tests_build_dir)/art-run-tests/$(1)/touch + dmart_install_target := $(art_run_tests_install_dir)/art-run-tests/$(1)/touch run_test_options = --build-only ifeq ($(ART_TEST_QUIET),true) run_test_options += --quiet @@ -67,8 +69,13 @@ $$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TARGET_JACK_CLASSPATH_DEPE $(LOCAL_PATH)/run-test $$(PRIVATE_RUN_TEST_OPTIONS) --output-path $$(abspath $$(dir $$@)) $(1) $(hide) touch $$@ - TEST_ART_RUN_TEST_BUILD_RULES += $$(dmart_target) +$$(dmart_install_target): $$(dmart_target) + $(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@) + $(hide) cp $$(dir $$<)/* $$(dir $$@)/ + + TEST_ART_RUN_TEST_BUILD_RULES += $$(dmart_install_target) dmart_target := + dmart_install_target := run_test_options := endef $(foreach test, $(TEST_ART_RUN_TESTS), $(eval $(call define-build-art-run-test,$(test)))) @@ -78,12 +85,13 @@ LOCAL_MODULE_TAGS := tests LOCAL_MODULE := art-run-tests LOCAL_ADDITIONAL_DEPENDENCIES := $(TEST_ART_RUN_TEST_BUILD_RULES) # The build system use this flag to pick up files generated by declare-make-art-run-test. -LOCAL_PICKUP_FILES := $(art_run_tests_dir) +LOCAL_PICKUP_FILES := $(art_run_tests_install_dir) include $(BUILD_PHONY_PACKAGE) # Clear temp vars. -art_run_tests_dir := +art_run_tests_build_dir := +art_run_tests_install_dir := define-build-art-run-test := TEST_ART_RUN_TEST_BUILD_RULES := diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc index 806e130cb9..ee2ee1a36a 100644 --- a/test/common/runtime_state.cc +++ b/test/common/runtime_state.cc @@ -130,18 +130,18 @@ extern "C" JNIEXPORT void JNICALL Java_Main_ensureJitCompiled(JNIEnv* env, return; } - ScopedObjectAccess soa(Thread::Current()); - - ScopedUtfChars chars(env, method_name); - CHECK(chars.c_str() != nullptr); - - mirror::Class* klass = soa.Decode<mirror::Class*>(cls); - ArtMethod* method = klass->FindDeclaredDirectMethodByName(chars.c_str(), kRuntimePointerSize); + ArtMethod* method = nullptr; + { + ScopedObjectAccess soa(Thread::Current()); + + ScopedUtfChars chars(env, method_name); + CHECK(chars.c_str() != nullptr); + method = soa.Decode<mirror::Class*>(cls)->FindDeclaredDirectMethodByName( + chars.c_str(), kRuntimePointerSize); + } jit::JitCodeCache* code_cache = jit->GetCodeCache(); OatQuickMethodHeader* header = nullptr; - // Make sure there is a profiling info, required by the compiler. 
- ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true); while (true) { header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode()); if (code_cache->ContainsPc(header->GetCode())) { @@ -149,6 +149,9 @@ extern "C" JNIEXPORT void JNICALL Java_Main_ensureJitCompiled(JNIEnv* env, } else { // Sleep to yield to the compiler thread. usleep(1000); + ScopedObjectAccess soa(Thread::Current()); + // Make sure there is a profiling info, required by the compiler. + ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true); // Will either ensure it's compiled or do the compilation itself. jit->CompileMethod(method, soa.Self(), /* osr */ false); } diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar index 64bf4f3046..c6c9380412 100755 --- a/test/etc/run-test-jar +++ b/test/etc/run-test-jar @@ -553,12 +553,10 @@ else if [ "$TIME_OUT" = "timeout" ]; then # Add timeout command if time out is desired. # - # Note: We use nested timeouts. The inner timeout sends SIGRTMIN+2 (usually 36) to ART, which - # will induce a full thread dump before abort. However, dumping threads might deadlock, - # so the outer timeout sends the regular SIGTERM after an additional minute to ensure - # termination (without dumping all threads). - TIME_PLUS_ONE=$(($TIME_OUT_VALUE + 60)) - cmdline="timeout ${TIME_PLUS_ONE}s timeout -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline" + # Note: We first send SIGRTMIN+2 (usually 36) to ART, which will induce a full thread dump + # before abort. However, dumping threads might deadlock, so we also use the "-k" + # option to definitely kill the child. + cmdline="timeout -k 120s -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline" fi if [ "$DEV_MODE" = "y" ]; then diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java index 3cdb40cc6d..8769d115ff 100644 --- a/tools/ahat/src/InstanceUtils.java +++ b/tools/ahat/src/InstanceUtils.java @@ -95,9 +95,7 @@ class InstanceUtils { return null; } - // TODO: When perflib provides a better way to get the length of the - // array, we should use that here. 
- int numChars = chars.getValues().length; + int numChars = chars.getLength(); int count = getIntField(inst, "count", numChars); if (count == 0) { return ""; diff --git a/tools/cpp-define-generator/constant_lockword.def b/tools/cpp-define-generator/constant_lockword.def index c1e6099121..67ed5b5721 100644 --- a/tools/cpp-define-generator/constant_lockword.def +++ b/tools/cpp-define-generator/constant_lockword.def @@ -30,5 +30,12 @@ DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK, uint32_t, kReadBarrierStateMas DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK_TOGGLED, uint32_t, kReadBarrierStateMaskShiftedToggled) DEFINE_LOCK_WORD_EXPR(THIN_LOCK_COUNT_ONE, int32_t, kThinLockCountOne) +DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED, uint32_t, kGCStateMaskShifted) +DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED_TOGGLED, uint32_t, kGCStateMaskShiftedToggled) +DEFINE_LOCK_WORD_EXPR(GC_STATE_SHIFT, int32_t, kGCStateShift) + +DEFINE_LOCK_WORD_EXPR(MARK_BIT_SHIFT, int32_t, kMarkBitStateShift) +DEFINE_LOCK_WORD_EXPR(MARK_BIT_MASK_SHIFTED, uint32_t, kMarkBitStateMaskShifted) + #undef DEFINE_LOCK_WORD_EXPR diff --git a/tools/cpp-define-generator/offset_runtime.def b/tools/cpp-define-generator/offset_runtime.def index b327ca3a96..123992f473 100644 --- a/tools/cpp-define-generator/offset_runtime.def +++ b/tools/cpp-define-generator/offset_runtime.def @@ -34,6 +34,8 @@ DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_ALL, kSaveAll) DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(REFS_ONLY, kRefsOnly) // Offset of field Runtime::callee_save_methods_[kRefsAndArgs] DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(REFS_AND_ARGS, kRefsAndArgs) +// Offset of field Runtime::callee_save_methods_[kSaveEverything] +DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_EVERYTHING, kSaveEverything) #undef DEFINE_RUNTIME_CALLEE_SAVE_OFFSET #include "common_undef.def" // undef DEFINE_OFFSET_EXPR diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt index bf8d12b9c6..8d87e4fafe 100644 --- a/tools/libcore_failures.txt +++ b/tools/libcore_failures.txt @@ -36,6 +36,15 @@ names: ["libcore.io.OsTest#testUnixDomainSockets_in_file_system"] }, { + description: "TCP_USER_TIMEOUT is not defined on host's tcp.h (glibc-2.15-4.8).", + result: EXEC_FAILED, + modes: [host], + names: ["libcore.android.system.OsConstantsTest#testTcpUserTimeoutIsDefined", + "libcore.io.OsTest#test_socket_tcpUserTimeout_setAndGet", + "libcore.io.OsTest#test_socket_tcpUserTimeout_doesNotWorkOnDatagramSocket"], + bug: 30402085 +}, +{ description: "Issue with incorrect device time (1970)", result: EXEC_FAILED, modes: [device], @@ -174,38 +183,7 @@ description: "Failing tests after OpenJDK move.", result: EXEC_FAILED, bug: 26326992, - names: ["libcore.icu.RelativeDateTimeFormatterTest#test_getRelativeDateTimeStringDST", - "libcore.java.lang.OldSystemTest#test_load", - "libcore.java.text.NumberFormatTest#test_currencyWithPatternDigits", - "libcore.java.text.NumberFormatTest#test_setCurrency", - "libcore.java.text.OldNumberFormatTest#test_getIntegerInstanceLjava_util_Locale", - "libcore.java.util.CalendarTest#testAddOneDayAndOneDayOver30MinuteDstForwardAdds48Hours", - "libcore.java.util.CalendarTest#testNewCalendarKoreaIsSelfConsistent", - "libcore.java.util.CalendarTest#testSetTimeInZoneWhereDstIsNoLongerUsed", - "libcore.java.util.CalendarTest#test_nullLocale", - "libcore.java.util.FormatterTest#test_numberLocalization", - "libcore.java.util.FormatterTest#test_uppercaseConversions", - "libcore.javax.crypto.CipherTest#testCipher_getInstance_WrongType_Failure", - 
"libcore.javax.crypto.CipherTest#testDecryptBufferZeroSize_mustDecodeToEmptyString", - "libcore.javax.security.auth.x500.X500PrincipalTest#testExceptionsForWrongDNs", - "org.apache.harmony.luni.tests.java.net.URLConnectionTest#test_getDate", - "org.apache.harmony.luni.tests.java.net.URLConnectionTest#test_getExpiration", - "org.apache.harmony.regex.tests.java.util.regex.PatternSyntaxExceptionTest#testPatternSyntaxException", - "org.apache.harmony.tests.java.lang.FloatTest#test_parseFloat_LString_Harmony6261", - "org.apache.harmony.tests.java.lang.ThreadTest#test_isDaemon", - "org.apache.harmony.tests.java.text.DecimalFormatSymbolsTest#test_setInternationalCurrencySymbolLjava_lang_String", - "org.apache.harmony.tests.java.text.DecimalFormatTest#testSerializationHarmonyRICompatible", - "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parseLjava_lang_StringLjava_text_ParsePosition", - "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_W_w_dd_MMMM_yyyy_EEEE", - "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_dayOfYearPatterns", - "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_m_z", - "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_2DigitOffsetFromGMT", - "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_4DigitOffsetFromGMT", - "org.apache.harmony.tests.java.text.SimpleDateFormatTest#test_parse_h_z_4DigitOffsetNoGMT", - "org.apache.harmony.tests.java.util.jar.JarFileTest#test_getInputStreamLjava_util_jar_JarEntry_subtest0", - "libcore.java.util.CalendarTest#test_clear_45877", - "org.apache.harmony.crypto.tests.javax.crypto.spec.SecretKeySpecTest#testGetFormat", - "org.apache.harmony.tests.java.util.TimerTaskTest#test_scheduledExecutionTime"] + names: ["libcore.java.lang.OldSystemTest#test_load"] }, { description: "Missing resource in classpath", @@ -262,10 +240,12 @@ names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_destroyForcibly"] }, { - description: "Flaky failure, possibly caused by a kernel bug accessing /proc/", + description: "Flaky failure, native crash in the runtime. + Unclear if this relates to the tests running sh as a child process.", result: EXEC_FAILED, - bug: 27464570, + bug: 30657148, modes: [device], - names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit"] + names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit", + "libcore.java.lang.ProcessBuilderTest#testRedirect_nullStreams"] } ] |