136 files changed, 4944 insertions, 1789 deletions
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk index 33c2a8eb9e..b5d41d98ae 100644 --- a/build/Android.common_build.mk +++ b/build/Android.common_build.mk @@ -136,7 +136,6 @@ endif # Base set of cflags used by all things ART. art_cflags += \ -fno-rtti \ - -std=gnu++11 \ -ggdb3 \ -Wall \ -Werror \ @@ -152,24 +151,19 @@ art_cflags += \ # The architectures the compiled tools are able to run on. Setting this to 'all' will cause all # architectures to be included. -ART_TARGET_CODEGEN_ARCHS ?= all +ART_TARGET_CODEGEN_ARCHS ?= svelte ART_HOST_CODEGEN_ARCHS ?= all ifeq ($(ART_TARGET_CODEGEN_ARCHS),all) ART_TARGET_CODEGEN_ARCHS := $(sort $(ART_TARGET_SUPPORTED_ARCH) $(ART_HOST_SUPPORTED_ARCH)) - # We need to handle the fact that some compiler tests mix code from different architectures. - ART_TARGET_COMPILER_TESTS ?= true else - ART_TARGET_COMPILER_TESTS := false ifeq ($(ART_TARGET_CODEGEN_ARCHS),svelte) ART_TARGET_CODEGEN_ARCHS := $(sort $(ART_TARGET_ARCH_64) $(ART_TARGET_ARCH_32)) endif endif ifeq ($(ART_HOST_CODEGEN_ARCHS),all) ART_HOST_CODEGEN_ARCHS := $(sort $(ART_TARGET_SUPPORTED_ARCH) $(ART_HOST_SUPPORTED_ARCH)) - ART_HOST_COMPILER_TESTS ?= true else - ART_HOST_COMPILER_TESTS := false ifeq ($(ART_HOST_CODEGEN_ARCHS),svelte) ART_HOST_CODEGEN_ARCHS := $(sort $(ART_TARGET_CODEGEN_ARCHS) $(ART_HOST_ARCH_64) $(ART_HOST_ARCH_32)) endif diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 4739f7d1c7..c61efac80c 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -299,13 +299,7 @@ COMPILER_GTEST_COMMON_SRC_FILES := \ COMPILER_GTEST_COMMON_SRC_FILES_all := \ compiler/jni/jni_cfi_test.cc \ compiler/optimizing/codegen_test.cc \ - compiler/optimizing/constant_folding_test.cc \ - compiler/optimizing/dead_code_elimination_test.cc \ - compiler/optimizing/linearize_test.cc \ - compiler/optimizing/liveness_test.cc \ - compiler/optimizing/live_ranges_test.cc \ compiler/optimizing/optimizing_cfi_test.cc \ - compiler/optimizing/register_allocator_test.cc \ COMPILER_GTEST_COMMON_SRC_FILES_arm := \ compiler/linker/arm/relative_patcher_thumb2_test.cc \ @@ -325,6 +319,16 @@ COMPILER_GTEST_COMMON_SRC_FILES_x86 := \ compiler/linker/x86/relative_patcher_x86_test.cc \ compiler/utils/x86/managed_register_x86_test.cc \ +# These tests are testing architecture-independent functionality, but happen +# to use x86 codegen as part of the test. 
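Aside: the build change above moves optimizing-compiler tests that are architecture-independent but happen to exercise x86 codegen into the per-arch x86 list, so they only build when that backend is enabled; later in this diff, jni_cfi_test.cc applies the same idea at the source level with ART_ENABLE_CODEGEN_* guards. A minimal sketch of that gating pattern (the test body is hypothetical; only the macro name comes from the diff):

    // Sketch: compile a test only when the corresponding codegen backend exists.
    #include <cstdio>

    #define ART_ENABLE_CODEGEN_x86  // assumed here; normally set by the build system

    #ifdef ART_ENABLE_CODEGEN_x86
    static void RunRegisterAllocatorTestOnX86() {
      // Architecture-independent logic that happens to use the x86 backend.
      std::puts("register allocator test (x86 codegen)");
    }
    #endif

    int main() {
    #ifdef ART_ENABLE_CODEGEN_x86
      RunRegisterAllocatorTestOnX86();
    #endif
      return 0;
    }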
+COMPILER_GTEST_COMMON_SRC_FILES_x86 += \ + compiler/optimizing/constant_folding_test.cc \ + compiler/optimizing/dead_code_elimination_test.cc \ + compiler/optimizing/linearize_test.cc \ + compiler/optimizing/live_ranges_test.cc \ + compiler/optimizing/liveness_test.cc \ + compiler/optimizing/register_allocator_test.cc \ + COMPILER_GTEST_COMMON_SRC_FILES_x86_64 := \ compiler/linker/x86_64/relative_patcher_x86_64_test.cc \ @@ -359,9 +363,7 @@ COMPILER_GTEST_TARGET_SRC_FILES_x86_64 := \ $(COMPILER_GTEST_COMMON_SRC_FILES_x86_64) \ $(foreach arch,$(ART_TARGET_CODEGEN_ARCHS),$(eval COMPILER_GTEST_TARGET_SRC_FILES += $$(COMPILER_GTEST_TARGET_SRC_FILES_$(arch)))) -ifeq (true,$(ART_TARGET_COMPILER_TESTS)) - COMPILER_GTEST_TARGET_SRC_FILES += $(COMPILER_GTEST_TARGET_SRC_FILES_all) -endif +COMPILER_GTEST_TARGET_SRC_FILES += $(COMPILER_GTEST_TARGET_SRC_FILES_all) COMPILER_GTEST_HOST_SRC_FILES := \ $(COMPILER_GTEST_COMMON_SRC_FILES) \ @@ -395,9 +397,7 @@ COMPILER_GTEST_HOST_SRC_FILES_x86_64 := \ compiler/utils/x86_64/assembler_x86_64_test.cc $(foreach arch,$(ART_HOST_CODEGEN_ARCHS),$(eval COMPILER_GTEST_HOST_SRC_FILES += $$(COMPILER_GTEST_HOST_SRC_FILES_$(arch)))) -ifeq (true,$(ART_HOST_COMPILER_TESTS)) - COMPILER_GTEST_HOST_SRC_FILES += $(COMPILER_GTEST_HOST_SRC_FILES_all) -endif +COMPILER_GTEST_HOST_SRC_FILES += $(COMPILER_GTEST_HOST_SRC_FILES_all) ART_TEST_CFLAGS := diff --git a/compiler/Android.mk b/compiler/Android.mk index 2426eb9c84..37f48e17bb 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -47,7 +47,6 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/code_generator_utils.cc \ optimizing/constant_folding.cc \ optimizing/dead_code_elimination.cc \ - optimizing/dex_cache_array_fixups_arm.cc \ optimizing/graph_checker.cc \ optimizing/graph_visualizer.cc \ optimizing/gvn.cc \ @@ -61,7 +60,6 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/load_store_elimination.cc \ optimizing/locations.cc \ optimizing/nodes.cc \ - optimizing/nodes_arm64.cc \ optimizing/optimization.cc \ optimizing/optimizing_compiler.cc \ optimizing/parallel_move_resolver.cc \ @@ -78,7 +76,6 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/ssa_liveness_analysis.cc \ optimizing/ssa_phi_elimination.cc \ optimizing/stack_map_stream.cc \ - optimizing/x86_memory_gen.cc \ trampolines/trampoline_compiler.cc \ utils/assembler.cc \ utils/jni_macro_assembler.cc \ @@ -94,6 +91,9 @@ LIBART_COMPILER_SRC_FILES_arm := \ linker/arm/relative_patcher_arm_base.cc \ linker/arm/relative_patcher_thumb2.cc \ optimizing/code_generator_arm.cc \ + optimizing/dex_cache_array_fixups_arm.cc \ + optimizing/instruction_simplifier_arm.cc \ + optimizing/instruction_simplifier_shared.cc \ optimizing/intrinsics_arm.cc \ utils/arm/assembler_arm.cc \ utils/arm/assembler_thumb2.cc \ @@ -108,8 +108,8 @@ LIBART_COMPILER_SRC_FILES_arm64 := \ $(LIBART_COMPILER_SRC_FILES_arm) \ jni/quick/arm64/calling_convention_arm64.cc \ linker/arm64/relative_patcher_arm64.cc \ + optimizing/nodes_arm64.cc \ optimizing/code_generator_arm64.cc \ - optimizing/instruction_simplifier_arm.cc \ optimizing/instruction_simplifier_arm64.cc \ optimizing/instruction_simplifier_shared.cc \ optimizing/intrinsics_arm64.cc \ @@ -143,6 +143,7 @@ LIBART_COMPILER_SRC_FILES_x86 := \ optimizing/code_generator_x86.cc \ optimizing/intrinsics_x86.cc \ optimizing/pc_relative_fixups_x86.cc \ + optimizing/x86_memory_gen.cc \ utils/x86/assembler_x86.cc \ utils/x86/jni_macro_assembler_x86.cc \ utils/x86/managed_register_x86.cc \ diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h index f8b7460935..c754e5588c 
100644 --- a/compiler/cfi_test.h +++ b/compiler/cfi_test.h @@ -22,11 +22,13 @@ #include <sstream> #include "arch/instruction_set.h" +#include "base/enums.h" #include "debug/dwarf/dwarf_constants.h" #include "debug/dwarf/dwarf_test.h" #include "debug/dwarf/headers.h" #include "disassembler/disassembler.h" #include "gtest/gtest.h" +#include "thread.h" namespace art { @@ -57,7 +59,13 @@ class CFITest : public dwarf::DwarfTest { // Pretty-print assembly. const uint8_t* asm_base = actual_asm.data(); const uint8_t* asm_end = asm_base + actual_asm.size(); - auto* opts = new DisassemblerOptions(false, asm_base, asm_end, true); + auto* opts = new DisassemblerOptions(false, + asm_base, + asm_end, + true, + is64bit + ? &Thread::DumpThreadOffset<PointerSize::k64> + : &Thread::DumpThreadOffset<PointerSize::k32>); std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts)); std::stringstream stream; const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0); diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 91579e9daf..e1ee0d2966 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -188,6 +188,7 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { } uint64_t image_file_size; + size_t image_size; { std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str())); ASSERT_TRUE(file.get() != nullptr); @@ -206,6 +207,7 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { ASSERT_TRUE(space->IsMallocSpace()); image_file_size = file->GetLength(); + image_size = image_header.GetImageSize(); } ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr); @@ -255,10 +257,10 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) { ASSERT_TRUE(image_space != nullptr); if (storage_mode == ImageHeader::kStorageModeUncompressed) { // Uncompressed, image should be smaller than file. - ASSERT_LE(image_space->Size(), image_file_size); + ASSERT_LE(image_size, image_file_size); } else { // Compressed, file should be smaller than image. - ASSERT_LE(image_file_size, image_space->Size()); + ASSERT_LE(image_file_size, image_size); } image_space->VerifyImageAllocations(); diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index efae4d0583..bb459996e3 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -52,6 +52,7 @@ #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "mirror/class_loader.h" +#include "mirror/dex_cache.h" #include "mirror/dex_cache-inl.h" #include "mirror/method.h" #include "mirror/object-inl.h" @@ -1418,6 +1419,9 @@ void ImageWriter::CalculateNewObjectOffsets() { bin_offset = RoundUp(bin_offset, method_alignment); break; } + case kBinDexCacheArray: + bin_offset = RoundUp(bin_offset, DexCacheArraysLayout::Alignment()); + break; case kBinImTable: case kBinIMTConflictTable: { bin_offset = RoundUp(bin_offset, static_cast<size_t>(target_ptr_size_)); @@ -2034,7 +2038,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e. // static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))). 
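Aside: the FixupDexCache comment above explains why pointers are widened through uintptr_t: on a 32-bit target, casting through the signed type would sign-extend the address into the upper 32 bits. A small standalone illustration of the idiom (not ART code):

    // Zero-extend a pointer to 64 bits by going through the unsigned type first.
    #include <cstdint>
    #include <cstdio>

    int64_t ZeroExtended(const void* p) {
      return static_cast<int64_t>(reinterpret_cast<uintptr_t>(p));  // top bits cleared on 32-bit
    }

    int64_t SignExtended(const void* p) {
      return static_cast<int64_t>(reinterpret_cast<intptr_t>(p));   // what the comment avoids
    }

    int main() {
      // On a 32-bit build, an address like 0x80000000 differs between the two:
      // 0x0000000080000000 (zero-extended) vs. 0xffffffff80000000 (sign-extended).
      const void* p = reinterpret_cast<const void*>(uintptr_t{0x80000000u});
      std::printf("%llx %llx\n",
                  static_cast<unsigned long long>(ZeroExtended(p)),
                  static_cast<unsigned long long>(SignExtended(p)));
      return 0;
    }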
- GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings(); + mirror::StringDexCacheType* orig_strings = orig_dex_cache->GetStrings(); if (orig_strings != nullptr) { copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(), NativeLocationInImage(orig_strings), diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 524ce4d34e..4b056f552a 100644 --- a/compiler/jni/jni_cfi_test.cc +++ b/compiler/jni/jni_cfi_test.cc @@ -104,12 +104,24 @@ class JNICFITest : public CFITest { TestImpl(isa, #isa, expected_asm, expected_cfi); \ } +#ifdef ART_ENABLE_CODEGEN_arm TEST_ISA(kThumb2) +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 TEST_ISA(kArm64) +#endif +#ifdef ART_ENABLE_CODEGEN_x86 TEST_ISA(kX86) +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 TEST_ISA(kX86_64) +#endif +#ifdef ART_ENABLE_CODEGEN_mips TEST_ISA(kMips) +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 TEST_ISA(kMips64) +#endif #endif // ART_TARGET_ANDROID diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 8273b15667..8a809822df 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1189,8 +1189,13 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } mirror::String* GetTargetString(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) { - mirror::DexCache* dex_cache = GetDexCache(patch.TargetStringDexFile()); - mirror::String* string = dex_cache->GetResolvedString(patch.TargetStringIndex()); + ScopedObjectAccessUnchecked soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + ClassLinker* linker = Runtime::Current()->GetClassLinker(); + Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache(patch.TargetStringDexFile()))); + mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(), + patch.TargetStringIndex(), + dex_cache); DCHECK(string != nullptr); DCHECK(writer_->HasBootImage() || Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(string)); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 5152075499..c532e72465 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -1228,7 +1228,8 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in instruction->IsLoadString() || instruction->IsInstanceOf() || instruction->IsCheckCast() || - (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified())) + (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()) || + (instruction->IsInvokeStaticOrDirect() && instruction->GetLocations()->Intrinsified())) << "instruction->DebugName()=" << instruction->DebugName() << " slow_path->GetDescription()=" << slow_path->GetDescription(); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 4c4128c5f8..6d9c55cd75 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -429,7 +429,8 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -441,6 +442,9 @@ class 
ReadBarrierMarkSlowPathARM : public SlowPathCode { DCHECK_NE(reg, SP); DCHECK_NE(reg, LR); DCHECK_NE(reg, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(reg, IP); DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg; // "Compact" slow path, saving two moves. // @@ -5585,55 +5589,15 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); - // 16-bit LDR immediate has a 5-bit offset multiplied by the size and that gives - // a 128B range. To try and reduce the number of literals if we load multiple strings, - // simply split the dex cache address to a 128B aligned base loaded from a literal - // and the remaining offset embedded in the load. - static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes."); - DCHECK_ALIGNED(load->GetAddress(), 4u); - constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2; - uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits); - uint32_t offset = address & MaxInt<uint32_t>(offset_bits); - __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address)); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) - GenerateGcRootFieldLoad(load, out_loc, out, offset); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - Register base_reg = locations->InAt(0).AsRegister<Register>(); - HArmDexCacheArraysBase* base = load->InputAt(0)->AsArmDexCacheArraysBase(); - int32_t offset = load->GetDexCacheElementOffset() - base->GetElementOffset(); - // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset) - GenerateGcRootFieldLoad(load, out_loc, base_reg, offset); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - Register current_method = locations->InAt(0).AsRegister<Register>(); - - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - break; - } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); - codegen_->AddSlowPath(slow_path); - __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. 
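Aside: the removed kDexCacheAddress path above split each dex cache address into a 128-byte-aligned base (materialized once as a literal) plus a small offset folded into the load, because the 16-bit Thumb LDR immediate encodes a 5-bit offset scaled by 4. A standalone sketch of that split, using a local mask constant rather than ART's MaxInt helper:

    // Split an address into a 128B-aligned literal base and a 7-bit byte offset.
    #include <cstdint>
    #include <cassert>

    constexpr uint32_t kOffsetBits = 5 /* encoded bits */ + 2 /* scale */;
    constexpr uint32_t kOffsetMask = (1u << kOffsetBits) - 1u;  // 0x7f: 128-byte range

    struct SplitAddress {
      uint32_t base;    // loaded from a (deduplicated) literal
      uint32_t offset;  // folded into the LDR immediate
    };

    constexpr SplitAddress Split(uint32_t address) {
      return {address & ~kOffsetMask, address & kOffsetMask};
    }

    int main() {
      constexpr uint32_t kAddress = 0x12345678u;  // GC roots are 4-byte aligned
      constexpr SplitAddress s = Split(kAddress);
      static_assert(s.base % 128 == 0 && s.offset < 128, "base must be 128B aligned");
      assert(s.base + s.offset == kAddress);
      return 0;
    }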
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } static int32_t GetExceptionTlsOffset() { @@ -6413,7 +6377,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. + // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root); codegen_->AddSlowPath(slow_path); @@ -6522,7 +6486,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Introduce a dependency on the lock_word including the rb_state, // which shall prevent load-load reordering without using // a memory barrier (which would be more expensive). - // obj is unchanged by this operation, but its value now depends on temp_reg. + // `obj` is unchanged by this operation, but its value now depends + // on `temp_reg`. __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32)); // The actual reference load. @@ -6553,7 +6518,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Object* ref = ref_addr->AsMirrorPtr() __ MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. + // Slow path marking the object `ref` when it is gray. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); AddSlowPath(slow_path); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index d95e7df6b4..cc8985d0b0 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -595,7 +595,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -607,7 +608,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { DCHECK_NE(obj_.reg(), LR); DCHECK_NE(obj_.reg(), WSP); DCHECK_NE(obj_.reg(), WZR); - // WIP0 is used by the slow path as a temp, it can not be the object register. + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. DCHECK_NE(obj_.reg(), IP0); DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg(); // "Compact" slow path, saving two moves. @@ -4195,7 +4197,6 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { } void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { - Location out_loc = load->GetLocations()->Out(); Register out = OutputRegister(load); switch (load->GetLoadKind()) { @@ -4231,63 +4232,15 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress())); return; // No dex cache slow path. 
} - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads - // that gives a 16KiB range. To try and reduce the number of literals if we load - // multiple strings, simply split the dex cache address to a 16KiB aligned base - // loaded from a literal and the remaining offset embedded in the load. - static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes."); - DCHECK_ALIGNED(load->GetAddress(), 4u); - constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2; - uint64_t base_address = load->GetAddress() & ~MaxInt<uint64_t>(offset_bits); - uint32_t offset = load->GetAddress() & MaxInt<uint64_t>(offset_bits); - __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address)); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) - GenerateGcRootFieldLoad(load, out_loc, out.X(), offset); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - // Add ADRP with its PC-relative DexCache access patch. - const DexFile& dex_file = load->GetDexFile(); - uint32_t element_offset = load->GetDexCacheElementOffset(); - vixl::aarch64::Label* adrp_label = - codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } - // Add LDR with its PC-relative DexCache access patch. - vixl::aarch64::Label* ldr_label = - codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, out.X(), /* offset placeholder */ 0, ldr_label); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - Register current_method = InputRegisterAt(load, 0); - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())); - break; - } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); - codegen_->AddSlowPath(slow_path); - __ Cbz(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { @@ -5088,7 +5041,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. + // Slow path marking the GC root `root`. 
SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root); codegen_->AddSlowPath(slow_path); @@ -5239,7 +5192,8 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // Introduce a dependency on the lock_word including rb_state, // to prevent load-load reordering, and without using // a memory barrier (which would be more expensive). - // obj is unchanged by this operation, but its value now depends on temp. + // `obj` is unchanged by this operation, but its value now depends + // on `temp`. __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32)); // The actual reference load. @@ -5285,7 +5239,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // Object* ref = ref_addr->AsMirrorPtr() GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. + // Slow path marking the object `ref` when it is gray. SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref); AddSlowPath(slow_path); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 58879bc2f1..8a2f90d541 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1833,11 +1833,19 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { } } +auto InstructionCodeGeneratorMIPS::GetImplicitNullChecker(HInstruction* instruction) { + auto null_checker = [this, instruction]() { + this->codegen_->MaybeRecordImplicitNullCheck(instruction); + }; + return null_checker; +} + void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + auto null_checker = GetImplicitNullChecker(instruction); Primitive::Type type = instruction->GetType(); switch (type) { @@ -1846,10 +1854,10 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker); } else { __ Addu(TMP, obj, index.AsRegister<Register>()); - __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset); + __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker); } break; } @@ -1859,10 +1867,10 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadSignedByte, out, obj, offset); + __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker); } else { __ Addu(TMP, obj, index.AsRegister<Register>()); - __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset); + __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker); } break; } @@ -1872,11 +1880,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker); } else { 
__ Sll(TMP, index.AsRegister<Register>(), TIMES_2); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset); + __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); } break; } @@ -1886,11 +1894,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_2); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset); + __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker); } break; } @@ -1902,11 +1910,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadFromOffset(kLoadWord, out, obj, offset); + __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadWord, out, TMP, data_offset); + __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker); } break; } @@ -1916,11 +1924,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFromOffset(kLoadDoubleword, out, obj, offset); + __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); __ Addu(TMP, obj, TMP); - __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset); + __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker); } break; } @@ -1930,11 +1938,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadSFromOffset(out, obj, offset); + __ LoadSFromOffset(out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); __ Addu(TMP, obj, TMP); - __ LoadSFromOffset(out, TMP, data_offset); + __ LoadSFromOffset(out, TMP, data_offset, null_checker); } break; } @@ -1944,11 +1952,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadDFromOffset(out, obj, offset); + __ LoadDFromOffset(out, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); __ Addu(TMP, obj, TMP); - __ LoadDFromOffset(out, TMP, data_offset); + __ LoadDFromOffset(out, TMP, data_offset, null_checker); } break; } @@ -1957,7 +1965,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); } void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) { @@ -2004,6 +2011,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { bool needs_runtime_call = locations->WillCall(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + auto 
null_checker = GetImplicitNullChecker(instruction); switch (value_type) { case Primitive::kPrimBoolean: @@ -2013,10 +2021,10 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ StoreToOffset(kStoreByte, value, obj, offset); + __ StoreToOffset(kStoreByte, value, obj, offset, null_checker); } else { __ Addu(TMP, obj, index.AsRegister<Register>()); - __ StoreToOffset(kStoreByte, value, TMP, data_offset); + __ StoreToOffset(kStoreByte, value, TMP, data_offset, null_checker); } break; } @@ -2028,11 +2036,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ StoreToOffset(kStoreHalfword, value, obj, offset); + __ StoreToOffset(kStoreHalfword, value, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_2); __ Addu(TMP, obj, TMP); - __ StoreToOffset(kStoreHalfword, value, TMP, data_offset); + __ StoreToOffset(kStoreHalfword, value, TMP, data_offset, null_checker); } break; } @@ -2045,14 +2053,13 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreToOffset(kStoreWord, value, obj, offset); + __ StoreToOffset(kStoreWord, value, obj, offset, null_checker); } else { DCHECK(index.IsRegister()) << index; __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); __ Addu(TMP, obj, TMP); - __ StoreToOffset(kStoreWord, value, TMP, data_offset); + __ StoreToOffset(kStoreWord, value, TMP, data_offset, null_checker); } - codegen_->MaybeRecordImplicitNullCheck(instruction); if (needs_write_barrier) { DCHECK_EQ(value_type, Primitive::kPrimNot); codegen_->MarkGCCard(obj, value); @@ -2075,11 +2082,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreToOffset(kStoreDoubleword, value, obj, offset); + __ StoreToOffset(kStoreDoubleword, value, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); __ Addu(TMP, obj, TMP); - __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset); + __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset, null_checker); } break; } @@ -2091,11 +2098,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreSToOffset(value, obj, offset); + __ StoreSToOffset(value, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); __ Addu(TMP, obj, TMP); - __ StoreSToOffset(value, TMP, data_offset); + __ StoreSToOffset(value, TMP, data_offset, null_checker); } break; } @@ -2107,11 +2114,11 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreDToOffset(value, obj, offset); + __ StoreDToOffset(value, obj, offset, null_checker); } else { __ Sll(TMP, index.AsRegister<Register>(), TIMES_8); __ Addu(TMP, obj, TMP); - __ StoreDToOffset(value, TMP, data_offset); + __ StoreDToOffset(value, TMP, data_offset, 
null_checker); } break; } @@ -2120,11 +2127,6 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - - // Ints and objects are handled in the switch. - if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } void LocationsBuilderMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -3589,6 +3591,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, LoadOperandType load_type = kLoadUnsignedByte; bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + auto null_checker = GetImplicitNullChecker(instruction); switch (type) { case Primitive::kPrimBoolean: @@ -3654,34 +3657,20 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->Out().IsRegisterPair()); dst = locations->Out().AsRegisterPairLow<Register>(); - Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); - if (obj == dst) { - __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ LoadFromOffset(kLoadWord, dst, obj, offset); - } else { - __ LoadFromOffset(kLoadWord, dst, obj, offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); - } } else { DCHECK(locations->Out().IsRegister()); dst = locations->Out().AsRegister<Register>(); - __ LoadFromOffset(load_type, dst, obj, offset); } + __ LoadFromOffset(load_type, dst, obj, offset, null_checker); } else { DCHECK(locations->Out().IsFpuRegister()); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ LoadSFromOffset(dst, obj, offset); + __ LoadSFromOffset(dst, obj, offset, null_checker); } else { - __ LoadDFromOffset(dst, obj, offset); + __ LoadDFromOffset(dst, obj, offset, null_checker); } } - // Longs are handled earlier. 
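Aside: the MIPS changes above replace the ad-hoc MaybeRecordImplicitNullCheck calls with a null_checker lambda that the assembler's load/store helpers invoke themselves, so the check is recorded at the first instruction that actually dereferences the object (including the split accesses of 64-bit fields). A minimal sketch of the callback shape, with a hypothetical EmitLoadWord standing in for the MipsAssembler overloads:

    // Pass a callback into the emitter; it fires when the first faulting access is emitted.
    #include <cstdio>
    #include <functional>

    void EmitLoadWord(int offset, const std::function<void()>& null_checker) {
      std::printf("lw   out, %d(obj)\n", offset);
      null_checker();  // the emitter decides where the check belongs
    }

    int main() {
      bool recorded = false;
      auto null_checker = [&recorded]() {
        if (!recorded) {  // record only once per compiled instruction
          recorded = true;
          std::puts("record implicit null check at the previous instruction");
        }
      };
      EmitLoadWord(0, null_checker);  // low word: null check recorded here
      EmitLoadWord(4, null_checker);  // high word of a 64-bit field: nothing more to record
      return 0;
    }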
- if (type != Primitive::kPrimLong) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } if (is_volatile) { @@ -3729,6 +3718,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, StoreOperandType store_type = kStoreByte; bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + auto null_checker = GetImplicitNullChecker(instruction); switch (type) { case Primitive::kPrimBoolean: @@ -3800,28 +3790,20 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->InAt(1).IsRegisterPair()); src = locations->InAt(1).AsRegisterPairLow<Register>(); - Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>(); - __ StoreToOffset(kStoreWord, src, obj, offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize); } else { DCHECK(locations->InAt(1).IsRegister()); src = locations->InAt(1).AsRegister<Register>(); - __ StoreToOffset(store_type, src, obj, offset); } + __ StoreToOffset(store_type, src, obj, offset, null_checker); } else { DCHECK(locations->InAt(1).IsFpuRegister()); FRegister src = locations->InAt(1).AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ StoreSToOffset(src, obj, offset); + __ StoreSToOffset(src, obj, offset, null_checker); } else { - __ StoreDToOffset(src, obj, offset); + __ StoreDToOffset(src, obj, offset, null_checker); } } - // Longs are handled earlier. - if (type != Primitive::kPrimLong) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } // TODO: memory barriers? @@ -4580,11 +4562,6 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); break; - // We need an extra register for PC-relative dex cache accesses. - case HLoadString::LoadKind::kDexCachePcRelative: - case HLoadString::LoadKind::kDexCacheViaMethod: - base_or_current_method_reg = locations->InAt(0).AsRegister<Register>(); - break; default: base_or_current_method_reg = ZERO; break; @@ -4628,52 +4605,15 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); - static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes."); - DCHECK_ALIGNED(load->GetAddress(), 4u); - int16_t offset = Low16Bits(address); - uint32_t base_address = address - offset; // This accounts for offset sign extension. 
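Aside: the removed MIPS kDexCacheAddress path above materialized an absolute address as a lui of the high half plus a sign-extended 16-bit load offset; the `base_address = address - offset` line compensates for that sign extension when the low half is 0x8000 or more. A standalone check of the arithmetic (local casts, not ART's Low16Bits/High16Bits helpers):

    // lui/offset split: the base keeps zero low bits even when the low half sign-extends.
    #include <cstdint>
    #include <cassert>

    int main() {
      const uint32_t address = 0x1234ABCCu;  // low half >= 0x8000, so the offset is negative
      const int16_t offset = static_cast<int16_t>(address & 0xFFFFu);     // sign-extended by the load
      const uint32_t base_address =
          address - static_cast<uint32_t>(static_cast<int32_t>(offset));  // accounts for sign extension
      assert((base_address & 0xFFFFu) == 0u);  // representable by a single lui
      assert(base_address + static_cast<uint32_t>(static_cast<int32_t>(offset)) == address);
      return 0;
    }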
- __ Lui(out, High16Bits(base_address)); - // /* GcRoot<mirror::String> */ out = *(base_address + offset) - GenerateGcRootFieldLoad(load, out_loc, out, offset); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - HMipsDexCacheArraysBase* base = load->InputAt(0)->AsMipsDexCacheArraysBase(); - int32_t offset = - load->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; - // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset) - GenerateGcRootFieldLoad(load, out_loc, base_or_current_method_reg, offset); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad(load, - out_loc, - base_or_current_method_reg, - ArtMethod::DeclaringClassOffset().Int32Value()); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad(load, - out_loc, - out, - CodeGenerator::GetCacheOffset(load->GetStringIndex())); - break; - } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); - codegen_->AddSlowPath(slow_path); - __ Beqz(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 63a0345c1c..46810d658f 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -257,6 +257,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); + auto GetImplicitNullChecker(HInstruction* instruction); MipsAssembler* const assembler_; CodeGeneratorMIPS* const codegen_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 4e7a2728b1..4a5755c925 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -3261,22 +3261,11 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { } void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) { - LocationSummary* locations = load->GetLocations(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>(); - __ LoadFromOffset(kLoadUnsignedWord, out, current_method, - ArtMethod::DeclaringClassOffset().Int32Value()); - __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - __ LoadFromOffset( - kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - // TODO: We will need a read barrier here. 
- - if (!load->IsInDexCache()) { - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); - codegen_->AddSlowPath(slow_path); - __ Beqzc(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); + codegen_->AddSlowPath(slow_path); + __ Bc(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 7a561bb4ad..f50eb5cb7e 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -445,8 +445,8 @@ class ArraySetSlowPathX86 : public SlowPathCode { // Slow path marking an object during a read barrier. class ReadBarrierMarkSlowPathX86 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj) - : SlowPathCode(instruction), obj_(obj) { + ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison) + : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) { DCHECK(kEmitCompilerReadBarrier); } @@ -464,11 +464,16 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); __ Bind(GetEntryLabel()); + if (unpoison_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(reg); + } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. 
@@ -498,6 +503,7 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { private: const Location obj_; + const bool unpoison_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86); }; @@ -1578,15 +1584,15 @@ void LocationsBuilderX86::VisitSelect(HSelect* select) { locations->SetOut(Location::SameAsFirstInput()); } -void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) { +void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) { Register lhs_reg = lhs.AsRegister<Register>(); if (rhs.IsConstant()) { int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - codegen_->Compare32BitValue(lhs_reg, value); + Compare32BitValue(lhs_reg, value); } else if (rhs.IsStackSlot()) { - __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex())); + assembler_.cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex())); } else { - __ cmpl(lhs_reg, rhs.AsRegister<Register>()); + assembler_.cmpl(lhs_reg, rhs.AsRegister<Register>()); } } @@ -1619,7 +1625,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong); DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())); LocationSummary* cond_locations = condition->GetLocations(); - GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1)); + codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1)); cond = X86Condition(condition->GetCondition()); } } else { @@ -1728,7 +1734,7 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) { // Clear output register: setb only sets the low byte. __ xorl(reg, reg); - GenerateIntCompare(lhs, rhs); + codegen_->GenerateIntCompare(lhs, rhs); __ setb(X86Condition(cond->GetCondition()), reg); return; } @@ -4210,7 +4216,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { case Primitive::kPrimShort: case Primitive::kPrimChar: case Primitive::kPrimInt: { - GenerateIntCompare(left, right); + codegen_->GenerateIntCompare(left, right); break; } case Primitive::kPrimLong: { @@ -4630,10 +4636,6 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI // load the temp into the XMM and then copy the XMM into the // output, 32 bits at a time). locations->AddTemp(Location::RequiresFpuRegister()); - } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); } } @@ -4677,11 +4679,10 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, case Primitive::kPrimNot: { // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + instruction, out, base, offset, /* needs_null_check */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -5092,11 +5093,6 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier. 
- if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->AddTemp(Location::RequiresRegister()); - } } void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { @@ -5171,11 +5167,10 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); } else { Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { @@ -6230,48 +6225,15 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { codegen_->RecordSimplePatch(); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); - // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, Address::Absolute(address)); - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - Register base_reg = locations->InAt(0).AsRegister<Register>(); - uint32_t offset = load->GetDexCacheElementOffset(); - Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset); - // /* GcRoot<mirror::String> */ out = *(base + offset) /* PC-relative */ - GenerateGcRootFieldLoad( - load, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - Register current_method = locations->InAt(0).AsRegister<Register>(); - - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - break; - } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. 
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } static Address GetExceptionTlsAddress() { @@ -6313,8 +6275,8 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { return kEmitCompilerReadBarrier && - (kUseBakerReadBarrier || - type_check_kind == TypeCheckKind::kAbstractClassCheck || + !kUseBakerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || type_check_kind == TypeCheckKind::kArrayObjectCheck); } @@ -6375,7 +6337,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset); switch (type_check_kind) { case TypeCheckKind::kExactCheck: { @@ -6597,7 +6559,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -6633,8 +6595,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -6673,8 +6634,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -6706,8 +6666,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -6715,8 +6674,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Same comment as above regarding `temp` and the slow path. 
// /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -6907,17 +6865,17 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* Location maybe_temp) { Register out_reg = out.AsRegister<Register>(); if (kEmitCompilerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, out_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it // in the following move operation, as we will need it for the // read barrier below. + DCHECK(maybe_temp.IsRegister()) << maybe_temp; __ movl(maybe_temp.AsRegister<Register>(), out_reg); // /* HeapReference<Object> */ out = *(out + offset) __ movl(out_reg, Address(out_reg, offset)); @@ -6934,17 +6892,15 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp) { + uint32_t offset) { Register out_reg = out.AsRegister<Register>(); Register obj_reg = obj.AsRegister<Register>(); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, obj_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6987,9 +6943,9 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root); + // Slow path marking the GC root `root`. 
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( + instruction, root, /* unpoison */ false); codegen_->AddSlowPath(slow_path); __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()), @@ -7023,14 +6979,13 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr Location ref, Register obj, uint32_t offset, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // /* HeapReference<Object> */ ref = *(obj + offset) Address src(obj, offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, @@ -7038,7 +6993,6 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr Register obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -7051,14 +7005,13 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr Address src = index.IsConstant() ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) : Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, const Address& src, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -7088,17 +7041,23 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // performance reasons. Register ref_reg = ref.AsRegister<Register>(); - Register temp_reg = temp.AsRegister<Register>(); uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - // /* int32_t */ monitor = obj->monitor_ - __ movl(temp_reg, Address(obj, monitor_offset)); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; + constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + // At this point, just do the "if" and make sure that flags are preserved until the branch. + __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); // Load fence to prevent load-load reordering. // Note that this is a no-op, thanks to the x86 memory model. 
@@ -7106,25 +7065,20 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The actual reference load. // /* HeapReference<Object> */ ref = *src - __ movl(ref_reg, src); + __ movl(ref_reg, src); // Flags are unaffected. + + // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. + // Slow path marking the object `ref` when it is gray. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( + instruction, ref, /* unpoison */ true); + AddSlowPath(slow_path); + + // We have done the "if" of the gray bit check above, now branch based on the flags. + __ j(kNotZero, slow_path->GetEntryLabel()); // Object* ref = ref_addr->AsMirrorPtr() __ MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref); - AddSlowPath(slow_path); - - // if (rb_state == ReadBarrier::gray_ptr_) - // ref = ReadBarrier::Mark(ref); - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with SHR. - static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1)); - __ j(kCarrySet, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f306b33247..c644e401ff 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -254,8 +254,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp); + uint32_t offset); // Generate a GC root reference load: // // root <- *address @@ -295,7 +294,6 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { HBasicBlock* default_block); void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double); - void GenerateIntCompare(Location lhs, Location rhs); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -431,6 +429,8 @@ class CodeGeneratorX86 : public CodeGenerator { Register value, bool value_can_be_null); + void GenerateIntCompare(Location lhs, Location rhs); + void GenerateMemoryBarrier(MemBarrierKind kind); Label* GetLabelOf(HBasicBlock* block) const { @@ -486,7 +486,6 @@ class CodeGeneratorX86 : public CodeGenerator { Location ref, Register obj, uint32_t offset, - Location temp, bool needs_null_check); // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. @@ -495,7 +494,6 @@ class CodeGeneratorX86 : public CodeGenerator { Register obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. 
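The x86 GenerateReferenceLoadWithBakerReadBarrier change above (and its x86-64 twin further down) replaces the old movl + shrl gray check with a single byte-wide testb against the object's lock word. That is why the `temp`/`maybe_temp` parameters disappear from the Baker read barrier helpers, and why reference unpoisoning is pushed into the mark slow path: unpoisoning would clobber the flags the conditional jump depends on. A minimal standalone sketch of the byte/bit arithmetic, not part of the patch and using a placeholder value for LockWord::kReadBarrierStateShift (the real constant is defined by art::LockWord):

#include <cassert>
#include <cstdint>

namespace sketch {

constexpr uint32_t kBitsPerByte = 8;
// Placeholder value for illustration; the generated code uses
// LockWord::kReadBarrierStateShift.
constexpr uint32_t kReadBarrierStateShift = 28;

// Which byte of the 32-bit lock word holds the gray bit, and which bit of that
// byte to test. On little-endian x86 a single testb at
// (monitor_offset + gray_byte_position) with immediate `test_value` replaces
// the old movl + shrl sequence, so no temporary register is needed and EFLAGS
// carry the answer to the following j(kNotZero).
constexpr uint32_t gray_byte_position = kReadBarrierStateShift / kBitsPerByte;
constexpr uint32_t gray_bit_position = kReadBarrierStateShift % kBitsPerByte;
constexpr int32_t test_value = static_cast<int8_t>(1u << gray_bit_position);

// What the emitted testb / j(kNotZero) pair decides for a given lock word:
// branch to the mark slow path exactly when rb_state is gray (value 1).
inline bool BranchesToMarkSlowPath(uint32_t lock_word) {
  const uint32_t byte = (lock_word >> (gray_byte_position * kBitsPerByte)) & 0xffu;
  return (byte & (static_cast<uint32_t>(test_value) & 0xffu)) != 0u;
}

}  // namespace sketch

int main() {
  // White (0) and black (2) references skip the slow path; gray (1) takes it.
  assert(!sketch::BranchesToMarkSlowPath(0u << sketch::kReadBarrierStateShift));
  assert(sketch::BranchesToMarkSlowPath(1u << sketch::kReadBarrierStateShift));
  assert(!sketch::BranchesToMarkSlowPath(2u << sketch::kReadBarrierStateShift));
  return 0;
}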
@@ -503,7 +501,6 @@ class CodeGeneratorX86 : public CodeGenerator { Location ref, Register obj, const Address& src, - Location temp, bool needs_null_check); // Generate a read barrier for a heap reference within `instruction` diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index cf01a791ee..ec37e5db22 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -466,8 +466,8 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { // Slow path marking an object during a read barrier. class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj) - : SlowPathCode(instruction), obj_(obj) { + ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison) + : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) { DCHECK(kEmitCompilerReadBarrier); } @@ -485,11 +485,16 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); __ Bind(GetEntryLabel()); + if (unpoison_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(obj_.AsRegister<CpuRegister>()); + } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. @@ -519,6 +524,7 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { private: const Location obj_; + const bool unpoison_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64); }; @@ -4151,11 +4157,6 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { Location::RequiresRegister(), object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); - } } void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, @@ -4199,11 +4200,10 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, case Primitive::kPrimNot: { // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + instruction, out, base, offset, /* needs_null_check */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -4587,11 +4587,6 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier. - if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->AddTemp(Location::RequiresRegister()); - } } void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { @@ -4666,11 +4661,10 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); } else { CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { @@ -5635,53 +5629,15 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { codegen_->RecordSimplePatch(); return; // No dex cache slow path. } - case HLoadString::LoadKind::kDexCacheAddress: { - DCHECK_NE(load->GetAddress(), 0u); - // /* GcRoot<mirror::String> */ out = *address - if (IsUint<32>(load->GetAddress())) { - Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true); - GenerateGcRootFieldLoad(load, out_loc, address); - } else { - // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address). - __ movq(out, Immediate(load->GetAddress())); - GenerateGcRootFieldLoad(load, out_loc, Address(out, 0)); - } - break; - } - case HLoadString::LoadKind::kDexCachePcRelative: { - uint32_t offset = load->GetDexCacheElementOffset(); - Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset); - Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, - /* no_rip */ false); - // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label); - break; - } - case HLoadString::LoadKind::kDexCacheViaMethod: { - CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); - - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - GenerateGcRootFieldLoad( - load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ - __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); - // /* GcRoot<mirror::String> */ out = out[string_index] - GenerateGcRootFieldLoad( - load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - break; - } default: - LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); - UNREACHABLE(); + break; } - if (!load->IsInDexCache()) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + // TODO: Re-add the compiler code to do string dex cache lookup again. 
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } static Address GetExceptionTlsAddress() { @@ -5724,8 +5680,8 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { return kEmitCompilerReadBarrier && - (kUseBakerReadBarrier || - type_check_kind == TypeCheckKind::kAbstractClassCheck || + !kUseBakerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || type_check_kind == TypeCheckKind::kArrayObjectCheck); } @@ -5786,7 +5742,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset); switch (type_check_kind) { case TypeCheckKind::kExactCheck: { @@ -6016,8 +5972,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); @@ -6041,8 +5996,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. @@ -6062,8 +6016,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -6087,8 +6040,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // Walk over the class hierarchy to find a match. NearLabel loop; @@ -6114,8 +6066,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; @@ -6134,8 +6085,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // Do an exact check. NearLabel check_non_primitive_component_type; @@ -6163,8 +6113,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -6172,8 +6121,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&done); break; @@ -6189,8 +6137,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters( - instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset); // We always go into the type check slow path for the unresolved // and interface check cases. @@ -6358,17 +6305,17 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi Location maybe_temp) { CpuRegister out_reg = out.AsRegister<CpuRegister>(); if (kEmitCompilerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, out_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it // in the following move operation, as we will need it for the // read barrier below. 
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp; __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg); // /* HeapReference<Object> */ out = *(out + offset) __ movl(out_reg, Address(out_reg, offset)); @@ -6385,17 +6332,15 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstructi void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp) { + uint32_t offset) { CpuRegister out_reg = out.AsRegister<CpuRegister>(); CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, obj_reg, offset, /* needs_null_check */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6438,9 +6383,9 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); - // Slow path used to mark the GC root `root`. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root); + // Slow path marking the GC root `root`. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( + instruction, root, /* unpoison */ false); codegen_->AddSlowPath(slow_path); __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(), @@ -6475,14 +6420,13 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in Location ref, CpuRegister obj, uint32_t offset, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // /* HeapReference<Object> */ ref = *(obj + offset) Address src(obj, offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, @@ -6490,7 +6434,6 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in CpuRegister obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6503,14 +6446,13 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in Address src = index.IsConstant() ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) : Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset); - GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); } void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, CpuRegister obj, const Address& src, - Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6540,17 +6482,23 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // performance reasons. 
CpuRegister ref_reg = ref.AsRegister<CpuRegister>(); - CpuRegister temp_reg = temp.AsRegister<CpuRegister>(); uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - // /* int32_t */ monitor = obj->monitor_ - __ movl(temp_reg, Address(obj, monitor_offset)); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; + constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + // At this point, just do the "if" and make sure that flags are preserved until the branch. + __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); // Load fence to prevent load-load reordering. // Note that this is a no-op, thanks to the x86-64 memory model. @@ -6558,25 +6506,20 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // The actual reference load. // /* HeapReference<Object> */ ref = *src - __ movl(ref_reg, src); + __ movl(ref_reg, src); // Flags are unaffected. + + // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. + // Slow path marking the object `ref` when it is gray. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( + instruction, ref, /* unpoison */ true); + AddSlowPath(slow_path); + + // We have done the "if" of the gray bit check above, now branch based on the flags. + __ j(kNotZero, slow_path->GetEntryLabel()); // Object* ref = ref_addr->AsMirrorPtr() __ MaybeUnpoisonHeapReference(ref_reg); - // Slow path used to mark the object `ref` when it is gray. - SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref); - AddSlowPath(slow_path); - - // if (rb_state == ReadBarrier::gray_ptr_) - // ref = ReadBarrier::Mark(ref); - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with SHR. 
- static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); - static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1)); - __ j(kCarrySet, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 4e0e34ce38..44844ac67a 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -248,8 +248,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, Location out, Location obj, - uint32_t offset, - Location maybe_temp); + uint32_t offset); // Generate a GC root reference load: // // root <- *address @@ -427,7 +426,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location ref, CpuRegister obj, uint32_t offset, - Location temp, bool needs_null_check); // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. @@ -436,7 +434,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { CpuRegister obj, uint32_t data_offset, Location index, - Location temp, bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. @@ -444,7 +441,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location ref, CpuRegister obj, const Address& src, - Location temp, bool needs_null_check); // Generate a read barrier for a heap reference within `instruction` diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 18db507c48..fe6c0a305e 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -29,12 +29,6 @@ #include "arch/x86_64/instruction_set_features_x86_64.h" #include "base/macros.h" #include "builder.h" -#include "code_generator_arm.h" -#include "code_generator_arm64.h" -#include "code_generator_mips.h" -#include "code_generator_mips64.h" -#include "code_generator_x86.h" -#include "code_generator_x86_64.h" #include "code_simulator_container.h" #include "common_compiler_test.h" #include "dex_file.h" @@ -52,10 +46,35 @@ #include "utils/mips64/managed_register_mips64.h" #include "utils/x86/managed_register_x86.h" +#ifdef ART_ENABLE_CODEGEN_arm +#include "code_generator_arm.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_arm64 +#include "code_generator_arm64.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_x86 +#include "code_generator_x86.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_x86_64 +#include "code_generator_x86_64.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_mips +#include "code_generator_mips.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_mips64 +#include "code_generator_mips64.h" +#endif + #include "gtest/gtest.h" namespace art { +#ifdef ART_ENABLE_CODEGEN_arm // Provide our own codegen, that ensures the C calling conventions // are preserved. Currently, ART and C do not match as R4 is caller-save // in ART, and callee-save in C. 
Alternatively, we could use or write @@ -80,7 +99,9 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM { blocked_register_pairs_[arm::R6_R7] = false; } }; +#endif +#ifdef ART_ENABLE_CODEGEN_x86 class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { public: TestCodeGeneratorX86(HGraph* graph, @@ -105,6 +126,7 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { blocked_register_pairs_[x86::ECX_EDI] = false; } }; +#endif class InternalCodeAllocator : public CodeAllocator { public: @@ -234,37 +256,54 @@ static void RunCode(InstructionSet target_isa, bool has_result, Expected expected) { CompilerOptions compiler_options; +#ifdef ART_ENABLE_CODEGEN_arm if (target_isa == kArm || target_isa == kThumb2) { std::unique_ptr<const ArmInstructionSetFeatures> features_arm( ArmInstructionSetFeatures::FromCppDefines()); TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options); RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kArm64) { + } +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 + if (target_isa == kArm64) { std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( Arm64InstructionSetFeatures::FromCppDefines()); arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options); RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kX86) { + } +#endif +#ifdef ART_ENABLE_CODEGEN_x86 + if (target_isa == kX86) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kX86_64) { + } +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 + if (target_isa == kX86_64) { std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( X86_64InstructionSetFeatures::FromCppDefines()); x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kMips) { + } +#endif +#ifdef ART_ENABLE_CODEGEN_mips + if (target_isa == kMips) { std::unique_ptr<const MipsInstructionSetFeatures> features_mips( MipsInstructionSetFeatures::FromCppDefines()); mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options); RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected); - } else if (target_isa == kMips64) { + } +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 + if (target_isa == kMips64) { std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( Mips64InstructionSetFeatures::FromCppDefines()); mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected); } +#endif } static ::std::vector<InstructionSet> GetTargetISAs() { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 89d80cc281..b3d5341de0 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -122,7 +122,10 @@ class HGraphVisualizerDisassembler { new DisassemblerOptions(/* absolute_addresses */ false, base_address, end_address, - /* can_read_literals */ true))); + /* can_read_literals */ true, + Is64BitInstructionSet(instruction_set) + ? 
&Thread::DumpThreadOffset<PointerSize::k64> + : &Thread::DumpThreadOffset<PointerSize::k32>))); } ~HGraphVisualizerDisassembler() { diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 27d9d48560..0bbc0e54bc 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -41,6 +41,92 @@ ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() { using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>; +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT + +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. +class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode { + public: + explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction) + : SlowPathCode(instruction) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + Register dest = locations->InAt(2).AsRegister<Register>(); + Location dest_pos = locations->InAt(3); + Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>(); + Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>(); + Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>(); + Register tmp = locations->GetTemp(3).AsRegister<Register>(); + + __ Bind(GetEntryLabel()); + // Compute the base destination address in `dst_curr_addr`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(dst_curr_addr, dest, element_size * constant + offset); + } else { + __ add(dst_curr_addr, + dest, + ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); + __ AddConstant(dst_curr_addr, offset); + } + + Label loop; + __ Bind(&loop); + __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex)); + __ MaybeUnpoisonHeapReference(tmp); + // TODO: Inline the mark bit check before calling the runtime? + // tmp = ReadBarrier::Mark(tmp); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more + // explanations.) + DCHECK_NE(tmp, SP); + DCHECK_NE(tmp, LR); + DCHECK_NE(tmp, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary (and not preserved). It thus cannot be used by + // any live register in this slow path. 
+ DCHECK_NE(src_curr_addr, IP); + DCHECK_NE(dst_curr_addr, IP); + DCHECK_NE(src_stop_addr, IP); + DCHECK_NE(tmp, IP); + DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp; + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + __ MaybePoisonHeapReference(tmp); + __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex)); + __ cmp(src_curr_addr, ShifterOperand(src_stop_addr)); + __ b(&loop, NE); + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM); +}; + +#undef __ + bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); @@ -1337,9 +1423,9 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) } void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1362,6 +1448,13 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) { locations->SetInAt(4, Location::RequiresRegister()); } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register IP cannot be used in + // ReadBarrierSystemArrayCopySlowPathARM64 (because that register + // is clobbered by ReadBarrierMarkRegX entry points). Get an extra + // temporary register from the register allocator. + locations->AddTemp(Location::RequiresRegister()); + } } static void CheckPosition(ArmAssembler* assembler, @@ -1427,9 +1520,9 @@ static void CheckPosition(ArmAssembler* assembler, } void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); ArmAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1438,18 +1531,22 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); Register src = locations->InAt(0).AsRegister<Register>(); Location src_pos = locations->InAt(1); Register dest = locations->InAt(2).AsRegister<Register>(); Location dest_pos = locations->InAt(3); Location length = locations->InAt(4); - Register temp1 = locations->GetTemp(0).AsRegister<Register>(); - Register temp2 = locations->GetTemp(1).AsRegister<Register>(); - Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Location temp2_loc = locations->GetTemp(1); + Register temp2 = temp2_loc.AsRegister<Register>(); + Location temp3_loc = locations->GetTemp(2); + Register temp3 = temp3_loc.AsRegister<Register>(); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); Label conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -1465,7 +1562,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ cmp(src, ShifterOperand(dest)); - __ b(slow_path->GetEntryLabel(), EQ); + __ b(intrinsic_slow_path->GetEntryLabel(), EQ); } // Checked when building locations. @@ -1477,7 +1574,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { __ b(&conditions_on_positions_validated, NE); } __ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant)); - __ b(slow_path->GetEntryLabel(), GT); + __ b(intrinsic_slow_path->GetEntryLabel(), GT); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -1490,19 +1587,19 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { } else { __ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>())); } - __ b(slow_path->GetEntryLabel(), LT); + __ b(intrinsic_slow_path->GetEntryLabel(), LT); } __ Bind(&conditions_on_positions_validated); if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. - __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel()); + __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { // Bail out if the destination is null. - __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel()); + __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel()); } // If the length is negative, bail out. 
@@ -1511,7 +1608,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { !optimizations.GetCountIsSourceLength() && !optimizations.GetCountIsDestinationLength()) { __ cmp(length.AsRegister<Register>(), ShifterOperand(0)); - __ b(slow_path->GetEntryLabel(), LT); + __ b(intrinsic_slow_path->GetEntryLabel(), LT); } // Validity checks: source. @@ -1519,7 +1616,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { src_pos, src, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsSourceLength()); @@ -1528,7 +1625,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { dest_pos, dest, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsDestinationLength()); @@ -1537,112 +1634,287 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { // type of the destination array. We do two checks: the classes are the same, // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - __ LoadFromOffset(kLoadWord, temp1, dest, class_offset); - __ LoadFromOffset(kLoadWord, temp2, src, class_offset); - bool did_unpoison = false; - if (!optimizations.GetDestinationIsNonPrimitiveArray() || - !optimizations.GetSourceIsNonPrimitiveArray()) { - // One or two of the references need to be unpoisoned. Unpoison them - // both to make the identity check valid. - __ MaybeUnpoisonHeapReference(temp1); - __ MaybeUnpoisonHeapReference(temp2); - did_unpoison = true; - } - if (!optimizations.GetDestinationIsNonPrimitiveArray()) { - // Bail out if the destination is not a non primitive array. - // /* HeapReference<Class> */ temp3 = temp1->component_type_ - __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); - __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp3); - __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel()); - } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); + } - if (!optimizations.GetSourceIsNonPrimitiveArray()) { - // Bail out if the source is not a non primitive array. 
- // /* HeapReference<Class> */ temp3 = temp2->component_type_ - __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset); - __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp3); - __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel()); - } + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false); + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // + // Register `temp1` is not trashed by the read barrier emitted + // by GenerateFieldLoadWithBakerReadBarrier below, as that + // method produces a call to a ReadBarrierMarkRegX entry point, + // which saves all potentially live registers, including + // temporaries such a `temp1`. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false); + __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp2` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel()); + } + + // For the same reason given earlier, `temp1` is not trashed by the + // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. + // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false); + // Note: if heap poisoning is on, we are comparing two unpoisoned references here. + __ cmp(temp1, ShifterOperand(temp2)); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + Label do_copy; + __ b(&do_copy, EQ); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + // We do not need to emit a read barrier for the following + // heap reference load, as `temp1` is only used in a + // comparison with null below, and this reference is not + // kept afterwards. + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ b(intrinsic_slow_path->GetEntryLabel(), NE); + } + } else { + // Non read barrier code. + + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ LoadFromOffset(kLoadWord, temp1, dest, class_offset); + // /* HeapReference<Class> */ temp2 = src->klass_ + __ LoadFromOffset(kLoadWord, temp2, src, class_offset); + bool did_unpoison = false; + if (!optimizations.GetDestinationIsNonPrimitiveArray() || + !optimizations.GetSourceIsNonPrimitiveArray()) { + // One or two of the references need to be unpoisoned. Unpoison them + // both to make the identity check valid. 
+ __ MaybeUnpoisonHeapReference(temp1); + __ MaybeUnpoisonHeapReference(temp2); + did_unpoison = true; + } - __ cmp(temp1, ShifterOperand(temp2)); + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp2->component_type_ + __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); + } - if (optimizations.GetDestinationIsTypedObjectArray()) { - Label do_copy; - __ b(&do_copy, EQ); - if (!did_unpoison) { + __ cmp(temp1, ShifterOperand(temp2)); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + Label do_copy; + __ b(&do_copy, EQ); + if (!did_unpoison) { + __ MaybeUnpoisonHeapReference(temp1); + } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // No need to unpoison the result, we're comparing against null. + __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ b(intrinsic_slow_path->GetEntryLabel(), NE); } - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); - // No need to unpoison the result, we're comparing against null. - __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_copy); - } else { - __ b(slow_path->GetEntryLabel(), NE); } } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. 
- // /* HeapReference<Class> */ temp1 = src->klass_ - __ LoadFromOffset(kLoadWord, temp1, src, class_offset); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp3 = temp1->component_type_ - __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); - __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp3); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp3` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ LoadFromOffset(kLoadWord, temp1, src, class_offset); + __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset); + __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp3); + } + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel()); + __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); } - // Compute base source address, base destination address, and end source address. - int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + // Compute the base source address in `temp1`. if (src_pos.IsConstant()) { int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); __ AddConstant(temp1, src, element_size * constant + offset); } else { - __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2)); + __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift)); __ AddConstant(temp1, offset); } - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ AddConstant(temp2, dest, element_size * constant + offset); - } else { - __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2)); - __ AddConstant(temp2, offset); - } - + // Compute the end source address in `temp3`. if (length.IsConstant()) { int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); __ AddConstant(temp3, temp1, element_size * constant); } else { - __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2)); + __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift)); } - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. 
- Label loop, done; - __ cmp(temp1, ShifterOperand(temp3)); - __ b(&done, EQ); - __ Bind(&loop); - __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); - __ str(IP, Address(temp2, element_size, Address::PostIndex)); - __ cmp(temp1, ShifterOperand(temp3)); - __ b(&loop, NE); - __ Bind(&done); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // The base destination address is computed later, as `temp2` is + // used for intermediate computations. + + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // // Slow-path copy. + // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + Label loop, done; + + // Don't enter copy loop if `length == 0`. + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&done, EQ); + + // /* int32_t */ monitor = src->monitor_ + __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including the rb_state, + // which shall prevent load-load reordering without using + // a memory barrier (which would be more expensive). + // `src` is unchanged by this operation, but its value now depends + // on `temp2`. + __ add(src, src, ShifterOperand(temp2, LSR, 32)); + + // Slow path used to copy array when `src` is gray. + SlowPathCode* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with LSRS + // which can be a 16-bit instruction unlike the TST immediate. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); + // Carry flag is the last bit shifted out by LSRS. + __ b(read_barrier_slow_path->GetEntryLabel(), CS); + + // Fast-path copy. + + // Compute the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(temp2, dest, element_size * constant + offset); + } else { + __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); + __ AddConstant(temp2, offset); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); + __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); + __ str(IP, Address(temp2, element_size, Address::PostIndex)); + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&loop, NE); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); + } else { + // Non read barrier code. 
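The pseudocode comment in the Baker read barrier branch above summarizes the new SystemArrayCopy strategy: read the source object's lock word once, fold it into an artificial address dependency instead of emitting a memory barrier, then run either a raw fast-path copy or a per-element marking copy. A plain C++ rendering of that comment, as a sketch only; Mark/MaybePoison/MaybeUnpoison are stand-ins for the ReadBarrierMarkRegX entry point and the heap-poisoning helpers, not ART APIs:

#include <cstdint>

namespace sketch {

using HeapRef = uint32_t;  // a compressed heap reference, as in the generated code

// Trivial stand-ins so the sketch is self-contained; in the generated code the
// marking goes through a runtime entry point and the poisoning helpers are
// no-ops unless heap poisoning is enabled.
inline HeapRef Mark(HeapRef ref) { return ref; }
inline HeapRef MaybePoison(HeapRef ref) { return ref; }
inline HeapRef MaybeUnpoison(HeapRef ref) { return ref; }

// `is_gray` corresponds to the single check of the source object's rb_state
// (LSRS + carry on ARM, testb + jnz on x86); it is evaluated once, before
// entering either loop.
void CopyReferences(const HeapRef* src_ptr,
                    const HeapRef* end_ptr,
                    HeapRef* dest_ptr,
                    bool is_gray) {
  if (src_ptr == end_ptr) {
    return;  // Don't enter the copy loop if length == 0.
  }
  if (is_gray) {
    // Slow-path copy: each element is marked before being stored.
    do {
      *dest_ptr++ = MaybePoison(Mark(MaybeUnpoison(*src_ptr++)));
    } while (src_ptr != end_ptr);
  } else {
    // Fast-path copy: raw copy of the references, no poison/unpoison needed.
    do {
      *dest_ptr++ = *src_ptr++;
    } while (src_ptr != end_ptr);
  }
}

}  // namespace sketch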
+ + // Compute the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(temp2, dest, element_size * constant + offset); + } else { + __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); + __ AddConstant(temp2, offset); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + Label loop, done; + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&done, EQ); + __ Bind(&loop); + __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); + __ str(IP, Address(temp2, element_size, Address::PostIndex)); + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&loop, NE); + __ Bind(&done); + } // We only need one card marking on the destination array. codegen_->MarkGCCard(temp1, @@ -1651,7 +1923,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { Register(kNoRegister), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 9cfe3ce569..91374b3108 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -144,6 +144,73 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64); }; +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. +class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp) + : SlowPathCodeARM64(instruction), tmp_(tmp) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + + Register src_curr_addr = XRegisterFrom(locations->GetTemp(0)); + Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1)); + Register src_stop_addr = XRegisterFrom(locations->GetTemp(2)); + Register tmp_reg = WRegisterFrom(tmp_); + + __ Bind(GetEntryLabel()); + vixl::aarch64::Label slow_copy_loop; + __ Bind(&slow_copy_loop); + __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex)); + codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg); + // TODO: Inline the mark bit check before calling the runtime? + // tmp_reg = ReadBarrier::Mark(tmp_reg); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more + // explanations.) 
+ DCHECK_NE(tmp_.reg(), LR); + DCHECK_NE(tmp_.reg(), WSP); + DCHECK_NE(tmp_.reg(), WZR); + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary (and not preserved). It thus cannot be used by + // any live register in this slow path. + DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0); + DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0); + DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0); + DCHECK_NE(tmp_.reg(), IP0); + DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg(); + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg()); + // This runtime call does not require a stack map. + codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg); + __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex)); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&slow_copy_loop, ne); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; } + + private: + Location tmp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64); +}; #undef __ bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { @@ -2035,9 +2102,9 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128; // We want to use two temporary registers in order to reduce the register pressure in arm64. // So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary. void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2090,12 +2157,20 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register IP0, obtained from the VIXL scratch register + // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64 + // (because that register is clobbered by ReadBarrierMarkRegX + // entry points). Get an extra temporary register from the + // register allocator. + locations->AddTemp(Location::RequiresRegister()); + } } void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. 
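The ARM64 slow path above hands the loaded reference to a per-register ReadBarrierMarkRegX entry point, which is why the register holding it must not be IP0 (used as scratch inside those entry points), LR, SP, or ZR. A sketch of that selection under an assumed thread-local layout; the base offset and table shape are illustrative, only the per-register indexing mirrors the call above.

#include <cassert>
#include <cstdint>

constexpr int32_t kPointerSize = 8;                    // AArch64
constexpr int32_t kFirstMarkEntryPointOffset = 0x600;  // hypothetical base offset in Thread
constexpr int kNumberOfWRegisters = 32;
constexpr int IP0 = 16;                                // x16, clobbered by the entry points
constexpr int LR = 30;                                 // x30
constexpr int SP_OR_ZR = 31;                           // encoding shared by wsp/wzr

// One entry point per core register: the selected slot tells the runtime which
// register holds the reference to mark and to write the result back into, so
// no argument needs to be materialized at the call site.
int32_t ReadBarrierMarkEntryPointOffset(int reg) {
  assert(0 <= reg && reg < kNumberOfWRegisters);
  assert(reg != IP0 && reg != LR && reg != SP_OR_ZR);  // mirrors the DCHECKs above
  return kFirstMarkEntryPointOffset + reg * kPointerSize;
}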
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2104,6 +2179,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); Register src = XRegisterFrom(locations->InAt(0)); Location src_pos = locations->InAt(1); @@ -2111,10 +2187,12 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { Location dest_pos = locations->InAt(3); Location length = locations->InAt(4); Register temp1 = WRegisterFrom(locations->GetTemp(0)); + Location temp1_loc = LocationFrom(temp1); Register temp2 = WRegisterFrom(locations->GetTemp(1)); + Location temp2_loc = LocationFrom(temp2); - SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); vixl::aarch64::Label conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -2130,7 +2208,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ Cmp(src, dest); - __ B(slow_path->GetEntryLabel(), eq); + __ B(intrinsic_slow_path->GetEntryLabel(), eq); } // Checked when building locations. DCHECK(!optimizations.GetDestinationIsSource() @@ -2141,7 +2219,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { __ B(&conditions_on_positions_validated, ne); } __ Cmp(WRegisterFrom(dest_pos), src_pos_constant); - __ B(slow_path->GetEntryLabel(), gt); + __ B(intrinsic_slow_path->GetEntryLabel(), gt); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -2150,19 +2228,19 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { } __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()), OperandFrom(dest_pos, invoke->InputAt(3)->GetType())); - __ B(slow_path->GetEntryLabel(), lt); + __ B(intrinsic_slow_path->GetEntryLabel(), lt); } __ Bind(&conditions_on_positions_validated); if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. - __ Cbz(src, slow_path->GetEntryLabel()); + __ Cbz(src, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { // Bail out if the destination is null. - __ Cbz(dest, slow_path->GetEntryLabel()); + __ Cbz(dest, intrinsic_slow_path->GetEntryLabel()); } // We have already checked in the LocationsBuilder for the constant case. @@ -2170,17 +2248,17 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { !optimizations.GetCountIsSourceLength() && !optimizations.GetCountIsDestinationLength()) { // If the length is negative, bail out. - __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel()); + __ Tbnz(WRegisterFrom(length), kWRegSize - 1, intrinsic_slow_path->GetEntryLabel()); // If the length >= 128 then (currently) prefer native implementation. 
__ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold); - __ B(slow_path->GetEntryLabel(), ge); + __ B(intrinsic_slow_path->GetEntryLabel(), ge); } // Validity checks: source. CheckSystemArrayCopyPosition(masm, src_pos, src, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsSourceLength()); @@ -2189,90 +2267,236 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { dest_pos, dest, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsDestinationLength()); { // We use a block to end the scratch scope before the write barrier, thus // freeing the temporary registers so they can be used in `MarkGCCard`. UseScratchRegisterScope temps(masm); + // Note: Because it is acquired from VIXL's scratch register pool, + // `temp3` might be IP0, and thus cannot be used as `ref` argument + // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier + // calls below (see ReadBarrierMarkSlowPathARM64 for more details). Register temp3 = temps.AcquireW(); + if (!optimizations.GetDoesNotNeedTypeCheck()) { // Check whether all elements of the source array are assignable to the component // type of the destination array. We do two checks: the classes are the same, // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - __ Ldr(temp1, MemOperand(dest, class_offset)); - __ Ldr(temp2, MemOperand(src, class_offset)); - bool did_unpoison = false; - if (!optimizations.GetDestinationIsNonPrimitiveArray() || - !optimizations.GetSourceIsNonPrimitiveArray()) { - // One or two of the references need to be unpoisoned. Unpoison them - // both to make the identity check valid. - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); - did_unpoison = true; - } - if (!optimizations.GetDestinationIsNonPrimitiveArray()) { - // Bail out if the destination is not a non primitive array. - // /* HeapReference<Class> */ temp3 = temp1->component_type_ - __ Ldr(temp3, HeapOperand(temp1, component_offset)); - __ Cbz(temp3, slow_path->GetEntryLabel()); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); - __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp3, slow_path->GetEntryLabel()); - } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + src.W(), + class_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + temp1, + component_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); + __ Ldrh(temp1, HeapOperand(temp1, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + } - if (!optimizations.GetSourceIsNonPrimitiveArray()) { - // Bail out if the source is not a non primitive array. 
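Both the Baker and the plain variants in this hunk perform the same "non-primitive array" test on the source and destination classes: the component type must be present and its primitive type must be kPrimNot (0). A sketch with stand-in types:

#include <cstdint>

// Illustrative stand-ins for the mirror::Class fields read above.
struct Class {
  const Class* component_type;  // null if this is not an array class
  uint16_t primitive_type;      // Primitive::kPrimNot == 0 for reference types
};

constexpr uint16_t kPrimNot = 0;  // matches the static_assert in the generated code

bool IsNonPrimitiveArrayClass(const Class* klass) {
  const Class* component = klass->component_type;  // load component_type_
  if (component == nullptr) {
    return false;                                   // not an array at all -> slow path
  }
  return component->primitive_type == kPrimNot;     // primitive elements -> slow path
}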
- // /* HeapReference<Class> */ temp3 = temp2->component_type_ - __ Ldr(temp3, HeapOperand(temp2, component_offset)); - __ Cbz(temp3, slow_path->GetEntryLabel()); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); - __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp3, slow_path->GetEntryLabel()); - } + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + dest.W(), + class_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // + // Register `temp1` is not trashed by the read barrier emitted + // by GenerateFieldLoadWithBakerReadBarrier below, as that + // method produces a call to a ReadBarrierMarkRegX entry point, + // which saves all potentially live registers, including + // temporaries such a `temp1`. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp2_loc, + temp1, + component_offset, + temp3, + /* needs_null_check */ false, + /* use_load_acquire */ false); + __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp2` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); + __ Ldrh(temp2, HeapOperand(temp2, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel()); + } - __ Cmp(temp1, temp2); + // For the same reason given earlier, `temp1` is not trashed by the + // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. + // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp2_loc, + src.W(), + class_offset, + temp3, + /* needs_null_check */ false, + /* use_load_acquire */ false); + // Note: if heap poisoning is on, we are comparing two unpoisoned references here. + __ Cmp(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + vixl::aarch64::Label do_copy; + __ B(&do_copy, eq); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + temp1, + component_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + // We do not need to emit a read barrier for the following + // heap reference load, as `temp1` is only used in a + // comparison with null below, and this reference is not + // kept afterwards. + __ Ldr(temp1, HeapOperand(temp1, super_offset)); + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ B(intrinsic_slow_path->GetEntryLabel(), ne); + } + } else { + // Non read barrier code. + + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ Ldr(temp1, MemOperand(dest, class_offset)); + // /* HeapReference<Class> */ temp2 = src->klass_ + __ Ldr(temp2, MemOperand(src, class_offset)); + bool did_unpoison = false; + if (!optimizations.GetDestinationIsNonPrimitiveArray() || + !optimizations.GetSourceIsNonPrimitiveArray()) { + // One or two of the references need to be unpoisoned. Unpoison them + // both to make the identity check valid. 
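The do_copy branch above encodes when the copy may proceed without per-element type checks: either both arrays have exactly the same class, or the destination is Object[], which the generated code detects by loading the destination component type's super class and comparing it against null. A sketch of that decision with stand-in types; anything that fails it falls back to the runtime, which performs the full per-element checking.

// Illustrative stand-in for the two mirror::Class fields involved.
struct Class {
  const Class* component_type;  // null for non-array classes
  const Class* super_class;     // null only for java.lang.Object
};

// True when the intrinsic's fast path may copy without checking each element.
bool FastPathAssignable(const Class* src_klass, const Class* dest_klass) {
  if (src_klass == dest_klass) {
    return true;                                  // identical array types
  }
  const Class* dest_component = dest_klass->component_type;
  // dest is Object[] exactly when its component type has no super class.
  return dest_component != nullptr && dest_component->super_class == nullptr;
}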
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); + did_unpoison = true; + } + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp1->component_type_ + __ Ldr(temp3, HeapOperand(temp1, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } - if (optimizations.GetDestinationIsTypedObjectArray()) { - vixl::aarch64::Label do_copy; - __ B(&do_copy, eq); - if (!did_unpoison) { + if (!optimizations.GetSourceIsNonPrimitiveArray()) { + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp3 = temp2->component_type_ + __ Ldr(temp3, HeapOperand(temp2, component_offset)); + __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); + // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); + __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); + } + + __ Cmp(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + vixl::aarch64::Label do_copy; + __ B(&do_copy, eq); + if (!did_unpoison) { + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); + } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ Ldr(temp1, HeapOperand(temp1, component_offset)); codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ Ldr(temp1, HeapOperand(temp1, super_offset)); + // No need to unpoison the result, we're comparing against null. + __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ B(intrinsic_slow_path->GetEntryLabel(), ne); } - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ Ldr(temp1, HeapOperand(temp1, component_offset)); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ Ldr(temp1, HeapOperand(temp1, super_offset)); - // No need to unpoison the result, we're comparing against null. - __ Cbnz(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_copy); - } else { - __ B(slow_path->GetEntryLabel(), ne); } } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. 
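The comment above about unpoisoning both class references before the identity check boils down to keeping the two compare operands in the same encoding. A sketch, with poisoning modeled as negation purely for illustration; whether references are poisoned at all is a build-time property of the runtime.

#include <cstdint>

using HeapRef = uint32_t;  // a compressed heap reference as stored in memory

// Illustrative encoding only.
static HeapRef Poison(HeapRef ref)   { return static_cast<HeapRef>(0u - ref); }
static HeapRef Unpoison(HeapRef ref) { return static_cast<HeapRef>(0u - ref); }

// An identity check is meaningful only when both sides use the same encoding:
// compare two poisoned words, or two unpoisoned ones, never a mix.
bool SameClass(HeapRef stored_a, HeapRef stored_b) {
  return Unpoison(stored_a) == Unpoison(stored_b);  // same answer as stored_a == stored_b
}

// The x86 variant instead re-poisons the unpoisoned side so it can compare
// directly against the poisoned word still sitting in memory; same principle.
bool SameClassAgainstMemory(HeapRef unpoisoned_a, HeapRef stored_b) {
  return Poison(unpoisoned_a) == stored_b;
}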
- // /* HeapReference<Class> */ temp1 = src->klass_ - __ Ldr(temp1, HeapOperand(src.W(), class_offset)); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp3 = temp1->component_type_ - __ Ldr(temp3, HeapOperand(temp1, component_offset)); - __ Cbz(temp3, slow_path->GetEntryLabel()); - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); - __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp1_loc, + src.W(), + class_offset, + temp2, + /* needs_null_check */ false, + /* use_load_acquire */ false); + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + temp2_loc, + temp1, + component_offset, + temp3, + /* needs_null_check */ false, + /* use_load_acquire */ false); + __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp2` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ Ldr(temp1, HeapOperand(src.W(), class_offset)); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + __ Ldr(temp2, HeapOperand(temp1, component_offset)); + __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); + } + // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); + __ Ldrh(temp2, HeapOperand(temp2, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp3, slow_path->GetEntryLabel()); + __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel()); } Register src_curr_addr = temp1.X(); Register dst_curr_addr = temp2.X(); - Register src_stop_addr = temp3.X(); + Register src_stop_addr; + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register IP0, obtained from the VIXL scratch + // register pool as `temp3`, cannot be used in + // ReadBarrierSystemArrayCopySlowPathARM64 (because that + // register is clobbered by ReadBarrierMarkRegX entry points). + // So another temporary register allocated by the register + // allocator instead. + DCHECK_EQ(LocationFrom(temp3).reg(), IP0); + src_stop_addr = XRegisterFrom(locations->GetTemp(2)); + } else { + src_stop_addr = temp3.X(); + } GenSystemArrayCopyAddresses(masm, Primitive::kPrimNot, @@ -2285,25 +2509,98 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { dst_curr_addr, src_stop_addr); - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. - vixl::aarch64::Label loop, done; const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); - __ Bind(&loop); - __ Cmp(src_curr_addr, src_stop_addr); - __ B(&done, eq); - { + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // // Slow-path copy. 
+ // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + vixl::aarch64::Label loop, done; + + // Don't enter copy loop if `length == 0`. + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&done, eq); + Register tmp = temps.AcquireW(); + // Make sure `tmp` is not IP0, as it is clobbered by + // ReadBarrierMarkRegX entry points in + // ReadBarrierSystemArrayCopySlowPathARM64. + DCHECK_NE(LocationFrom(tmp).reg(), IP0); + + // /* int32_t */ monitor = src->monitor_ + __ Ldr(tmp, HeapOperand(src.W(), monitor_offset)); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + // `src` is unchanged by this operation, but its value now depends + // on `tmp`. + __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32)); + + // Slow path used to copy array when `src` is gray. + SlowPathCodeARM64* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp)); + codegen_->AddSlowPath(read_barrier_slow_path); + + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&loop, ne); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); + } else { + // Non read barrier code. + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + vixl::aarch64::Label loop, done; + __ Bind(&loop); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&done, eq); + { + Register tmp = temps.AcquireW(); + __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); + __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); + } + __ B(&loop); + __ Bind(&done); } - __ B(&loop); - __ Bind(&done); } // We only need one card marking on the destination array. codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } static void GenIsInfinite(LocationSummary* locations, diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 55e1ab2451..6e5eb6622b 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2456,16 +2456,18 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) { __ FloorWS(FTMP, in); __ Mfc1(out, FTMP); - __ LoadConst32(TMP, 1); + if (!IsR6()) { + __ LoadConst32(TMP, -1); + } - // TMP = (out = java.lang.Integer.MAX_VALUE) ? 
1 : 0; + // TMP = (out = java.lang.Integer.MAX_VALUE) ? -1 : 0; __ LoadConst32(AT, std::numeric_limits<int32_t>::max()); __ Bne(AT, out, &finite); __ Mtc1(ZERO, FTMP); if (IsR6()) { __ CmpLtS(FTMP, in, FTMP); - __ Mfc1(AT, FTMP); + __ Mfc1(TMP, FTMP); } else { __ ColtS(in, FTMP); } @@ -2474,28 +2476,26 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) { __ Bind(&finite); - // TMP = (0.5f <= (in - out)) ? 1 : 0; + // TMP = (0.5f <= (in - out)) ? -1 : 0; __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float". __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f)); __ SubS(FTMP, in, FTMP); __ Mtc1(AT, half); if (IsR6()) { __ CmpLeS(FTMP, half, FTMP); - __ Mfc1(AT, FTMP); + __ Mfc1(TMP, FTMP); } else { __ ColeS(half, FTMP); } __ Bind(&add); - if (IsR6()) { - __ Selnez(TMP, TMP, AT); - } else { + if (!IsR6()) { __ Movf(TMP, ZERO); } - // Return out += TMP. - __ Addu(out, out, TMP); + // Return out -= TMP. + __ Subu(out, out, TMP); __ Bind(&done); } diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 22f4181b92..cf4a040551 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -70,6 +70,105 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>; +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT + +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. +class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { + public: + explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction) + : SlowPathCode(instruction) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + Register src = locations->InAt(0).AsRegister<Register>(); + Location src_pos = locations->InAt(1); + Register dest = locations->InAt(2).AsRegister<Register>(); + Location dest_pos = locations->InAt(3); + Location length = locations->InAt(4); + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + + __ Bind(GetEntryLabel()); + // In this code path, registers `temp1`, `temp2`, and `temp3` + // (resp.) are not used for the base source address, the base + // destination address, and the end source address (resp.), as in + // other SystemArrayCopy intrinsic code paths. Instead they are + // (resp.) used for: + // - the loop index (`i`); + // - the source index (`src_index`) and the loaded (source) + // reference (`value`); and + // - the destination index (`dest_index`). 
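The MathRoundFloat change above replaces the 0/1 increment plus Selnez/Addu with the 0/-1 mask that the R6 compare instructions produce directly, applied with a subtraction (Movf covers the pre-R6 case). The arithmetic is ordinary round-half-up on top of floor; a branchless sketch of the mask trick, leaving out the NaN and Integer.MAX_VALUE special cases that the assembly handles around it:

#include <cmath>
#include <cstdint>

// floor + a 0/-1 mask: R6 floating-point compares write all-ones or all-zeros
// into a register, and `out -= mask` then adds 1 exactly when the fractional
// part is >= 0.5. Valid for inputs whose floor fits in int32_t.
int32_t RoundFloat(float in) {
  int32_t out = static_cast<int32_t>(std::floor(in));   // floor.w.s ; mfc1
  float frac = in - static_cast<float>(out);             // in - floor(in)
  int32_t mask = (0.5f <= frac) ? -1 : 0;                 // cmp.le.s ; mfc1
  return out - mask;                                      // subu out, out, TMP
}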
+ + // i = 0 + __ xorl(temp1, temp1); + NearLabel loop; + __ Bind(&loop); + // value = src_array[i + src_pos] + if (src_pos.IsConstant()) { + int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); + int32_t adjusted_offset = offset + constant * element_size; + __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset)); + } else { + __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0)); + __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset)); + } + __ MaybeUnpoisonHeapReference(temp2); + // TODO: Inline the mark bit check before calling the runtime? + // value = ReadBarrier::Mark(value) + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more + // explanations.) + DCHECK_NE(temp2, ESP); + DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2; + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2); + // This runtime call does not require a stack map. + x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + __ MaybePoisonHeapReference(temp2); + // dest_array[i + dest_pos] = value + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + int32_t adjusted_offset = offset + constant * element_size; + __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2); + } else { + __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0)); + __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2); + } + // ++i + __ addl(temp1, Immediate(1)); + // if (i != length) goto loop + x86_codegen->GenerateIntCompare(temp1_loc, length); + __ j(kNotEqual, &loop); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86); +}; + +#undef __ + #define __ assembler-> static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) { @@ -1835,10 +1934,9 @@ static void GenUnsafeGet(HInvoke* invoke, Register output = output_loc.AsRegister<Register>(); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, output_loc, base, src, temp, /* needs_null_check */ false); + invoke, output_loc, base, src, /* needs_null_check */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -1901,11 +1999,6 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetOut(Location::RequiresRegister(), can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier. 
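Unlike the ARM and ARM64 slow paths, which walk raw element addresses, the x86 slow path above re-derives each address from a loop index, because during this code path its three temporaries hold indices rather than pointers. The emitted loop corresponds to the following shape (types and Mark() are stand-ins; poisoning is elided):

#include <cstdint>

struct Obj;
using HeapRef = Obj*;

static HeapRef Mark(HeapRef ref) { return ref; }  // stand-in for the marking entry point

// Precondition: length > 0 (the `length == 0` case never reaches the slow path).
// dest_array[dest_pos + i] = Mark(src_array[src_pos + i]) for i in [0, length).
void SlowPathCopyByIndex(HeapRef* src_array, int32_t src_pos,
                         HeapRef* dest_array, int32_t dest_pos,
                         int32_t length) {
  int32_t i = 0;                              // xorl temp1, temp1
  do {
    HeapRef value = src_array[src_pos + i];   // movl temp2, [src + index*4 + offset]
    value = Mark(value);                      // call the ReadBarrierMarkRegX entry point
    dest_array[dest_pos + i] = value;         // movl [dest + index*4 + offset], temp2
    ++i;                                      // addl temp1, 1
  } while (i != length);                      // cmpl temp1, length ; jne loop
}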
- locations->AddTemp(Location::RequiresRegister()); - } } void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) { @@ -2678,9 +2771,9 @@ static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) } void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2710,9 +2803,9 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); X86Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2721,17 +2814,21 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); Register src = locations->InAt(0).AsRegister<Register>(); Location src_pos = locations->InAt(1); Register dest = locations->InAt(2).AsRegister<Register>(); Location dest_pos = locations->InAt(3); - Location length = locations->InAt(4); - Register temp1 = locations->GetTemp(0).AsRegister<Register>(); - Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Location length_arg = locations->InAt(4); + Location length = length_arg; + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Location temp2_loc = locations->GetTemp(1); + Register temp2 = temp2_loc.AsRegister<Register>(); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); NearLabel conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -2747,7 +2844,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ cmpl(src, dest); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -2755,7 +2852,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, &conditions_on_positions_validated); } __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant)); - __ j(kGreater, slow_path->GetEntryLabel()); + __ j(kGreater, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -2765,10 +2862,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { if (dest_pos.IsConstant()) { int32_t dest_pos_constant = 
dest_pos.GetConstant()->AsIntConstant()->GetValue(); __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant)); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } else { __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>()); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } } @@ -2777,16 +2874,17 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. __ testl(src, src); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { // Bail out if the destination is null. __ testl(dest, dest); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } - Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + Location temp3_loc = locations->GetTemp(2); + Register temp3 = temp3_loc.AsRegister<Register>(); if (length.IsStackSlot()) { __ movl(temp3, Address(ESP, length.GetStackIndex())); length = Location::RegisterLocation(temp3); @@ -2798,7 +2896,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { !optimizations.GetCountIsSourceLength() && !optimizations.GetCountIsDestinationLength()) { __ testl(length.AsRegister<Register>(), length.AsRegister<Register>()); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } // Validity checks: source. @@ -2806,7 +2904,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { src_pos, src, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsSourceLength()); @@ -2815,7 +2913,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { dest_pos, dest, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsDestinationLength()); @@ -2824,72 +2922,159 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // type of the destination array. We do two checks: the classes are the same, // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. + if (!optimizations.GetSourceIsNonPrimitiveArray()) { - // /* HeapReference<Class> */ temp1 = temp1->klass_ - __ movl(temp1, Address(src, class_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // Bail out if the source is not a non primitive array. - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ movl(temp1, Address(temp1, component_offset)); - __ testl(temp1, temp1); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp1); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + __ testl(temp1, temp1); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 
+ } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ movl(temp1, Address(src, class_offset)); + __ MaybeUnpoisonHeapReference(temp1); + // Bail out if the source is not a non primitive array. + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ movl(temp1, Address(temp1, component_offset)); + __ testl(temp1, temp1); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp1); + } __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } - if (!optimizations.GetDestinationIsNonPrimitiveArray()) { - // /* HeapReference<Class> */ temp1 = temp1->klass_ - __ movl(temp1, Address(dest, class_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // Bail out if the destination is not a non primitive array. - // /* HeapReference<Class> */ temp2 = temp1->component_type_ - __ movl(temp2, Address(temp1, component_offset)); - __ testl(temp2, temp2); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp2); - __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - // Re-poison the heap reference to make the compare instruction below - // compare two poisoned references. - __ PoisonHeapReference(temp1); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (length.Equals(Location::RegisterLocation(temp3))) { + // When Baker read barriers are enabled, register `temp3`, + // which in the present case contains the `length` parameter, + // will be overwritten below. Make the `length` location + // reference the original stack location; it will be moved + // back to `temp3` later if necessary. + DCHECK(length_arg.IsStackSlot()); + length = length_arg; + } + + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false); + + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + // Bail out if the destination is not a non primitive array. + // + // Register `temp1` is not trashed by the read barrier emitted + // by GenerateFieldLoadWithBakerReadBarrier below, as that + // method produces a call to a ReadBarrierMarkRegX entry point, + // which saves all potentially live registers, including + // temporaries such a `temp1`. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false); + __ testl(temp2, temp2); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp2` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot)); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + } + + // For the same reason given earlier, `temp1` is not trashed by the + // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. + // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, src, class_offset, /* needs_null_check */ false); + // Note: if heap poisoning is on, we are comparing two unpoisoned references here. 
+ __ cmpl(temp1, temp2); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + NearLabel do_copy; + __ j(kEqual, &do_copy); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + // We do not need to emit a read barrier for the following + // heap reference load, as `temp1` is only used in a + // comparison with null below, and this reference is not + // kept afterwards. + __ cmpl(Address(temp1, super_offset), Immediate(0)); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + } } else { - // /* HeapReference<Class> */ temp1 = temp1->klass_ - __ movl(temp1, Address(dest, class_offset)); - } + // Non read barrier code. - // Note: if poisoning is on, we are here comparing two poisoned references. - __ cmpl(temp1, Address(src, class_offset)); + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ movl(temp1, Address(dest, class_offset)); + if (!optimizations.GetDestinationIsNonPrimitiveArray()) { + __ MaybeUnpoisonHeapReference(temp1); + // Bail out if the destination is not a non primitive array. + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + __ movl(temp2, Address(temp1, component_offset)); + __ testl(temp2, temp2); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(temp2); + __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot)); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + // Re-poison the heap reference to make the compare instruction below + // compare two poisoned references. + __ PoisonHeapReference(temp1); + } - if (optimizations.GetDestinationIsTypedObjectArray()) { - NearLabel do_copy; - __ j(kEqual, &do_copy); + // Note: if heap poisoning is on, we are comparing two poisoned references here. + __ cmpl(temp1, Address(src, class_offset)); + + if (optimizations.GetDestinationIsTypedObjectArray()) { + NearLabel do_copy; + __ j(kEqual, &do_copy); + __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ movl(temp1, Address(temp1, component_offset)); + __ MaybeUnpoisonHeapReference(temp1); + __ cmpl(Address(temp1, super_offset), Immediate(0)); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + __ Bind(&do_copy); + } else { + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); + } + } + } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { + DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); + // Bail out if the source is not a non primitive array. + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + __ testl(temp1, temp1); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `temp1` has been unpoisoned + // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 
+ } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ movl(temp1, Address(src, class_offset)); __ MaybeUnpoisonHeapReference(temp1); // /* HeapReference<Class> */ temp1 = temp1->component_type_ __ movl(temp1, Address(temp1, component_offset)); + __ testl(temp1, temp1); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); __ MaybeUnpoisonHeapReference(temp1); - __ cmpl(Address(temp1, super_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(&do_copy); - } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); } - } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { - DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); - // Bail out if the source is not a non primitive array. - // /* HeapReference<Class> */ temp1 = src->klass_ - __ movl(temp1, Address(src, class_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ movl(temp1, Address(temp1, component_offset)); - __ testl(temp1, temp1); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(temp1); __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } - // Compute base source address, base destination address, and end source address. + // Compute the base source address in `temp1`. int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); DCHECK_EQ(element_size, 4); uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); @@ -2900,35 +3085,138 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); } - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp2, Address(dest, element_size * constant + offset)); - } else { - __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); - } + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // If it is needed (in the case of the fast-path loop), the base + // destination address is computed later, as `temp2` is used for + // intermediate computations. - if (length.IsConstant()) { - int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp3, Address(temp1, element_size * constant)); + // Compute the end source address in `temp3`. + if (length.IsConstant()) { + int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp3, Address(temp1, element_size * constant)); + } else { + if (length.IsStackSlot()) { + // Location `length` is again pointing at a stack slot, as + // register `temp3` (which was containing the length parameter + // earlier) has been overwritten; restore it now + DCHECK(length.Equals(length_arg)); + __ movl(temp3, Address(ESP, length.GetStackIndex())); + length = Location::RegisterLocation(temp3); + } + __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); + } + + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // // Slow-path copy. 
+ // for (size_t i = 0; i != length; ++i) { + // dest_array[dest_pos + i] = + // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i]))); + // } + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + NearLabel loop, done; + + // Don't enter copy loop if `length == 0`. + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; + constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); + + // if (rb_state == ReadBarrier::gray_ptr_) + // goto slow_path; + // At this point, just do the "if" and make sure that flags are preserved until the branch. + __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value)); + + // Load fence to prevent load-load reordering. + // Note that this is a no-op, thanks to the x86 memory model. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + + // Slow path used to copy array when `src` is gray. + SlowPathCode* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // We have done the "if" of the gray bit check above, now branch based on the flags. + __ j(kNotZero, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + + // Set the base destination address in `temp2`. + if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp2, Address(dest, element_size * constant + offset)); + } else { + __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); + __ pushl(Address(temp1, 0)); + __ cfi().AdjustCFAOffset(4); + __ popl(Address(temp2, 0)); + __ cfi().AdjustCFAOffset(-4); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); } else { - __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); - } - - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. - NearLabel loop, done; - __ cmpl(temp1, temp3); - __ j(kEqual, &done); - __ Bind(&loop); - __ pushl(Address(temp1, 0)); - __ cfi().AdjustCFAOffset(4); - __ popl(Address(temp2, 0)); - __ cfi().AdjustCFAOffset(-4); - __ addl(temp1, Immediate(element_size)); - __ addl(temp2, Immediate(element_size)); - __ cmpl(temp1, temp3); - __ j(kNotEqual, &loop); - __ Bind(&done); + // Non read barrier code. + + // Compute the base destination address in `temp2`. 
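The testb above folds the gray check into a single byte-sized memory operand. The three constants it uses are plain arithmetic on LockWord::kReadBarrierStateShift; a worked version follows, where the shift value itself is an assumption for illustration and byte 3 being the top byte relies on x86's little-endian layout.

#include <cstdint>

constexpr uint32_t kBitsPerByte = 8;
constexpr uint32_t kReadBarrierStateShift = 28;  // assumed value, for illustration only

// Which byte of the 32-bit lock word holds the low rb_state bit, and which bit
// inside that byte it is; testb(monitor_offset + byte, 1 << bit) then touches a
// single byte instead of the whole word.
constexpr uint32_t gray_byte_position = kReadBarrierStateShift / kBitsPerByte;        // = 3
constexpr uint32_t gray_bit_position  = kReadBarrierStateShift % kBitsPerByte;        // = 4
constexpr int32_t  test_value = static_cast<int8_t>(1u << gray_bit_position);         // = 0x10

static_assert(gray_byte_position == 3u, "low rb_state bit lives in the top byte");
static_assert(test_value == 0x10, "single-bit mask within that byte");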
+ if (dest_pos.IsConstant()) { + int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp2, Address(dest, element_size * constant + offset)); + } else { + __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); + } + + // Compute the end source address in `temp3`. + if (length.IsConstant()) { + int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); + __ leal(temp3, Address(temp1, element_size * constant)); + } else { + __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); + } + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + NearLabel loop, done; + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + __ Bind(&loop); + __ pushl(Address(temp1, 0)); + __ cfi().AdjustCFAOffset(4); + __ popl(Address(temp2, 0)); + __ cfi().AdjustCFAOffset(-4); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + __ Bind(&done); + } // We only need one card marking on the destination array. codegen_->MarkGCCard(temp1, @@ -2937,7 +3225,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { Register(kNoRegister), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index ab8b05c3d4..a4ee546237 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -64,6 +64,65 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) { using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>; +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT + +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. +class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { + public: + explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction) + : SlowPathCode(instruction) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + + CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>(); + CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>(); + + __ Bind(GetEntryLabel()); + NearLabel loop; + __ Bind(&loop); + __ movl(CpuRegister(TMP), Address(src_curr_addr, 0)); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + // TODO: Inline the mark bit check before calling the runtime? 
+ // TMP = ReadBarrier::Mark(TMP); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP); + // This runtime call does not require a stack map. + x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + __ MaybePoisonHeapReference(CpuRegister(TMP)); + __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP)); + __ addl(src_curr_addr, Immediate(element_size)); + __ addl(dst_curr_addr, Immediate(element_size)); + __ cmpl(src_curr_addr, src_stop_addr); + __ j(kNotEqual, &loop); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64); +}; + +#undef __ + #define __ assembler-> static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -1053,9 +1112,9 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1063,9 +1122,9 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { - // TODO(rpl): Implement read barriers in the SystemArrayCopy - // intrinsic and re-enable it (b/29516905). - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1074,18 +1133,23 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>(); Location src_pos = locations->InAt(1); CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>(); Location dest_pos = locations->InAt(3); Location length = locations->InAt(4); - CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); - CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>(); + Location temp1_loc = locations->GetTemp(0); + CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>(); + Location temp2_loc = locations->GetTemp(1); + CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + Location temp3_loc = locations->GetTemp(2); + CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>(); + Location TMP_loc = Location::RegisterLocation(TMP); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); - codegen_->AddSlowPath(slow_path); + SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); NearLabel conditions_on_positions_validated; SystemArrayCopyOptimizations optimizations(invoke); @@ -1101,7 +1165,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { DCHECK_GE(src_pos_constant, dest_pos_constant); } else if (src_pos_constant < dest_pos_constant) { __ cmpl(src, dest); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -1109,7 +1173,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, &conditions_on_positions_validated); } __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant)); - __ j(kGreater, slow_path->GetEntryLabel()); + __ j(kGreater, intrinsic_slow_path->GetEntryLabel()); } } else { if (!optimizations.GetDestinationIsSource()) { @@ -1119,10 +1183,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (dest_pos.IsConstant()) { int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant)); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } else { __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>()); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } } @@ -1131,13 +1195,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNotNull()) { // Bail out if the source is null. __ testl(src, src); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { // Bail out if the destination is null. 
__ testl(dest, dest); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } // If the length is negative, bail out. @@ -1146,7 +1210,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { !optimizations.GetCountIsSourceLength() && !optimizations.GetCountIsDestinationLength()) { __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>()); - __ j(kLess, slow_path->GetEntryLabel()); + __ j(kLess, intrinsic_slow_path->GetEntryLabel()); } // Validity checks: source. @@ -1154,7 +1218,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { src_pos, src, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsSourceLength()); @@ -1163,7 +1227,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { dest_pos, dest, length, - slow_path, + intrinsic_slow_path, temp1, optimizations.GetCountIsDestinationLength()); @@ -1172,38 +1236,80 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // type of the destination array. We do two checks: the classes are the same, // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - __ movl(temp1, Address(dest, class_offset)); - __ movl(temp2, Address(src, class_offset)); + bool did_unpoison = false; - if (!optimizations.GetDestinationIsNonPrimitiveArray() || - !optimizations.GetSourceIsNonPrimitiveArray()) { - // One or two of the references need to be unpoisoned. Unpoison them - // both to make the identity check valid. - __ MaybeUnpoisonHeapReference(temp1); - __ MaybeUnpoisonHeapReference(temp2); - did_unpoison = true; + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false); + // Register `temp1` is not trashed by the read barrier emitted + // by GenerateFieldLoadWithBakerReadBarrier below, as that + // method produces a call to a ReadBarrierMarkRegX entry point, + // which saves all potentially live registers, including + // temporaries such as `temp1`. + // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp2_loc, src, class_offset, /* needs_null_check */ false); + // If heap poisoning is enabled, `temp1` and `temp2` have been + // unpoisoned by the previous calls to + // GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ movl(temp1, Address(dest, class_offset)); + // /* HeapReference<Class> */ temp2 = src->klass_ + __ movl(temp2, Address(src, class_offset)); + if (!optimizations.GetDestinationIsNonPrimitiveArray() || + !optimizations.GetSourceIsNonPrimitiveArray()) { + // One or two of the references need to be unpoisoned. Unpoison them + // both to make the identity check valid. + __ MaybeUnpoisonHeapReference(temp1); + __ MaybeUnpoisonHeapReference(temp2); + did_unpoison = true; + } } if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp1->component_type_ - __ movl(CpuRegister(TMP), Address(temp1, component_offset)); - __ testl(CpuRegister(TMP), CpuRegister(TMP)); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ TMP = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `TMP` has been unpoisoned by + // the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ TMP = temp1->component_type_ + __ movl(CpuRegister(TMP), Address(temp1, component_offset)); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + } __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } if (!optimizations.GetSourceIsNonPrimitiveArray()) { // Bail out if the source is not a non primitive array. - // /* HeapReference<Class> */ TMP = temp2->component_type_ - __ movl(CpuRegister(TMP), Address(temp2, component_offset)); - __ testl(CpuRegister(TMP), CpuRegister(TMP)); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // For the same reason given earlier, `temp1` is not trashed by the + // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. + // /* HeapReference<Class> */ TMP = temp2->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + // If heap poisoning is enabled, `TMP` has been unpoisoned by + // the previous call to GenerateFieldLoadWithBakerReadBarrier. + } else { + // /* HeapReference<Class> */ TMP = temp2->component_type_ + __ movl(CpuRegister(TMP), Address(temp2, component_offset)); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + } __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } __ cmpl(temp1, temp2); @@ -1211,34 +1317,56 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (optimizations.GetDestinationIsTypedObjectArray()) { NearLabel do_copy; __ j(kEqual, &do_copy); - if (!did_unpoison) { + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + // We do not need to emit a read barrier for the following + // heap reference load, as `temp1` is only used in a + // comparison with null below, and this reference is not + // kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0)); + } else { + if (!did_unpoison) { + __ MaybeUnpoisonHeapReference(temp1); + } + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ movl(temp1, Address(temp1, component_offset)); __ MaybeUnpoisonHeapReference(temp1); + // No need to unpoison the following heap reference load, as + // we're comparing against null. + __ cmpl(Address(temp1, super_offset), Immediate(0)); } - // /* HeapReference<Class> */ temp1 = temp1->component_type_ - __ movl(temp1, Address(temp1, component_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ temp1 = temp1->super_class_ - __ movl(temp1, Address(temp1, super_offset)); - // No need to unpoison the result, we're comparing against null. - __ testl(temp1, temp1); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); __ Bind(&do_copy); } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - // /* HeapReference<Class> */ temp1 = src->klass_ - __ movl(temp1, Address(src, class_offset)); - __ MaybeUnpoisonHeapReference(temp1); - // /* HeapReference<Class> */ TMP = temp1->component_type_ - __ movl(CpuRegister(TMP), Address(temp1, component_offset)); - __ testl(CpuRegister(TMP), CpuRegister(TMP)); - __ j(kEqual, slow_path->GetEntryLabel()); - __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Class> */ temp1 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + // /* HeapReference<Class> */ TMP = temp1->component_type_ + codegen_->GenerateFieldLoadWithBakerReadBarrier( + invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false); + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + } else { + // /* HeapReference<Class> */ temp1 = src->klass_ + __ movl(temp1, Address(src, class_offset)); + __ MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ TMP = temp1->component_type_ + __ movl(CpuRegister(TMP), Address(temp1, component_offset)); + // No need to unpoison `TMP` now, as we're comparing against null. + __ testl(CpuRegister(TMP), CpuRegister(TMP)); + __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); + __ MaybeUnpoisonHeapReference(CpuRegister(TMP)); + } __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } // Compute base source address, base destination address, and end source address. @@ -1266,19 +1394,88 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0)); } - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. 
- NearLabel loop, done; - __ cmpl(temp1, temp3); - __ j(kEqual, &done); - __ Bind(&loop); - __ movl(CpuRegister(TMP), Address(temp1, 0)); - __ movl(Address(temp2, 0), CpuRegister(TMP)); - __ addl(temp1, Immediate(element_size)); - __ addl(temp2, Immediate(element_size)); - __ cmpl(temp1, temp3); - __ j(kNotEqual, &loop); - __ Bind(&done); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier): + // + // if (src_ptr != end_ptr) { + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // // Slow-path copy. + // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + // } + + NearLabel loop, done; + + // Don't enter copy loop if `length == 0`. + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; + constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); + + // if (rb_state == ReadBarrier::gray_ptr_) + // goto slow_path; + // At this point, just do the "if" and make sure that flags are preserved until the branch. + __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value)); + + // Load fence to prevent load-load reordering. + // Note that this is a no-op, thanks to the x86-64 memory model. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + + // Slow path used to copy array when `src` is gray. + SlowPathCode* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // We have done the "if" of the gray bit check above, now branch based on the flags. + __ j(kNotZero, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + __ Bind(&loop); + __ movl(CpuRegister(TMP), Address(temp1, 0)); + __ movl(Address(temp2, 0), CpuRegister(TMP)); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + __ Bind(&done); + } else { + // Non read barrier code. + + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. 
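// [Editor's sketch, not part of the patch] A standalone C++ model of the Baker-style
// SystemArrayCopy strategy described in the pseudocode comment above: test the gray bit
// of the source object's lock word once, then run either a raw copy loop or a loop that
// routes every copied reference through the mark entry point. The lock-word layout, the
// Mark() stand-in and the acquire load (the analogue of the kLoadAny barrier, a no-op on
// x86-64) are simplified assumptions, not the real ART definitions.
#include <atomic>
#include <cstdint>

struct Object;
Object* Mark(Object* ref) { return ref; }  // Stand-in for the ReadBarrier::Mark entry point.

constexpr uint32_t kReadBarrierStateShift = 28;  // Assumed bit position of the rb_state.
constexpr uint32_t kGrayState = 1u;              // Mirrors ReadBarrier::gray_ptr_ == 1.

inline bool IsGray(uint32_t lock_word) {
  // Given the numeric encoding, checking the low bit of the rb_state is enough.
  return ((lock_word >> kReadBarrierStateShift) & 1u) == kGrayState;
}

void CopyReferences(Object** src_ptr, Object** end_ptr, Object** dest_ptr,
                    const std::atomic<uint32_t>& src_monitor) {
  if (src_ptr == end_ptr) {
    return;  // Empty copy: mirrors the initial cmpl / j(kEqual, &done).
  }
  const bool is_gray = IsGray(src_monitor.load(std::memory_order_acquire));
  if (is_gray) {
    do {  // Slow path: mark each reference as it is copied.
      *dest_ptr++ = Mark(*src_ptr++);
    } while (src_ptr != end_ptr);
  } else {
    do {  // Fast path: raw copy, no per-element work.
      *dest_ptr++ = *src_ptr++;
    } while (src_ptr != end_ptr);
  }
}

int main() {
  Object* src[2] = {nullptr, nullptr};
  Object* dst[2] = {nullptr, nullptr};
  std::atomic<uint32_t> monitor{0u};  // rb_state == white: the fast path is taken.
  CopyReferences(src, src + 2, dst, monitor);
  return 0;
}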
+ NearLabel loop, done; + __ cmpl(temp1, temp3); + __ j(kEqual, &done); + __ Bind(&loop); + __ movl(CpuRegister(TMP), Address(temp1, 0)); + __ movl(Address(temp2, 0), CpuRegister(TMP)); + __ addl(temp1, Immediate(element_size)); + __ addl(temp2, Immediate(element_size)); + __ cmpl(temp1, temp3); + __ j(kNotEqual, &loop); + __ Bind(&done); + } // We only need one card marking on the destination array. codegen_->MarkGCCard(temp1, @@ -1287,7 +1484,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { CpuRegister(kNoRegister), /* value_can_be_null */ false); - __ Bind(slow_path->GetExitLabel()); + __ Bind(intrinsic_slow_path->GetExitLabel()); } void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) { @@ -1892,10 +2089,9 @@ static void GenUnsafeGet(HInvoke* invoke, case Primitive::kPrimNot: { if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, output_loc, base, src, temp, /* needs_null_check */ false); + invoke, output_loc, base, src, /* needs_null_check */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -1918,9 +2114,7 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, - HInvoke* invoke, - Primitive::Type type) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); @@ -1934,30 +2128,25 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier. 
- locations->AddTemp(Location::RequiresRegister()); - } } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(arena_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(arena_, invoke); } diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index a6d234d739..8c0231e1aa 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -157,13 +157,26 @@ class OptimizingCFITest : public CFITest { TestImpl(isa, #isa, expected_asm, expected_cfi); \ } +#ifdef ART_ENABLE_CODEGEN_arm TEST_ISA(kThumb2) +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 TEST_ISA(kArm64) +#endif +#ifdef ART_ENABLE_CODEGEN_x86 TEST_ISA(kX86) +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 TEST_ISA(kX86_64) +#endif +#ifdef ART_ENABLE_CODEGEN_mips TEST_ISA(kMips) +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 TEST_ISA(kMips64) +#endif +#ifdef ART_ENABLE_CODEGEN_arm TEST_F(OptimizingCFITest, kThumb2Adjust) { std::vector<uint8_t> expected_asm( expected_asm_kThumb2_adjust, @@ -184,7 +197,9 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) { Finish(); Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi); } +#endif +#ifdef ART_ENABLE_CODEGEN_mips TEST_F(OptimizingCFITest, kMipsAdjust) { // One NOP in delay slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. static constexpr size_t kNumNops = 1u + (1u << 15); @@ -212,7 +227,9 @@ TEST_F(OptimizingCFITest, kMipsAdjust) { Finish(); Check(kMips, "kMips_adjust", expected_asm, expected_cfi); } +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 TEST_F(OptimizingCFITest, kMips64Adjust) { // One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. static constexpr size_t kNumNops = 1u + (1u << 15); @@ -240,6 +257,7 @@ TEST_F(OptimizingCFITest, kMips64Adjust) { Finish(); Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi); } +#endif #endif // ART_TARGET_ANDROID diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 698b0b6d43..f7c325ed93 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -428,8 +428,14 @@ static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { || instruction_set == kX86_64; } +// Strip pass name suffix to get optimization name. 
+static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) { + size_t pos = pass_name.find(kPassNameSeparator); + return pos == std::string::npos ? pass_name : pass_name.substr(0, pos); +} + static HOptimization* BuildOptimization( - const std::string& opt_name, + const std::string& pass_name, ArenaAllocator* arena, HGraph* graph, OptimizingCompilerStats* stats, @@ -439,6 +445,7 @@ static HOptimization* BuildOptimization( StackHandleScopeCollection* handles, SideEffectsAnalysis* most_recent_side_effects, HInductionVarAnalysis* most_recent_induction) { + std::string opt_name = ConvertPassNameToOptimizationName(pass_name); if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) { CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); return new (arena) BoundsCheckElimination(graph, @@ -446,11 +453,11 @@ static HOptimization* BuildOptimization( most_recent_induction); } else if (opt_name == GVNOptimization::kGlobalValueNumberingPassName) { CHECK(most_recent_side_effects != nullptr); - return new (arena) GVNOptimization(graph, *most_recent_side_effects); + return new (arena) GVNOptimization(graph, *most_recent_side_effects, pass_name.c_str()); } else if (opt_name == HConstantFolding::kConstantFoldingPassName) { - return new (arena) HConstantFolding(graph); + return new (arena) HConstantFolding(graph, pass_name.c_str()); } else if (opt_name == HDeadCodeElimination::kDeadCodeEliminationPassName) { - return new (arena) HDeadCodeElimination(graph, stats); + return new (arena) HDeadCodeElimination(graph, stats, pass_name.c_str()); } else if (opt_name == HInliner::kInlinerPassName) { size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_; return new (arena) HInliner(graph, // outer_graph @@ -470,7 +477,7 @@ static HOptimization* BuildOptimization( } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { return new (arena) HInductionVarAnalysis(graph); } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) { - return new (arena) InstructionSimplifier(graph, stats); + return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str()); } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) { return new (arena) IntrinsicsRecognizer(graph, driver, stats); } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) { @@ -522,12 +529,9 @@ static ArenaVector<HOptimization*> BuildOptimizations( SideEffectsAnalysis* most_recent_side_effects = nullptr; HInductionVarAnalysis* most_recent_induction = nullptr; ArenaVector<HOptimization*> ret(arena->Adapter()); - for (std::string pass_name : pass_names) { - size_t pos = pass_name.find(kPassNameSeparator); // Strip suffix to get base pass name. - std::string opt_name = pos == std::string::npos ? 
pass_name : pass_name.substr(0, pos); - + for (const std::string& pass_name : pass_names) { HOptimization* opt = BuildOptimization( - opt_name, + pass_name, arena, graph, stats, @@ -540,6 +544,7 @@ static ArenaVector<HOptimization*> BuildOptimizations( CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\""; ret.push_back(opt); + std::string opt_name = ConvertPassNameToOptimizationName(pass_name); if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) { most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt); } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index b73f73893c..6effc306dc 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -279,8 +279,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { const DexFile& dex_file = load_string->GetDexFile(); uint32_t string_index = load_string->GetStringIndex(); - bool is_in_dex_cache = false; - HLoadString::LoadKind desired_load_kind; + HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; uint64_t address = 0u; // String or dex cache element address. { Runtime* runtime = Runtime::Current(); @@ -296,33 +295,14 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { DCHECK(!runtime->UseJitCompilation()); mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache); CHECK(string != nullptr); - if (!compiler_driver_->GetSupportBootImageFixup()) { - // MIPS/MIPS64 or compiler_driver_test. Do not sharpen. - desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; - } else { - DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)); - is_in_dex_cache = true; - desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic() - ? HLoadString::LoadKind::kBootImageLinkTimePcRelative - : HLoadString::LoadKind::kBootImageLinkTimeAddress; - } + // TODO: In follow up CL, add PcRelative and Address back in. } else if (runtime->UseJitCompilation()) { // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus. // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); mirror::String* string = dex_cache->GetResolvedString(string_index); - is_in_dex_cache = (string != nullptr); if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { - // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787 desired_load_kind = HLoadString::LoadKind::kBootImageAddress; address = reinterpret_cast64<uint64_t>(string); - } else { - // Note: If the string is not in the dex cache, the instruction needs environment - // and will not be inlined across dex files. Within a dex file, the slow-path helper - // loads the correct string and inlined frames are used correctly for OOM stack trace. - // TODO: Write a test for this. Bug: 29416588 - desired_load_kind = HLoadString::LoadKind::kDexCacheAddress; - void* dex_cache_element_address = &dex_cache->GetStrings()[string_index]; - address = reinterpret_cast64<uint64_t>(dex_cache_element_address); } } else { // AOT app compilation. Try to lookup the string without allocating if not found. 
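// [Editor's sketch, not part of the patch] The net effect of the ProcessLoadString
// rewrite above and continued below: the load kind now defaults to going through the
// dex cache via the current method, and is only upgraded to a direct boot-image address
// when the string is already resolved into the boot image (unconditionally for JIT, and
// only for non-PIC AOT). The types and helper below are illustrative assumptions, not
// real ART APIs.
#include <cstdint>

enum class LoadKind { kBootImageAddress, kDexCacheViaMethod };

struct ResolvedString {
  bool in_boot_image;  // The String object already lives in the boot image space.
  uint64_t address;    // Its address, meaningful only when in_boot_image is true.
};

LoadKind ChooseStringLoadKind(const ResolvedString& str, bool use_jit, bool compile_pic,
                              uint64_t* out_address) {
  LoadKind kind = LoadKind::kDexCacheViaMethod;  // New default; no MarkInDexCache step anymore.
  if (str.in_boot_image && (use_jit || !compile_pic)) {
    kind = LoadKind::kBootImageAddress;
    *out_address = str.address;  // Embed the direct pointer in the generated code.
  }
  return kind;
}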
@@ -332,19 +312,9 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { !codegen_->GetCompilerOptions().GetCompilePic()) { desired_load_kind = HLoadString::LoadKind::kBootImageAddress; address = reinterpret_cast64<uint64_t>(string); - } else { - // Not JIT and either the string is not in boot image or we are compiling in PIC mode. - // Use PC-relative load from the dex cache if the dex file belongs - // to the oat file that we're currently compiling. - desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file) - ? HLoadString::LoadKind::kDexCachePcRelative - : HLoadString::LoadKind::kDexCacheViaMethod; } } } - if (is_in_dex_cache) { - load_string->MarkInDexCache(); - } HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind); switch (load_kind) { diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 8ba6fb4f91..17a6650fcd 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -906,6 +906,12 @@ class ArmAssembler : public Assembler { // reg = -reg. rsb(reg, reg, ShifterOperand(0)); } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. void MaybeUnpoisonHeapReference(Register reg) { if (kPoisonHeapReferences) { diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 19450b3a32..f91bcfa92e 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -146,6 +146,12 @@ void Arm64Assembler::UnpoisonHeapReference(Register reg) { ___ Neg(reg, Operand(reg)); } +void Arm64Assembler::MaybePoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } +} + void Arm64Assembler::MaybeUnpoisonHeapReference(Register reg) { if (kPoisonHeapReferences) { UnpoisonHeapReference(reg); diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index 2847cb86a8..66a7fed804 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -93,6 +93,8 @@ class Arm64Assembler FINAL : public Assembler { void PoisonHeapReference(vixl::aarch64::Register reg); // Unpoison a heap reference contained in `reg`. void UnpoisonHeapReference(vixl::aarch64::Register reg); + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(vixl::aarch64::Register reg); // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
void MaybeUnpoisonHeapReference(vixl::aarch64::Register reg); diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc index 797a98cfd5..1b743134ed 100644 --- a/compiler/utils/jni_macro_assembler.cc +++ b/compiler/utils/jni_macro_assembler.cc @@ -99,6 +99,7 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( return MacroAsm64UniquePtr(new (arena) x86_64::X86_64JNIMacroAssembler(arena)); #endif default: + UNUSED(arena); LOG(FATAL) << "Unknown/unsupported 8B InstructionSet: " << instruction_set; UNREACHABLE(); } diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 8b7da3fa77..bfc63d14da 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -1407,44 +1407,6 @@ void MipsAssembler::LoadConst64(Register reg_hi, Register reg_lo, int64_t value) } } -void MipsAssembler::StoreConst32ToOffset(int32_t value, - Register base, - int32_t offset, - Register temp) { - CHECK_NE(temp, AT); // Must not use AT as temp, so as not to overwrite the adjusted base. - AdjustBaseAndOffset(base, offset, /* is_doubleword */ false); - if (value == 0) { - temp = ZERO; - } else { - LoadConst32(temp, value); - } - Sw(temp, base, offset); -} - -void MipsAssembler::StoreConst64ToOffset(int64_t value, - Register base, - int32_t offset, - Register temp) { - CHECK_NE(temp, AT); // Must not use AT as temp, so as not to overwrite the adjusted base. - AdjustBaseAndOffset(base, offset, /* is_doubleword */ true); - uint32_t low = Low32Bits(value); - uint32_t high = High32Bits(value); - if (low == 0) { - Sw(ZERO, base, offset); - } else { - LoadConst32(temp, low); - Sw(temp, base, offset); - } - if (high == 0) { - Sw(ZERO, base, offset + kMipsWordSize); - } else { - if (high != low) { - LoadConst32(temp, high); - } - Sw(temp, base, offset + kMipsWordSize); - } -} - void MipsAssembler::LoadSConst32(FRegister r, int32_t value, Register temp) { if (value == 0) { temp = ZERO; @@ -2533,61 +2495,19 @@ void MipsAssembler::AdjustBaseAndOffset(Register& base, CHECK_EQ(misalignment, offset & (kMipsDoublewordSize - 1)); } -void MipsAssembler::LoadFromOffset(LoadOperandType type, Register reg, Register base, +void MipsAssembler::LoadFromOffset(LoadOperandType type, + Register reg, + Register base, int32_t offset) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); - switch (type) { - case kLoadSignedByte: - Lb(reg, base, offset); - break; - case kLoadUnsignedByte: - Lbu(reg, base, offset); - break; - case kLoadSignedHalfword: - Lh(reg, base, offset); - break; - case kLoadUnsignedHalfword: - Lhu(reg, base, offset); - break; - case kLoadWord: - Lw(reg, base, offset); - break; - case kLoadDoubleword: - if (reg == base) { - // This will clobber the base when loading the lower register. Since we have to load the - // higher register as well, this will fail. Solution: reverse the order. 
- Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); - Lw(reg, base, offset); - } else { - Lw(reg, base, offset); - Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); - } - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } + LoadFromOffset<>(type, reg, base, offset); } void MipsAssembler::LoadSFromOffset(FRegister reg, Register base, int32_t offset) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true); - Lwc1(reg, base, offset); + LoadSFromOffset<>(reg, base, offset); } void MipsAssembler::LoadDFromOffset(FRegister reg, Register base, int32_t offset) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true); - if (offset & 0x7) { - if (Is32BitFPU()) { - Lwc1(reg, base, offset); - Lwc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize); - } else { - // 64-bit FPU. - Lwc1(reg, base, offset); - Lw(T8, base, offset + kMipsWordSize); - Mthc1(T8, reg); - } - } else { - Ldc1(reg, base, offset); - } + LoadDFromOffset<>(reg, base, offset); } void MipsAssembler::EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, @@ -2611,53 +2531,19 @@ void MipsAssembler::EmitLoad(ManagedRegister m_dst, Register src_register, int32 } } -void MipsAssembler::StoreToOffset(StoreOperandType type, Register reg, Register base, +void MipsAssembler::StoreToOffset(StoreOperandType type, + Register reg, + Register base, int32_t offset) { - // Must not use AT as `reg`, so as not to overwrite the value being stored - // with the adjusted `base`. - CHECK_NE(reg, AT); - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); - switch (type) { - case kStoreByte: - Sb(reg, base, offset); - break; - case kStoreHalfword: - Sh(reg, base, offset); - break; - case kStoreWord: - Sw(reg, base, offset); - break; - case kStoreDoubleword: - CHECK_NE(reg, base); - CHECK_NE(static_cast<Register>(reg + 1), base); - Sw(reg, base, offset); - Sw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } + StoreToOffset<>(type, reg, base, offset); } void MipsAssembler::StoreSToOffset(FRegister reg, Register base, int32_t offset) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true); - Swc1(reg, base, offset); + StoreSToOffset<>(reg, base, offset); } void MipsAssembler::StoreDToOffset(FRegister reg, Register base, int32_t offset) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true); - if (offset & 0x7) { - if (Is32BitFPU()) { - Swc1(reg, base, offset); - Swc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize); - } else { - // 64-bit FPU. 
- Mfhc1(T8, reg); - Swc1(reg, base, offset); - Sw(T8, base, offset + kMipsWordSize); - } - } else { - Sdc1(reg, base, offset); - } + StoreDToOffset<>(reg, base, offset); } static dwarf::Reg DWARFReg(Register reg) { diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 41b6c6bd32..434ca679d5 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -412,8 +412,6 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void LoadConst64(Register reg_hi, Register reg_lo, int64_t value); void LoadDConst64(FRegister rd, int64_t value, Register temp); void LoadSConst32(FRegister r, int32_t value, Register temp); - void StoreConst32ToOffset(int32_t value, Register base, int32_t offset, Register temp); - void StoreConst64ToOffset(int64_t value, Register base, int32_t offset, Register temp); void Addiu32(Register rt, Register rs, int32_t value, Register rtmp = AT); // These will generate R2 branches or R6 branches as appropriate. @@ -444,6 +442,204 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi int32_t& offset, bool is_doubleword, bool is_float = false); + + private: + struct NoImplicitNullChecker { + void operator()() {} + }; + + public: + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreConst32ToOffset(int32_t value, + Register base, + int32_t offset, + Register temp, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + CHECK_NE(temp, AT); // Must not use AT as temp, so as not to overwrite the adjusted base. + AdjustBaseAndOffset(base, offset, /* is_doubleword */ false); + if (value == 0) { + temp = ZERO; + } else { + LoadConst32(temp, value); + } + Sw(temp, base, offset); + null_checker(); + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreConst64ToOffset(int64_t value, + Register base, + int32_t offset, + Register temp, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + CHECK_NE(temp, AT); // Must not use AT as temp, so as not to overwrite the adjusted base. + AdjustBaseAndOffset(base, offset, /* is_doubleword */ true); + uint32_t low = Low32Bits(value); + uint32_t high = High32Bits(value); + if (low == 0) { + Sw(ZERO, base, offset); + } else { + LoadConst32(temp, low); + Sw(temp, base, offset); + } + null_checker(); + if (high == 0) { + Sw(ZERO, base, offset + kMipsWordSize); + } else { + if (high != low) { + LoadConst32(temp, high); + } + Sw(temp, base, offset + kMipsWordSize); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadFromOffset(LoadOperandType type, + Register reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); + switch (type) { + case kLoadSignedByte: + Lb(reg, base, offset); + break; + case kLoadUnsignedByte: + Lbu(reg, base, offset); + break; + case kLoadSignedHalfword: + Lh(reg, base, offset); + break; + case kLoadUnsignedHalfword: + Lhu(reg, base, offset); + break; + case kLoadWord: + Lw(reg, base, offset); + break; + case kLoadDoubleword: + if (reg == base) { + // This will clobber the base when loading the lower register. Since we have to load the + // higher register as well, this will fail. Solution: reverse the order. 
+ Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); + null_checker(); + Lw(reg, base, offset); + } else { + Lw(reg, base, offset); + null_checker(); + Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); + } + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + if (type != kLoadDoubleword) { + null_checker(); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadSFromOffset(FRegister reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true); + Lwc1(reg, base, offset); + null_checker(); + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadDFromOffset(FRegister reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true); + if (IsAligned<kMipsDoublewordSize>(offset)) { + Ldc1(reg, base, offset); + null_checker(); + } else { + if (Is32BitFPU()) { + Lwc1(reg, base, offset); + null_checker(); + Lwc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize); + } else { + // 64-bit FPU. + Lwc1(reg, base, offset); + null_checker(); + Lw(T8, base, offset + kMipsWordSize); + Mthc1(T8, reg); + } + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreToOffset(StoreOperandType type, + Register reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + // Must not use AT as `reg`, so as not to overwrite the value being stored + // with the adjusted `base`. + CHECK_NE(reg, AT); + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + switch (type) { + case kStoreByte: + Sb(reg, base, offset); + break; + case kStoreHalfword: + Sh(reg, base, offset); + break; + case kStoreWord: + Sw(reg, base, offset); + break; + case kStoreDoubleword: + CHECK_NE(reg, base); + CHECK_NE(static_cast<Register>(reg + 1), base); + Sw(reg, base, offset); + null_checker(); + Sw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + if (type != kStoreDoubleword) { + null_checker(); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreSToOffset(FRegister reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true); + Swc1(reg, base, offset); + null_checker(); + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreDToOffset(FRegister reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true); + if (IsAligned<kMipsDoublewordSize>(offset)) { + Sdc1(reg, base, offset); + null_checker(); + } else { + if (Is32BitFPU()) { + Swc1(reg, base, offset); + null_checker(); + Swc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize); + } else { + // 64-bit FPU. 
+ Mfhc1(T8, reg); + Swc1(reg, base, offset); + null_checker(); + Sw(T8, base, offset + kMipsWordSize); + } + } + } + void LoadFromOffset(LoadOperandType type, Register reg, Register base, int32_t offset); void LoadSFromOffset(FRegister reg, Register base, int32_t offset); void LoadDFromOffset(FRegister reg, Register base, int32_t offset); diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index f1a991574b..f2ef41f400 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1148,6 +1148,23 @@ void X86Assembler::testl(Register reg, const Immediate& immediate) { } +void X86Assembler::testb(const Address& dst, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF6); + EmitOperand(EAX, dst); + CHECK(imm.is_int8()); + EmitUint8(imm.value() & 0xFF); +} + + +void X86Assembler::testl(const Address& dst, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF7); + EmitOperand(0, dst); + EmitImmediate(imm); +} + + void X86Assembler::andl(Register dst, Register src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x23); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 92a92a58b9..2ddcd760dd 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -496,6 +496,9 @@ class X86Assembler FINAL : public Assembler { void testl(Register reg, const Immediate& imm); void testl(Register reg1, const Address& address); + void testb(const Address& dst, const Immediate& imm); + void testl(const Address& dst, const Immediate& imm); + void andl(Register dst, const Immediate& imm); void andl(Register dst, Register src); void andl(Register dst, const Address& address); @@ -639,6 +642,12 @@ class X86Assembler FINAL : public Assembler { void PoisonHeapReference(Register reg) { negl(reg); } // Unpoison a heap reference contained in `reg`. void UnpoisonHeapReference(Register reg) { negl(reg); } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
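// [Editor's sketch, not part of the patch] The shape of the ImplicitNullChecker hook
// added to the templated MIPS load/store helpers above: each helper takes a callable,
// defaulted to a no-op functor, and invokes it right after the first memory access so a
// code generator can record the PC of the instruction that may fault for an implicit
// null check. `Emitter`, StoreWord() and the element-indexed offset are illustrative
// simplifications.
#include <cstdint>
#include <iostream>

struct NoImplicitNullChecker {
  void operator()() const {}
};

struct Emitter {
  template <typename ImplicitNullChecker = NoImplicitNullChecker>
  void StoreWord(uint32_t value, uint32_t* base, int32_t offset,
                 ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
    base[offset] = value;  // The memory access (stand-in for Sw(reg, base, offset)).
    null_checker();        // Callback fires once the possibly-faulting access is emitted.
  }
};

int main() {
  uint32_t buffer[4] = {0u, 0u, 0u, 0u};
  Emitter emitter;
  // Default: no callback, same behavior as the old out-of-line, non-template code.
  emitter.StoreWord(42u, buffer, 0);
  // With a recorder: the lambda runs immediately after the first access.
  emitter.StoreWord(7u, buffer, 1, [] { std::cout << "record implicit null-check PC\n"; });
  return 0;
}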
void MaybeUnpoisonHeapReference(Register reg) { if (kPoisonHeapReferences) { diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 307e034b76..61d70d714a 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -375,6 +375,42 @@ TEST_F(AssemblerX86Test, CmovlAddress) { DriverStr(expected, "cmovl_address"); } +TEST_F(AssemblerX86Test, TestbAddressImmediate) { + GetAssembler()->testb( + x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12), + x86::Immediate(1)); + GetAssembler()->testb( + x86::Address(x86::Register(x86::ESP), FrameOffset(7)), + x86::Immediate(-128)); + GetAssembler()->testb( + x86::Address(x86::Register(x86::EBX), MemberOffset(130)), + x86::Immediate(127)); + const char* expected = + "testb $1, 0xc(%EDI,%EBX,4)\n" + "testb $-128, 0x7(%ESP)\n" + "testb $127, 0x82(%EBX)\n"; + + DriverStr(expected, "TestbAddressImmediate"); +} + +TEST_F(AssemblerX86Test, TestlAddressImmediate) { + GetAssembler()->testl( + x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12), + x86::Immediate(1)); + GetAssembler()->testl( + x86::Address(x86::Register(x86::ESP), FrameOffset(7)), + x86::Immediate(-100000)); + GetAssembler()->testl( + x86::Address(x86::Register(x86::EBX), MemberOffset(130)), + x86::Immediate(77777777)); + const char* expected = + "testl $1, 0xc(%EDI,%EBX,4)\n" + "testl $-100000, 0x7(%ESP)\n" + "testl $77777777, 0x82(%EBX)\n"; + + DriverStr(expected, "TestlAddressImmediate"); +} + ///////////////// // Near labels // ///////////////// diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index ddc824425e..1f73aa7374 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1389,6 +1389,25 @@ void X86_64Assembler::testq(CpuRegister reg, const Address& address) { } +void X86_64Assembler::testb(const Address& dst, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst); + EmitUint8(0xF6); + EmitOperand(Register::RAX, dst); + CHECK(imm.is_int8()); + EmitUint8(imm.value() & 0xFF); +} + + +void X86_64Assembler::testl(const Address& dst, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst); + EmitUint8(0xF7); + EmitOperand(0, dst); + EmitImmediate(imm); +} + + void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(dst, src); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 370f49cb05..3a4bfca6b0 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -528,6 +528,9 @@ class X86_64Assembler FINAL : public Assembler { void testq(CpuRegister reg1, CpuRegister reg2); void testq(CpuRegister reg, const Address& address); + void testb(const Address& address, const Immediate& imm); + void testl(const Address& address, const Immediate& imm); + void andl(CpuRegister dst, const Immediate& imm); void andl(CpuRegister dst, CpuRegister src); void andl(CpuRegister reg, const Address& address); @@ -741,6 +744,12 @@ class X86_64Assembler FINAL : public Assembler { void PoisonHeapReference(CpuRegister reg) { negl(reg); } // Unpoison a heap reference contained in `reg`. 
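// [Editor's sketch, not part of the patch] The MaybePoisonHeapReference helpers added to
// the ARM, ARM64, x86 and x86-64 assemblers in this change all follow the same shape:
// poisoning is an in-place two's-complement negation of the 32-bit reference (rsb/Neg/negl),
// and the Maybe* wrapper compiles away entirely when heap poisoning is disabled. This
// standalone model uses a plain uint32_t in place of an assembler register.
#include <cstdint>

constexpr bool kPoisonHeapReferences = false;  // Build-time switch (value assumed here).

inline void PoisonHeapReference(uint32_t& ref) { ref = 0u - ref; }    // reg = -reg.
inline void UnpoisonHeapReference(uint32_t& ref) { ref = 0u - ref; }  // reg = -reg.

inline void MaybePoisonHeapReference(uint32_t& ref) {
  if (kPoisonHeapReferences) {
    PoisonHeapReference(ref);
  }
}

inline void MaybeUnpoisonHeapReference(uint32_t& ref) {
  if (kPoisonHeapReferences) {
    UnpoisonHeapReference(ref);
  }
}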
void UnpoisonHeapReference(CpuRegister reg) { negl(reg); } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(CpuRegister reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. void MaybeUnpoisonHeapReference(CpuRegister reg) { if (kPoisonHeapReferences) { diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 36c966b3cf..48a18760f1 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1526,6 +1526,48 @@ TEST_F(AssemblerX86_64Test, Cmpb) { DriverStr(expected, "cmpb"); } +TEST_F(AssemblerX86_64Test, TestbAddressImmediate) { + GetAssembler()->testb( + x86_64::Address(x86_64::CpuRegister(x86_64::RDI), + x86_64::CpuRegister(x86_64::RBX), + x86_64::TIMES_4, + 12), + x86_64::Immediate(1)); + GetAssembler()->testb( + x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)), + x86_64::Immediate(-128)); + GetAssembler()->testb( + x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)), + x86_64::Immediate(127)); + const char* expected = + "testb $1, 0xc(%RDI,%RBX,4)\n" + "testb $-128, 0x7(%RSP)\n" + "testb $127, 0x82(%RBX)\n"; + + DriverStr(expected, "TestbAddressImmediate"); +} + +TEST_F(AssemblerX86_64Test, TestlAddressImmediate) { + GetAssembler()->testl( + x86_64::Address(x86_64::CpuRegister(x86_64::RDI), + x86_64::CpuRegister(x86_64::RBX), + x86_64::TIMES_4, + 12), + x86_64::Immediate(1)); + GetAssembler()->testl( + x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)), + x86_64::Immediate(-100000)); + GetAssembler()->testl( + x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)), + x86_64::Immediate(77777777)); + const char* expected = + "testl $1, 0xc(%RDI,%RBX,4)\n" + "testl $-100000, 0x7(%RSP)\n" + "testl $77777777, 0x82(%RBX)\n"; + + DriverStr(expected, "TestlAddressImmediate"); +} + class JNIMacroAssemblerX86_64Test : public JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler> { public: using Base = JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler>; diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk index 71e9a283d6..6c0bcb173e 100644 --- a/dalvikvm/Android.mk +++ b/dalvikvm/Android.mk @@ -18,7 +18,7 @@ LOCAL_PATH := $(call my-dir) include art/build/Android.common.mk -dalvikvm_cflags := -Wall -Werror -Wextra -std=gnu++11 +dalvikvm_cflags := -Wall -Werror -Wextra include $(CLEAR_VARS) LOCAL_MODULE := dalvikvm diff --git a/disassembler/disassembler.cc b/disassembler/disassembler.cc index e604c1f629..bcd0d1630a 100644 --- a/disassembler/disassembler.cc +++ b/disassembler/disassembler.cc @@ -32,10 +32,8 @@ Disassembler* Disassembler::Create(InstructionSet instruction_set, DisassemblerO return new arm::DisassemblerArm(options); } else if (instruction_set == kArm64) { return new arm64::DisassemblerArm64(options); - } else if (instruction_set == kMips) { - return new mips::DisassemblerMips(options, false); - } else if (instruction_set == kMips64) { - return new mips::DisassemblerMips(options, true); + } else if (instruction_set == kMips || instruction_set == kMips64) { + return new mips::DisassemblerMips(options); } else if (instruction_set == kX86) { return new x86::DisassemblerX86(options, false); } else if (instruction_set == kX86_64) { diff --git a/disassembler/disassembler.h b/disassembler/disassembler.h index b08031587f..86793ccb19 100644 --- 
a/disassembler/disassembler.h +++ b/disassembler/disassembler.h @@ -28,8 +28,9 @@ namespace art { class DisassemblerOptions { public: - // Should the disassembler print absolute or relative addresses. - const bool absolute_addresses_; + using ThreadOffsetNameFunction = void (*)(std::ostream& os, uint32_t offset); + + ThreadOffsetNameFunction thread_offset_name_function_; // Base address for calculating relative code offsets when absolute_addresses_ is false. const uint8_t* const base_address_; @@ -37,6 +38,9 @@ class DisassemblerOptions { // End address (exclusive); const uint8_t* const end_address_; + // Should the disassembler print absolute or relative addresses. + const bool absolute_addresses_; + // If set, the disassembler is allowed to look at load targets in literal // pools. const bool can_read_literals_; @@ -44,10 +48,12 @@ class DisassemblerOptions { DisassemblerOptions(bool absolute_addresses, const uint8_t* base_address, const uint8_t* end_address, - bool can_read_literals) - : absolute_addresses_(absolute_addresses), + bool can_read_literals, + ThreadOffsetNameFunction fn) + : thread_offset_name_function_(fn), base_address_(base_address), end_address_(end_address), + absolute_addresses_(absolute_addresses), can_read_literals_(can_read_literals) {} private: diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index 4f0e144aa8..a47b6adcc9 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -25,7 +25,6 @@ #include "base/bit_utils.h" #include "base/logging.h" #include "base/stringprintf.h" -#include "thread.h" namespace art { namespace arm { @@ -329,7 +328,7 @@ void DisassemblerArm::DumpArm(std::ostream& os, const uint8_t* instr_ptr) { } if (rn.r == 9) { args << " ; "; - Thread::DumpThreadOffset<kArmPointerSize>(args, offset); + GetDisassemblerOptions()->thread_offset_name_function_(args, offset); } } } @@ -1401,7 +1400,7 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) args << Rt << ", [" << Rn << ", #" << (U != 0u ? 
"" : "-") << imm12 << "]"; if (Rn.r == TR && is_load) { args << " ; "; - Thread::DumpThreadOffset<kArmPointerSize>(args, imm12); + GetDisassemblerOptions()->thread_offset_name_function_(args, imm12); } else if (Rn.r == PC) { T2LitType lit_type[] = { kT2LitUByte, kT2LitUHalf, kT2LitHexWord, kT2LitInvalid, diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc index 0ef9025cd4..80bacb2be3 100644 --- a/disassembler/disassembler_arm64.cc +++ b/disassembler/disassembler_arm64.cc @@ -22,7 +22,6 @@ #include "base/logging.h" #include "base/stringprintf.h" -#include "thread.h" using namespace vixl::aarch64; // NOLINT(build/namespaces) @@ -102,7 +101,7 @@ void CustomDisassembler::VisitLoadStoreUnsignedOffset(const Instruction* instr) if (instr->GetRn() == TR) { int64_t offset = instr->GetImmLSUnsigned() << instr->GetSizeLS(); std::ostringstream tmp_stream; - Thread::DumpThreadOffset<kArm64PointerSize>(tmp_stream, static_cast<uint32_t>(offset)); + options_->thread_offset_name_function_(tmp_stream, static_cast<uint32_t>(offset)); AppendToOutput(" ; %s", tmp_stream.str().c_str()); } } diff --git a/disassembler/disassembler_arm64.h b/disassembler/disassembler_arm64.h index 7c64792b13..19e4dfb486 100644 --- a/disassembler/disassembler_arm64.h +++ b/disassembler/disassembler_arm64.h @@ -35,7 +35,8 @@ class CustomDisassembler FINAL : public vixl::aarch64::Disassembler { : vixl::aarch64::Disassembler(), read_literals_(options->can_read_literals_), base_address_(options->base_address_), - end_address_(options->end_address_) { + end_address_(options->end_address_), + options_(options) { if (!options->absolute_addresses_) { MapCodeAddress(0, reinterpret_cast<const vixl::aarch64::Instruction*>(options->base_address_)); @@ -64,6 +65,8 @@ class CustomDisassembler FINAL : public vixl::aarch64::Disassembler { // Valid address range: [base_address_, end_address_) const void* const base_address_; const void* const end_address_; + + DisassemblerOptions* options_; }; class DisassemblerArm64 FINAL : public Disassembler { diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc index 3448878394..02c6d71510 100644 --- a/disassembler/disassembler_mips.cc +++ b/disassembler/disassembler_mips.cc @@ -21,7 +21,6 @@ #include "base/logging.h" #include "base/stringprintf.h" -#include "thread.h" namespace art { namespace mips { @@ -503,11 +502,7 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { args << StringPrintf("%+d(r%d)", offset, rs); if (rs == 17) { args << " ; "; - if (is64bit_) { - Thread::DumpThreadOffset<kMips64PointerSize>(args, offset); - } else { - Thread::DumpThreadOffset<kMipsPointerSize>(args, offset); - } + GetDisassemblerOptions()->thread_offset_name_function_(args, offset); } } break; diff --git a/disassembler/disassembler_mips.h b/disassembler/disassembler_mips.h index b0e49b3978..6342f22962 100644 --- a/disassembler/disassembler_mips.h +++ b/disassembler/disassembler_mips.h @@ -26,9 +26,8 @@ namespace mips { class DisassemblerMips FINAL : public Disassembler { public: - DisassemblerMips(DisassemblerOptions* options, bool is64bit) + explicit DisassemblerMips(DisassemblerOptions* options) : Disassembler(options), - is64bit_(is64bit), last_ptr_(nullptr), last_instr_(0) {} @@ -36,8 +35,6 @@ class DisassemblerMips FINAL : public Disassembler { void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE; private: - const bool is64bit_; - // Address and encoding of the last disassembled instruction. 
// Needed to produce more readable disassembly of certain 2-instruction sequences. const uint8_t* last_ptr_; diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index 147e0b142d..2ca84e5e5b 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -23,7 +23,6 @@ #include "base/logging.h" #include "base/stringprintf.h" -#include "thread.h" namespace art { namespace x86 { @@ -1409,11 +1408,11 @@ DISASSEMBLER_ENTRY(cmp, } if (prefix[1] == kFs && !supports_rex_) { args << " ; "; - Thread::DumpThreadOffset<kX86PointerSize>(args, address_bits); + GetDisassemblerOptions()->thread_offset_name_function_(args, address_bits); } if (prefix[1] == kGs && supports_rex_) { args << " ; "; - Thread::DumpThreadOffset<kX86_64PointerSize>(args, address_bits); + GetDisassemblerOptions()->thread_offset_name_function_(args, address_bits); } const char* prefix_str; switch (prefix[0]) { diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 77730b9255..96c8e94d9b 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -335,10 +335,14 @@ class OatDumper { resolved_addr2instr_(0), instruction_set_(oat_file_.GetOatHeader().GetInstructionSet()), disassembler_(Disassembler::Create(instruction_set_, - new DisassemblerOptions(options_.absolute_addresses_, - oat_file.Begin(), - oat_file.End(), - true /* can_read_literals_ */))) { + new DisassemblerOptions( + options_.absolute_addresses_, + oat_file.Begin(), + oat_file.End(), + true /* can_read_literals_ */, + Is64BitInstructionSet(instruction_set_) + ? &Thread::DumpThreadOffset<PointerSize::k64> + : &Thread::DumpThreadOffset<PointerSize::k32>))) { CHECK(options_.class_loader_ != nullptr); CHECK(options_.class_filter_ != nullptr); CHECK(options_.method_filter_ != nullptr); @@ -1402,7 +1406,7 @@ class OatDumper { const std::vector<const OatFile::OatDexFile*> oat_dex_files_; const OatDumperOptions& options_; uint32_t resolved_addr2instr_; - InstructionSet instruction_set_; + const InstructionSet instruction_set_; std::set<uintptr_t> offsets_; Disassembler* disassembler_; }; diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc index 9432384561..3f6531b5b4 100644 --- a/patchoat/patchoat.cc +++ b/patchoat/patchoat.cc @@ -37,6 +37,7 @@ #include "gc/space/image_space.h" #include "image-inl.h" #include "mirror/abstract_method.h" +#include "mirror/dex_cache.h" #include "mirror/object-inl.h" #include "mirror/method.h" #include "mirror/reference.h" @@ -592,8 +593,8 @@ void PatchOat::PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e. // static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))). 
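// [Editor's sketch, not part of the patch] The dependency inversion used in the
// disassembler changes above: instead of calling Thread::DumpThreadOffset directly
// (which pulls the runtime into the disassembler), DisassemblerOptions now carries a
// function pointer that the embedder supplies; oatdump picks the 32-bit or 64-bit
// variant based on the instruction set. All names other than ThreadOffsetNameFunction
// are illustrative assumptions.
#include <cstdint>
#include <iostream>
#include <ostream>

struct Options {
  using ThreadOffsetNameFunction = void (*)(std::ostream& os, uint32_t offset);
  ThreadOffsetNameFunction thread_offset_name_function_;
};

// What a runtime-aware embedder would plug in (stand-in for Thread::DumpThreadOffset).
void DumpThreadOffset64(std::ostream& os, uint32_t offset) {
  os << "thread_offset_" << offset;
}

// The disassembler side only sees the callback, never the runtime headers.
void AnnotateThreadRegisterLoad(const Options& options, uint32_t offset) {
  std::cout << "ldr x0, [tr, #" << offset << "] ; ";
  options.thread_offset_name_function_(std::cout, offset);
  std::cout << "\n";
}

int main() {
  Options options{&DumpThreadOffset64};
  AnnotateThreadRegisterLoad(options, 0x88u);
  return 0;
}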
- GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings(); - GcRoot<mirror::String>* relocated_strings = RelocatedAddressOfPointer(orig_strings); + mirror::StringDexCacheType* orig_strings = orig_dex_cache->GetStrings(); + mirror::StringDexCacheType* relocated_strings = RelocatedAddressOfPointer(orig_strings); copy_dex_cache->SetField64<false>( mirror::DexCache::StringsOffset(), static_cast<int64_t>(reinterpret_cast<uintptr_t>(relocated_strings))); diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index c4ec72685f..e25e93fcb0 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -191,7 +191,7 @@ .cfi_rel_offset r11, 44 .cfi_rel_offset ip, 48 .cfi_rel_offset lr, 52 - vpush {d0-d15} @ 32 words of float args. + vpush {d0-d15} @ 32 words, 2 for each of the 16 saved doubles. .cfi_adjust_cfa_offset 128 sub sp, #8 @ 2 words of space, alignment padding and Method* .cfi_adjust_cfa_offset 8 @@ -1030,11 +1030,49 @@ ENTRY art_quick_set64_instance END art_quick_set64_instance /* - * Entry from managed code to resolve a string, this stub will allocate a String and deliver an - * exception on error. On success the String is returned. R0 holds the string index. The fast - * path check for hit in strings cache has already been performed. + * Entry from managed code to resolve a string, this stub will + * check the dex cache for a matching string (the fast path), and if not found, + * it will allocate a String and deliver an exception on error. + * On success the String is returned. R0 holds the string index. */ -ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + +ENTRY art_quick_resolve_string + ldr r1, [sp] @ load referrer + ldr r1, [r1, #ART_METHOD_DECLARING_CLASS_OFFSET] @ load declaring class + ldr r1, [r1, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache + ubfx r2, r0, #0, #STRING_DEX_CACHE_HASH_BITS + add r1, r1, r2, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT + ldrd r2, r3, [r1] @ load index into r3 and pointer into r2 + cmp r0, r3 + bne .Lart_quick_resolve_string_slow_path +#ifdef USE_READ_BARRIER + ldr r3, [rSELF, #THREAD_IS_GC_MARKING_OFFSET] + cbnz r3, .Lart_quick_resolve_string_marking +#endif + mov r0, r2 + bx lr +// Slow path case, the index did not match +.Lart_quick_resolve_string_slow_path: + SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC + mov r1, r9 @ pass Thread::Current + mov r3, sp + bl artResolveStringFromCode @ (uint32_t string_idx, Thread*) + RESTORE_SAVE_REFS_ONLY_FRAME + RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +// GC is marking case, need to check the mark bit. +.Lart_quick_resolve_string_marking: + ldr r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET] + tst r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED + mov r0, r2 + bne .Lart_quick_resolve_string_no_rb + push {r1, r2, r3, lr} @ Save r1-r3 and LR; LR is needed to return. + .cfi_adjust_cfa_offset 16 + bl artReadBarrierMark @ Get the marked string back. + pop {r1, r2, r3, lr} @ Restore registers. + .cfi_adjust_cfa_offset -16 +.Lart_quick_resolve_string_no_rb: + bx lr +END art_quick_resolve_string // Generate the allocation entrypoints for each allocator.
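The new art_quick_resolve_string stub above (and its arm64/x86/x86_64 counterparts later in this patch) all follow the same shape: mask the string index into the fixed-size dex cache, compare the stored index, fall back to artResolveStringFromCode on a miss, and, while the GC is marking, run the result through artReadBarrierMark unless its lock-word mark bit is already set. A rough C++ sketch of that control flow, with made-up helper names standing in for the offsets and runtime calls (slot initialization, which keeps an empty slot from matching index 0, is omitted here):

#include <cstdint>

// Illustrative stand-ins only; the real stubs operate on packed 64-bit
// dex-cache slots and raw registers, not on these types.
struct StringSlot { uint32_t index; void* str; };
extern bool IsGcMarking();                            // THREAD_IS_GC_MARKING_OFFSET check
extern bool HasMarkBitSet(void* obj);                 // lock-word mark bit test
extern void* ReadBarrierMark(void* obj);              // artReadBarrierMark
extern void* ResolveStringSlow(uint32_t string_idx);  // artResolveStringFromCode path

void* ResolveString(const StringSlot* cache, uint32_t cache_mask, uint32_t string_idx) {
  const StringSlot slot = cache[string_idx & cache_mask];  // a single 64-bit load in the stubs
  if (slot.index != string_idx) {
    return ResolveStringSlow(string_idx);  // miss: resolve, allocate, or throw
  }
  void* str = slot.str;
#ifdef USE_READ_BARRIER
  if (IsGcMarking() && !HasMarkBitSet(str)) {
    str = ReadBarrierMark(str);  // hand back a marked string while the GC is marking
  }
#endif
  return str;
}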
GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 4289cabbc6..202846a679 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -331,6 +331,7 @@ #endif // Save FP registers. + // For better performance, store d0 and d31 separately, so that all STPs are 16-byte aligned. str d0, [sp, #8] stp d1, d2, [sp, #16] stp d3, d4, [sp, #32] @@ -431,6 +432,7 @@ .macro RESTORE_SAVE_EVERYTHING_FRAME // Restore FP registers. + // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned. ldr d0, [sp, #8] ldp d1, d2, [sp, #16] ldp d3, d4, [sp, #32] @@ -1784,11 +1786,48 @@ ENTRY art_quick_set64_static END art_quick_set64_static /* - * Entry from managed code to resolve a string, this stub will allocate a String and deliver an - * exception on error. On success the String is returned. w0 holds the string index. The fast - * path check for hit in strings cache has already been performed. + * Entry from managed code to resolve a string, this stub will + * check the dex cache for a matching string (the fast path), and if not found, + * it will allocate a String and deliver an exception on error. + * On success the String is returned. R0 holds the string index. */ -ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + +ENTRY art_quick_resolve_string + ldr x1, [sp] // load referrer + ldr w2, [x1, #ART_METHOD_DECLARING_CLASS_OFFSET] // load declaring class + ldr x1, [x2, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] // load string dex cache + and x2, x0, #STRING_DEX_CACHE_SIZE_MINUS_ONE // get masked string index into x2 + ldr x2, [x1, x2, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT] // load dex cache pair into x2 + cmp x0, x2, lsr #32 // compare against upper 32 bits + bne .Lart_quick_resolve_string_slow_path + ubfx x0, x2, #0, #32 // extract lower 32 bits into x0 +#ifdef USE_READ_BARRIER + // Most common case: GC is not marking. + ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET] + cbnz x3, .Lart_quick_resolve_string_marking +#endif + ret + +// Slow path case, the index did not match. +.Lart_quick_resolve_string_slow_path: + SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC + mov x1, xSELF // pass Thread::Current + bl artResolveStringFromCode // (int32_t string_idx, Thread* self) + RESTORE_SAVE_REFS_ONLY_FRAME + RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + +// GC is marking case, need to check the mark bit. +.Lart_quick_resolve_string_marking: + ldr x3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + tbnz x3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb + // Save LR so that we can return, also x1 for alignment purposes. + stp x1, xLR, [sp, #-16]! // Save x1, LR. + bl artReadBarrierMark // Get the marked string back. + ldp x1, xLR, [sp], #16 // Restore registers. +.Lart_quick_resolve_string_no_rb: + ret + +END art_quick_resolve_string // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index 80bb51d9b6..10adb3ac05 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -1203,6 +1203,7 @@ TEST_F(StubTest, AllocObjectArray) { TEST_F(StubTest, StringCompareTo) { + TEST_DISABLED_FOR_STRING_COMPRESSION(); // There is no StringCompareTo runtime entrypoint for __arm__ or __aarch64__. 
#if defined(__i386__) || defined(__mips__) || \ (defined(__x86_64__) && !defined(__APPLE__)) diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc index 3efeb406e9..c7af249c1c 100644 --- a/runtime/arch/x86/fault_handler_x86.cc +++ b/runtime/arch/x86/fault_handler_x86.cc @@ -191,6 +191,27 @@ static uint32_t GetInstructionSize(const uint8_t* pc) { immediate_size = operand_size_prefix ? 2 : 4; break; + case 0xf6: + case 0xf7: + modrm = *pc++; + has_modrm = true; + switch ((modrm >> 3) & 7) { // Extract "reg/opcode" from "modr/m". + case 0: // test + immediate_size = (opcode == 0xf6) ? 1 : (operand_size_prefix ? 2 : 4); + break; + case 2: // not + case 3: // neg + case 4: // mul + case 5: // imul + case 6: // div + case 7: // idiv + break; + default: + unhandled_instruction = true; + break; + } + break; + default: unhandled_instruction = true; break; diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 2e9682e563..282f10d410 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1108,7 +1108,54 @@ DEFINE_FUNCTION art_quick_alloc_object_region_tlab ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB END_FUNCTION art_quick_alloc_object_region_tlab -ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +DEFINE_FUNCTION art_quick_resolve_string + SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx + movl FRAME_SIZE_SAVE_REFS_ONLY(%esp), %ecx // get referrer + movl ART_METHOD_DECLARING_CLASS_OFFSET(%ecx), %ecx // get declaring class + movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %ecx // get string dex cache + movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %edx + andl %eax, %edx + shl LITERAL(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), %edx + addl %ecx, %edx + movlps (%edx), %xmm0 // load string idx and pointer to xmm0 + movd %xmm0, %ecx // extract pointer + pshufd LITERAL(0x55), %xmm0, %xmm0 // shuffle index into lowest bits + movd %xmm0, %edx // extract index + cmp %edx, %eax + jne .Lart_quick_resolve_string_slow_path + movl %ecx, %eax +#ifdef USE_READ_BARRIER + cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET + jne .Lart_quick_resolve_string_marking +#endif + RESTORE_SAVE_REFS_ONLY_FRAME + ret +.Lart_quick_resolve_string_slow_path: + // Outgoing argument set up + subl LITERAL(8), %esp // push padding + CFI_ADJUST_CFA_OFFSET(8) + pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() + CFI_ADJUST_CFA_OFFSET(4) + PUSH eax // pass arg1 + call SYMBOL(artResolveStringFromCode) + addl LITERAL(16), %esp // pop arguments + CFI_ADJUST_CFA_OFFSET(-16) + RESTORE_SAVE_REFS_ONLY_FRAME + RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +.Lart_quick_resolve_string_marking: + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax) + jnz .Lart_quick_resolve_string_no_rb + subl LITERAL(12), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(12) + PUSH eax // Pass the string as the first param. 
+ call SYMBOL(artReadBarrierMark) + addl LITERAL(16), %esp + CFI_ADJUST_CFA_OFFSET(-16) +.Lart_quick_resolve_string_no_rb: + RESTORE_SAVE_REFS_ONLY_FRAME + ret +END_FUNCTION art_quick_resolve_string + ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 32768b0263..62808abf7c 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1330,7 +1330,52 @@ DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB END_FUNCTION art_quick_alloc_object_initialized_region_tlab -ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +DEFINE_FUNCTION art_quick_resolve_string + movq 8(%rsp), %rcx // get referrer + movl ART_METHOD_DECLARING_CLASS_OFFSET(%rcx), %ecx // get declaring class + movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %rcx // get string dex cache + movq LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %rdx + andq %rdi, %rdx + shlq LITERAL(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), %rdx + addq %rcx, %rdx + movq %rax, %rcx + movq (%rdx), %rdx + movq %rdx, %rax + movl %eax, %eax + shrq LITERAL(32), %rdx + cmp %rdx, %rdi + jne .Lart_quick_resolve_string_slow_path +#ifdef USE_READ_BARRIER + cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET + jne .Lart_quick_resolve_string_marking +#endif + ret +// Slow path, the index did not match +.Lart_quick_resolve_string_slow_path: + SETUP_SAVE_REFS_ONLY_FRAME + movq %rcx, %rax + // Outgoing argument set up + movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() + call SYMBOL(artResolveStringFromCode) // artResolveStringFromCode(arg0, referrer, Thread*) + RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address + RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +// GC is marking case, need to check the mark bit. 
+.Lart_quick_resolve_string_marking: + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%rax) + jnz .Lart_quick_resolve_string_no_rb + // Save rdi and rsi so that they are preserved across the artReadBarrierMark call. + PUSH rdi + PUSH rsi + subq LITERAL(8), %rsp // 16 byte alignment + movq %rax, %rdi + call SYMBOL(artReadBarrierMark) + addq LITERAL(8), %rsp + POP rsi + POP rdi +.Lart_quick_resolve_string_no_rb: + ret +END_FUNCTION art_quick_resolve_string + ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER diff --git a/runtime/art_method.cc b/runtime/art_method.cc index d812590cc7..f9bc249a42 100644 --- a/runtime/art_method.cc +++ b/runtime/art_method.cc @@ -477,7 +477,7 @@ const OatQuickMethodHeader* ArtMethod::GetOatQuickMethodHeader(uintptr_t pc) { DCHECK(method_header->Contains(pc)) << PrettyMethod(this) - << std::hex << pc << " " << oat_entry_point + << " " << std::hex << pc << " " << oat_entry_point << " " << (uintptr_t)(method_header->code_ + method_header->code_size_); return method_header; } diff --git a/runtime/asm_support.h b/runtime/asm_support.h index 848f8e532c..102b993686 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -19,12 +19,15 @@ #if defined(__cplusplus) #include "art_method.h" +#include "base/bit_utils.h" #include "gc/allocator/rosalloc.h" #include "gc/heap.h" #include "jit/jit.h" #include "lock_word.h" #include "mirror/class.h" +#include "mirror/dex_cache.h" #include "mirror/string.h" +#include "utils/dex_cache_arrays_layout.h" #include "runtime.h" #include "thread.h" #endif diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc index b84e29f7ce..aeb990cae8 100644 --- a/runtime/base/arena_allocator.cc +++ b/runtime/base/arena_allocator.cc @@ -163,6 +163,7 @@ Arena::Arena() : bytes_allocated_(0), next_(nullptr) { MallocArena::MallocArena(size_t size) { memory_ = reinterpret_cast<uint8_t*>(calloc(1, size)); CHECK(memory_ != nullptr); // Abort on OOM. + DCHECK_ALIGNED(memory_, ArenaAllocator::kAlignment); size_ = size; } @@ -370,6 +371,7 @@ uint8_t* ArenaAllocator::AllocFromNewArena(size_t bytes) { arena_head_ = new_arena; // Update our internal data structures. begin_ = new_arena->Begin(); + DCHECK_ALIGNED(begin_, kAlignment); ptr_ = begin_ + bytes; end_ = new_arena->End(); } diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h index 6c1a8984cd..3fad96b39b 100644 --- a/runtime/base/arena_allocator.h +++ b/runtime/base/arena_allocator.h @@ -310,6 +310,7 @@ class ArenaAllocator return AllocFromNewArena(bytes); } uint8_t* ret = ptr_; + DCHECK_ALIGNED(ret, kAlignment); ptr_ += bytes; return ret; } @@ -319,20 +320,24 @@ class ArenaAllocator ArenaAllocKind kind = kArenaAllocMisc) ALWAYS_INLINE { DCHECK_GE(new_size, ptr_size); DCHECK_EQ(ptr == nullptr, ptr_size == 0u); - auto* end = reinterpret_cast<uint8_t*>(ptr) + ptr_size; + // We always allocate aligned. + const size_t aligned_ptr_size = RoundUp(ptr_size, kAlignment); + auto* end = reinterpret_cast<uint8_t*>(ptr) + aligned_ptr_size; // If we haven't allocated anything else, we can safely extend. if (end == ptr_) { DCHECK(!IsRunningOnMemoryTool()); // Red zone prevents end == ptr_.
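The surrounding ArenaAllocator::Realloc hunk rounds both the old and the new size up to kAlignment before deciding whether the last allocation can simply be extended in place. A minimal sketch of that decision under a plain bump-pointer model (not the real ArenaAllocator interface; RoundUp mirrors the helper in base/bit_utils.h):

#include <cstddef>
#include <cstdint>

constexpr size_t kAlignment = 8;  // same value as ArenaAllocator::kAlignment above

inline size_t RoundUp(size_t x, size_t n) {  // n must be a power of two
  return (x + n - 1u) & ~(n - 1u);
}

// Returns true if a block of old_size that ends at the bump pointer 'ptr' can
// grow to new_size without moving, i.e. the aligned size delta still fits
// between 'ptr' and the arena's 'end'.
bool CanExtendInPlace(const uint8_t* block, size_t old_size, size_t new_size,
                      const uint8_t* ptr, const uint8_t* end) {
  const size_t aligned_old_size = RoundUp(old_size, kAlignment);
  if (block + aligned_old_size != ptr) {
    return false;  // something else was allocated after this block
  }
  const size_t size_delta = RoundUp(new_size, kAlignment) - aligned_old_size;
  return static_cast<size_t>(end - ptr) >= size_delta;
}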
- const size_t size_delta = new_size - ptr_size; + const size_t aligned_new_size = RoundUp(new_size, kAlignment); + const size_t size_delta = aligned_new_size - aligned_ptr_size; // Check remain space. const size_t remain = end_ - ptr_; if (remain >= size_delta) { ptr_ += size_delta; ArenaAllocatorStats::RecordAlloc(size_delta, kind); + DCHECK_ALIGNED(ptr_, kAlignment); return ptr; } } - auto* new_ptr = Alloc(new_size, kind); + auto* new_ptr = Alloc(new_size, kind); // Note: Alloc will take care of aligning new_size. memcpy(new_ptr, ptr, ptr_size); // TODO: Call free on ptr if linear alloc supports free. return new_ptr; @@ -362,11 +367,12 @@ class ArenaAllocator bool Contains(const void* ptr) const; + static constexpr size_t kAlignment = 8; + private: void* AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind); uint8_t* AllocFromNewArena(size_t bytes); - static constexpr size_t kAlignment = 8; void UpdateBytesAllocated(); diff --git a/runtime/base/arena_allocator_test.cc b/runtime/base/arena_allocator_test.cc index 9de3cc4312..fd48a3fd78 100644 --- a/runtime/base/arena_allocator_test.cc +++ b/runtime/base/arena_allocator_test.cc @@ -16,6 +16,7 @@ #include "base/arena_allocator.h" #include "base/arena_bit_vector.h" +#include "base/memory_tool.h" #include "gtest/gtest.h" namespace art { @@ -124,4 +125,221 @@ TEST_F(ArenaAllocatorTest, LargeAllocations) { } } +TEST_F(ArenaAllocatorTest, AllocAlignment) { + ArenaPool pool; + ArenaAllocator arena(&pool); + for (size_t iterations = 0; iterations <= 10; ++iterations) { + for (size_t size = 1; size <= ArenaAllocator::kAlignment + 1; ++size) { + void* allocation = arena.Alloc(size); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(allocation)) + << reinterpret_cast<uintptr_t>(allocation); + } + } +} + +TEST_F(ArenaAllocatorTest, ReallocReuse) { + // Realloc does not reuse arenas when running under sanitization. So we cannot do those + if (RUNNING_ON_MEMORY_TOOL != 0) { + printf("WARNING: TEST DISABLED FOR MEMORY_TOOL\n"); + return; + } + + { + // Case 1: small aligned allocation, aligned extend inside arena. + ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = ArenaAllocator::kAlignment * 2; + void* original_allocation = arena.Alloc(original_size); + + const size_t new_size = ArenaAllocator::kAlignment * 3; + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_EQ(original_allocation, realloc_allocation); + } + + { + // Case 2: small aligned allocation, non-aligned extend inside arena. + ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = ArenaAllocator::kAlignment * 2; + void* original_allocation = arena.Alloc(original_size); + + const size_t new_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2); + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_EQ(original_allocation, realloc_allocation); + } + + { + // Case 3: small non-aligned allocation, aligned extend inside arena. + ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2); + void* original_allocation = arena.Alloc(original_size); + + const size_t new_size = ArenaAllocator::kAlignment * 4; + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_EQ(original_allocation, realloc_allocation); + } + + { + // Case 4: small non-aligned allocation, aligned non-extend inside arena. 
+ ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2); + void* original_allocation = arena.Alloc(original_size); + + const size_t new_size = ArenaAllocator::kAlignment * 3; + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_EQ(original_allocation, realloc_allocation); + } + + // The next part is brittle, as the default size for an arena is variable, and we don't know about + // sanitization. + + { + // Case 5: large allocation, aligned extend into next arena. + ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = Arena::kDefaultSize - ArenaAllocator::kAlignment * 5; + void* original_allocation = arena.Alloc(original_size); + + const size_t new_size = Arena::kDefaultSize + ArenaAllocator::kAlignment * 2; + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_NE(original_allocation, realloc_allocation); + } + + { + // Case 6: large allocation, non-aligned extend into next arena. + ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = Arena::kDefaultSize - + ArenaAllocator::kAlignment * 4 - + ArenaAllocator::kAlignment / 2; + void* original_allocation = arena.Alloc(original_size); + + const size_t new_size = Arena::kDefaultSize + + ArenaAllocator::kAlignment * 2 + + ArenaAllocator::kAlignment / 2; + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_NE(original_allocation, realloc_allocation); + } +} + +TEST_F(ArenaAllocatorTest, ReallocAlignment) { + { + // Case 1: small aligned allocation, aligned extend inside arena. + ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = ArenaAllocator::kAlignment * 2; + void* original_allocation = arena.Alloc(original_size); + ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation)); + + const size_t new_size = ArenaAllocator::kAlignment * 3; + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation)); + + void* after_alloc = arena.Alloc(1); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc)); + } + + { + // Case 2: small aligned allocation, non-aligned extend inside arena. + ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = ArenaAllocator::kAlignment * 2; + void* original_allocation = arena.Alloc(original_size); + ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation)); + + const size_t new_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2); + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation)); + + void* after_alloc = arena.Alloc(1); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc)); + } + + { + // Case 3: small non-aligned allocation, aligned extend inside arena. 
+ ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2); + void* original_allocation = arena.Alloc(original_size); + ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation)); + + const size_t new_size = ArenaAllocator::kAlignment * 4; + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation)); + + void* after_alloc = arena.Alloc(1); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc)); + } + + { + // Case 4: small non-aligned allocation, aligned non-extend inside arena. + ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = ArenaAllocator::kAlignment * 2 + (ArenaAllocator::kAlignment / 2); + void* original_allocation = arena.Alloc(original_size); + ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation)); + + const size_t new_size = ArenaAllocator::kAlignment * 3; + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation)); + + void* after_alloc = arena.Alloc(1); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc)); + } + + // The next part is brittle, as the default size for an arena is variable, and we don't know about + // sanitization. + + { + // Case 5: large allocation, aligned extend into next arena. + ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = Arena::kDefaultSize - ArenaAllocator::kAlignment * 5; + void* original_allocation = arena.Alloc(original_size); + ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation)); + + const size_t new_size = Arena::kDefaultSize + ArenaAllocator::kAlignment * 2; + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation)); + + void* after_alloc = arena.Alloc(1); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc)); + } + + { + // Case 6: large allocation, non-aligned extend into next arena. + ArenaPool pool; + ArenaAllocator arena(&pool); + + const size_t original_size = Arena::kDefaultSize - + ArenaAllocator::kAlignment * 4 - + ArenaAllocator::kAlignment / 2; + void* original_allocation = arena.Alloc(original_size); + ASSERT_TRUE(IsAligned<ArenaAllocator::kAlignment>(original_allocation)); + + const size_t new_size = Arena::kDefaultSize + + ArenaAllocator::kAlignment * 2 + + ArenaAllocator::kAlignment / 2; + void* realloc_allocation = arena.Realloc(original_allocation, original_size, new_size); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(realloc_allocation)); + + void* after_alloc = arena.Alloc(1); + EXPECT_TRUE(IsAligned<ArenaAllocator::kAlignment>(after_alloc)); + } +} + + } // namespace art diff --git a/runtime/base/dchecked_vector.h b/runtime/base/dchecked_vector.h index 51dfba87eb..77f0ea2b7c 100644 --- a/runtime/base/dchecked_vector.h +++ b/runtime/base/dchecked_vector.h @@ -59,10 +59,8 @@ class dchecked_vector : private std::vector<T, Alloc> { : Base() { } explicit dchecked_vector(const allocator_type& alloc) : Base(alloc) { } - // Note that we cannot forward to std::vector(size_type, const allocator_type&) because it is not - // available in C++11, which is the latest GCC can support. 
http://b/25022512 explicit dchecked_vector(size_type n, const allocator_type& alloc = allocator_type()) - : Base(alloc) { resize(n); } + : Base(n, alloc) { } dchecked_vector(size_type n, const value_type& value, const allocator_type& alloc = allocator_type()) diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h index 4af47d170a..ca9a694144 100644 --- a/runtime/base/histogram-inl.h +++ b/runtime/base/histogram-inl.h @@ -228,10 +228,8 @@ inline void Histogram<Value>::CreateHistogram(CumulativeData* out_data) const { DCHECK_LE(std::abs(out_data->perc_.back() - 1.0), 0.001); } -#if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wfloat-equal" -#endif template <class Value> inline double Histogram<Value>::Percentile(double per, const CumulativeData& data) const { @@ -273,9 +271,7 @@ inline double Histogram<Value>::Percentile(double per, const CumulativeData& dat return value; } -#if defined(__clang__) #pragma clang diagnostic pop -#endif } // namespace art #endif // ART_RUNTIME_BASE_HISTOGRAM_INL_H_ diff --git a/runtime/base/macros.h b/runtime/base/macros.h index 5a50247f5a..0ec6e6de97 100644 --- a/runtime/base/macros.h +++ b/runtime/base/macros.h @@ -30,16 +30,8 @@ _rc; }) #endif -#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) - -// C++11 final and override keywords that were introduced in GCC version 4.7. -#if defined(__clang__) || GCC_VERSION >= 40700 #define OVERRIDE override #define FINAL final -#else -#define OVERRIDE -#define FINAL -#endif // Declare a friend relationship in a class with a test. Used rather that FRIEND_TEST to avoid // globally importing gtest/gtest.h into the main ART header files. @@ -158,12 +150,9 @@ char (&ArraySizeHelper(T (&array)[N]))[N]; #define ALWAYS_INLINE __attribute__ ((always_inline)) #endif -#ifdef __clang__ -/* clang doesn't like attributes on lambda functions */ +// clang doesn't like attributes on lambda functions. It would be nice to say: +// #define ALWAYS_INLINE_LAMBDA ALWAYS_INLINE #define ALWAYS_INLINE_LAMBDA -#else -#define ALWAYS_INLINE_LAMBDA ALWAYS_INLINE -#endif #define NO_INLINE __attribute__ ((noinline)) @@ -228,75 +217,46 @@ template<typename... T> void UNUSED(const T&...) {} // // In either case this macro has no effect on runtime behavior and performance // of code. -#if defined(__clang__) && __cplusplus >= 201103L && defined(__has_warning) #if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough") #define FALLTHROUGH_INTENDED [[clang::fallthrough]] // NOLINT #endif -#endif #ifndef FALLTHROUGH_INTENDED #define FALLTHROUGH_INTENDED do { } while (0) #endif // Annotalysis thread-safety analysis support. -#if defined(__SUPPORT_TS_ANNOTATION__) || defined(__clang__) -#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) -#else -#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op -#endif -#define ACQUIRED_AFTER(...) THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__)) -#define ACQUIRED_BEFORE(...) THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__)) -#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) -#define GUARDED_VAR THREAD_ANNOTATION_ATTRIBUTE__(guarded) -#define LOCK_RETURNED(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) -#define NO_THREAD_SAFETY_ANALYSIS THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis) +#define ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) +#define ACQUIRED_BEFORE(...) 
__attribute__((acquired_before(__VA_ARGS__))) +#define GUARDED_BY(x) __attribute__((guarded_by(x))) +#define GUARDED_VAR __attribute__((guarded)) +#define LOCK_RETURNED(x) __attribute__((lock_returned(x))) +#define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) #define PT_GUARDED_BY(x) // THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded_by(x)) -#define PT_GUARDED_VAR THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded) -#define SCOPED_LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) - -#if defined(__clang__) -#define EXCLUSIVE_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock_function(__VA_ARGS__)) -#define EXCLUSIVE_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock_function(__VA_ARGS__)) -#define SHARED_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_lock_function(__VA_ARGS__)) -#define SHARED_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock_function(__VA_ARGS__)) -#define UNLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(unlock_function(__VA_ARGS__)) -#define REQUIRES(...) THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__)) -#define SHARED_REQUIRES(...) THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__)) -#define CAPABILITY(...) THREAD_ANNOTATION_ATTRIBUTE__(capability(__VA_ARGS__)) -#define SHARED_CAPABILITY(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_capability(__VA_ARGS__)) -#define ASSERT_CAPABILITY(...) THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(__VA_ARGS__)) -#define ASSERT_SHARED_CAPABILITY(...) THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(__VA_ARGS__)) -#define RETURN_CAPABILITY(...) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(__VA_ARGS__)) -#define TRY_ACQUIRE(...) THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__)) -#define TRY_ACQUIRE_SHARED(...) THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__)) -#define ACQUIRE(...) THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__)) -#define ACQUIRE_SHARED(...) THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__)) -#define RELEASE(...) THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__)) -#define RELEASE_SHARED(...) THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__)) -#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) -#else -#define EXCLUSIVE_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock(__VA_ARGS__)) -#define EXCLUSIVE_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock(__VA_ARGS__)) -#define SHARED_LOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_lock(__VA_ARGS__)) -#define SHARED_TRYLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock(__VA_ARGS__)) -#define UNLOCK_FUNCTION(...) THREAD_ANNOTATION_ATTRIBUTE__(unlock(__VA_ARGS__)) -#define REQUIRES(...) -#define SHARED_REQUIRES(...) -#define CAPABILITY(...) -#define SHARED_CAPABILITY(...) -#define ASSERT_CAPABILITY(...) -#define ASSERT_SHARED_CAPABILITY(...) -#define RETURN_CAPABILITY(...) -#define TRY_ACQUIRE(...) -#define TRY_ACQUIRE_SHARED(...) -#define ACQUIRE(...) -#define ACQUIRE_SHARED(...) -#define RELEASE(...) -#define RELEASE_SHARED(...) -#define SCOPED_CAPABILITY -#endif +#define PT_GUARDED_VAR __attribute__((point_to_guarded)) +#define SCOPED_LOCKABLE __attribute__((scoped_lockable)) + +#define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__))) +#define EXCLUSIVE_TRYLOCK_FUNCTION(...) 
__attribute__((exclusive_trylock_function(__VA_ARGS__))) +#define SHARED_LOCK_FUNCTION(...) __attribute__((shared_lock_function(__VA_ARGS__))) +#define SHARED_TRYLOCK_FUNCTION(...) __attribute__((shared_trylock_function(__VA_ARGS__))) +#define UNLOCK_FUNCTION(...) __attribute__((unlock_function(__VA_ARGS__))) +#define REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) +#define SHARED_REQUIRES(...) __attribute__((requires_shared_capability(__VA_ARGS__))) +#define CAPABILITY(...) __attribute__((capability(__VA_ARGS__))) +#define SHARED_CAPABILITY(...) __attribute__((shared_capability(__VA_ARGS__))) +#define ASSERT_CAPABILITY(...) __attribute__((assert_capability(__VA_ARGS__))) +#define ASSERT_SHARED_CAPABILITY(...) __attribute__((assert_shared_capability(__VA_ARGS__))) +#define RETURN_CAPABILITY(...) __attribute__((lock_returned(__VA_ARGS__))) +#define TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) +#define TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) +#define ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) +#define ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) +#define RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) +#define RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) +#define SCOPED_CAPABILITY __attribute__((scoped_lockable)) #define LOCKABLE CAPABILITY("mutex") #define SHARED_LOCKABLE SHARED_CAPABILITY("mutex") diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index 264a530a36..fec918b681 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -98,12 +98,7 @@ class ScopedAllMutexesLock FINAL { } ~ScopedAllMutexesLock() { -#if !defined(__clang__) - // TODO: remove this workaround target GCC/libc++/bionic bug "invalid failure memory model". - while (!gAllMutexData->all_mutexes_guard.CompareExchangeWeakSequentiallyConsistent(mutex_, 0)) { -#else while (!gAllMutexData->all_mutexes_guard.CompareExchangeWeakRelease(mutex_, 0)) { -#endif NanoSleep(100); } } diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h index f2575f702f..97aa499b29 100644 --- a/runtime/class_linker-inl.h +++ b/runtime/class_linker-inl.h @@ -27,6 +27,8 @@ #include "mirror/object_array.h" #include "handle_scope-inl.h" +#include <atomic> + namespace art { inline mirror::Class* ClassLinker::FindSystemClass(Thread* self, const char* descriptor) { @@ -63,18 +65,21 @@ inline mirror::Class* ClassLinker::FindArrayClass(Thread* self, mirror::Class** inline mirror::String* ClassLinker::ResolveString(uint32_t string_idx, ArtMethod* referrer) { mirror::Class* declaring_class = referrer->GetDeclaringClass(); // MethodVerifier refuses methods with string_idx out of bounds. 
- DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings()); - mirror::String* resolved_string = declaring_class->GetDexCacheStrings()[string_idx].Read(); - if (UNLIKELY(resolved_string == nullptr)) { + DCHECK_LT(string_idx, declaring_class->GetDexFile().NumStringIds());; + mirror::String* string = + mirror::StringDexCachePair::LookupString(declaring_class->GetDexCacheStrings(), + string_idx, + mirror::DexCache::kDexCacheStringCacheSize).Read(); + if (UNLIKELY(string == nullptr)) { StackHandleScope<1> hs(Thread::Current()); Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache())); const DexFile& dex_file = *dex_cache->GetDexFile(); - resolved_string = ResolveString(dex_file, string_idx, dex_cache); - if (resolved_string != nullptr) { - DCHECK_EQ(dex_cache->GetResolvedString(string_idx), resolved_string); + string = ResolveString(dex_file, string_idx, dex_cache); + if (string != nullptr) { + DCHECK_EQ(dex_cache->GetResolvedString(string_idx), string); } } - return resolved_string; + return string; } inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, ArtMethod* referrer) { diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 4d48da6a83..f4400c3de1 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -66,6 +66,7 @@ #include "mirror/class.h" #include "mirror/class-inl.h" #include "mirror/class_loader.h" +#include "mirror/dex_cache.h" #include "mirror/dex_cache-inl.h" #include "mirror/field.h" #include "mirror/iftable-inl.h" @@ -1271,7 +1272,10 @@ bool ClassLinker::UpdateAppImageClassLoadersAndDexCaches( // If the oat file expects the dex cache arrays to be in the BSS, then allocate there and // copy over the arrays. DCHECK(dex_file != nullptr); - const size_t num_strings = dex_file->NumStringIds(); + size_t num_strings = mirror::DexCache::kDexCacheStringCacheSize; + if (dex_file->NumStringIds() < num_strings) { + num_strings = dex_file->NumStringIds(); + } const size_t num_types = dex_file->NumTypeIds(); const size_t num_methods = dex_file->NumMethodIds(); const size_t num_fields = dex_file->NumFieldIds(); @@ -1281,16 +1285,17 @@ bool ClassLinker::UpdateAppImageClassLoadersAndDexCaches( CHECK_EQ(num_fields, dex_cache->NumResolvedFields()); DexCacheArraysLayout layout(image_pointer_size_, dex_file); uint8_t* const raw_arrays = oat_dex_file->GetDexCacheArrays(); - // The space is not yet visible to the GC, we can avoid the read barriers and use - // std::copy_n. 
if (num_strings != 0u) { - GcRoot<mirror::String>* const image_resolved_strings = dex_cache->GetStrings(); - GcRoot<mirror::String>* const strings = - reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset()); - for (size_t j = 0; kIsDebugBuild && j < num_strings; ++j) { - DCHECK(strings[j].IsNull()); + mirror::StringDexCacheType* const image_resolved_strings = dex_cache->GetStrings(); + mirror::StringDexCacheType* const strings = + reinterpret_cast<mirror::StringDexCacheType*>(raw_arrays + layout.StringsOffset()); + for (size_t j = 0; j < num_strings; ++j) { + DCHECK_EQ(strings[j].load(std::memory_order_relaxed).string_index, 0u); + DCHECK(strings[j].load(std::memory_order_relaxed).string_pointer.IsNull()); + strings[j].store(image_resolved_strings[j].load(std::memory_order_relaxed), + std::memory_order_relaxed); } - std::copy_n(image_resolved_strings, num_strings, strings); + mirror::StringDexCachePair::Initialize(strings); dex_cache->SetStrings(strings); } if (num_types != 0u) { @@ -1473,14 +1478,14 @@ class UpdateClassLoaderAndResolvedStringsVisitor { bool operator()(mirror::Class* klass) const SHARED_REQUIRES(Locks::mutator_lock_) { if (forward_strings_) { - GcRoot<mirror::String>* strings = klass->GetDexCacheStrings(); + mirror::StringDexCacheType* strings = klass->GetDexCacheStrings(); if (strings != nullptr) { DCHECK( space_->GetImageHeader().GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains( reinterpret_cast<uint8_t*>(strings) - space_->Begin())) << "String dex cache array for " << PrettyClass(klass) << " is not in app image"; // Dex caches have already been updated, so take the strings pointer from there. - GcRoot<mirror::String>* new_strings = klass->GetDexCache()->GetStrings(); + mirror::StringDexCacheType* new_strings = klass->GetDexCache()->GetStrings(); DCHECK_NE(strings, new_strings); klass->SetDexCacheStrings(new_strings); } @@ -2079,18 +2084,31 @@ mirror::DexCache* ClassLinker::AllocDexCache(Thread* self, // Zero-initialized. raw_arrays = reinterpret_cast<uint8_t*>(linear_alloc->Alloc(self, layout.Size())); } - GcRoot<mirror::String>* strings = (dex_file.NumStringIds() == 0u) ? nullptr : - reinterpret_cast<GcRoot<mirror::String>*>(raw_arrays + layout.StringsOffset()); + mirror::StringDexCacheType* strings = (dex_file.NumStringIds() == 0u) ? nullptr : + reinterpret_cast<mirror::StringDexCacheType*>(raw_arrays + layout.StringsOffset()); GcRoot<mirror::Class>* types = (dex_file.NumTypeIds() == 0u) ? nullptr : reinterpret_cast<GcRoot<mirror::Class>*>(raw_arrays + layout.TypesOffset()); ArtMethod** methods = (dex_file.NumMethodIds() == 0u) ? nullptr : reinterpret_cast<ArtMethod**>(raw_arrays + layout.MethodsOffset()); ArtField** fields = (dex_file.NumFieldIds() == 0u) ? 
nullptr : reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset()); + size_t num_strings = mirror::DexCache::kDexCacheStringCacheSize; + if (dex_file.NumStringIds() < num_strings) { + num_strings = dex_file.NumStringIds(); + } + DCHECK_ALIGNED(raw_arrays, alignof(mirror::StringDexCacheType)) << + "Expected raw_arrays to align to StringDexCacheType."; + DCHECK_ALIGNED(layout.StringsOffset(), alignof(mirror::StringDexCacheType)) << + "Expected StringsOffset() to align to StringDexCacheType."; + DCHECK_ALIGNED(strings, alignof(mirror::StringDexCacheType)) << + "Expected strings to align to StringDexCacheType."; + static_assert(alignof(mirror::StringDexCacheType) == 8u, + "Expected StringDexCacheType to have align of 8."); if (kIsDebugBuild) { // Sanity check to make sure all the dex cache arrays are empty. b/28992179 - for (size_t i = 0; i < dex_file.NumStringIds(); ++i) { - CHECK(strings[i].Read<kWithoutReadBarrier>() == nullptr); + for (size_t i = 0; i < num_strings; ++i) { + CHECK_EQ(strings[i].load(std::memory_order_relaxed).string_index, 0u); + CHECK(strings[i].load(std::memory_order_relaxed).string_pointer.IsNull()); } for (size_t i = 0; i < dex_file.NumTypeIds(); ++i) { CHECK(types[i].Read<kWithoutReadBarrier>() == nullptr); @@ -2102,10 +2120,13 @@ mirror::DexCache* ClassLinker::AllocDexCache(Thread* self, CHECK(mirror::DexCache::GetElementPtrSize(fields, i, image_pointer_size_) == nullptr); } } + if (strings != nullptr) { + mirror::StringDexCachePair::Initialize(strings); + } dex_cache->Init(&dex_file, location.Get(), strings, - dex_file.NumStringIds(), + num_strings, types, dex_file.NumTypeIds(), methods, @@ -4525,7 +4546,8 @@ bool ClassLinker::InitializeClass(Thread* self, Handle<mirror::Class> klass, } self->AllowThreadSuspension(); - CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusVerified) << PrettyClass(klass.Get()); + CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusVerified) << PrettyClass(klass.Get()) + << " self.tid=" << self->GetTid() << " clinit.tid=" << klass->GetClinitThreadId(); // From here out other threads may observe that we're initializing and so changes of state // require the a notification. 
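In the AllocDexCache and app-image hunks above, each string slot is one 64-bit atomic <reference, index> pair (mirror::StringDexCacheType), copied with whole-slot relaxed loads and stores so the index and the pointer can never be observed out of sync. A hedged sketch of the same idea with std::atomic; the packed layout (reference in the low half, index in the high half on a little-endian target) matches how the arm64 stub unpacks the value, but the type and helper names here are illustrative:

#include <atomic>
#include <cstdint>

// Illustrative stand-in for mirror::StringDexCacheType.
struct PackedStringSlot {
  uint32_t string_ref;    // low half: compressed mirror::String reference
  uint32_t string_index;  // high half: dex string index cached in this slot
};
using AtomicSlot = std::atomic<PackedStringSlot>;  // 8 bytes, typically lock-free

// Copy one slot, e.g. from an app-image array into the runtime array, the way
// the class_linker.cc hunk does with load/store(std::memory_order_relaxed).
inline void CopySlot(const AtomicSlot& src, AtomicSlot& dst) {
  dst.store(src.load(std::memory_order_relaxed), std::memory_order_relaxed);
}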
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h index f445e52d20..2d16a493c8 100644 --- a/runtime/common_runtime_test.h +++ b/runtime/common_runtime_test.h @@ -207,6 +207,12 @@ class CheckJniAbortCatcher { return; \ } +#define TEST_DISABLED_FOR_STRING_COMPRESSION() \ + if (mirror::kUseStringCompression) { \ + printf("WARNING: TEST DISABLED FOR STRING COMPRESSION\n"); \ + return; \ + } + } // namespace art namespace std { diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc index 99732c64ab..e1da23c208 100644 --- a/runtime/common_throws.cc +++ b/runtime/common_throws.cc @@ -402,6 +402,16 @@ void ThrowNullPointerExceptionForMethodAccess(ArtMethod* method, dex_file, type); } +static bool IsValidReadBarrierImplicitCheck(uintptr_t addr) { + DCHECK(kEmitCompilerReadBarrier); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Uint32Value(); + if (kUseBakerReadBarrier && (kRuntimeISA == kX86 || kRuntimeISA == kX86_64)) { + constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; + monitor_offset += gray_byte_position; + } + return addr == monitor_offset; +} + static bool IsValidImplicitCheck(uintptr_t addr, ArtMethod* method, const Instruction& instr) SHARED_REQUIRES(Locks::mutator_lock_) { if (!CanDoImplicitNullCheckOn(addr)) { @@ -424,9 +434,13 @@ static bool IsValidImplicitCheck(uintptr_t addr, ArtMethod* method, const Instru return true; } + case Instruction::IGET_OBJECT: + if (kEmitCompilerReadBarrier && IsValidReadBarrierImplicitCheck(addr)) { + return true; + } + FALLTHROUGH_INTENDED; case Instruction::IGET: case Instruction::IGET_WIDE: - case Instruction::IGET_OBJECT: case Instruction::IGET_BOOLEAN: case Instruction::IGET_BYTE: case Instruction::IGET_CHAR: @@ -440,18 +454,20 @@ static bool IsValidImplicitCheck(uintptr_t addr, ArtMethod* method, const Instru case Instruction::IPUT_SHORT: { ArtField* field = Runtime::Current()->GetClassLinker()->ResolveField(instr.VRegC_22c(), method, false); - return (addr == 0) || - (addr == field->GetOffset().Uint32Value()) || - (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value())); + return (addr == 0) || (addr == field->GetOffset().Uint32Value()); } + case Instruction::IGET_OBJECT_QUICK: + if (kEmitCompilerReadBarrier && IsValidReadBarrierImplicitCheck(addr)) { + return true; + } + FALLTHROUGH_INTENDED; case Instruction::IGET_QUICK: case Instruction::IGET_BOOLEAN_QUICK: case Instruction::IGET_BYTE_QUICK: case Instruction::IGET_CHAR_QUICK: case Instruction::IGET_SHORT_QUICK: case Instruction::IGET_WIDE_QUICK: - case Instruction::IGET_OBJECT_QUICK: case Instruction::IPUT_QUICK: case Instruction::IPUT_BOOLEAN_QUICK: case Instruction::IPUT_BYTE_QUICK: @@ -459,14 +475,16 @@ static bool IsValidImplicitCheck(uintptr_t addr, ArtMethod* method, const Instru case Instruction::IPUT_SHORT_QUICK: case Instruction::IPUT_WIDE_QUICK: case Instruction::IPUT_OBJECT_QUICK: { - return (addr == 0u) || - (addr == instr.VRegC_22c()) || - (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value())); + return (addr == 0u) || (addr == instr.VRegC_22c()); } + case Instruction::AGET_OBJECT: + if (kEmitCompilerReadBarrier && IsValidReadBarrierImplicitCheck(addr)) { + return true; + } + FALLTHROUGH_INTENDED; case Instruction::AGET: case Instruction::AGET_WIDE: - case Instruction::AGET_OBJECT: case Instruction::AGET_BOOLEAN: case Instruction::AGET_BYTE: case Instruction::AGET_CHAR: @@ -482,9 +500,7 @@ static bool IsValidImplicitCheck(uintptr_t addr, 
ArtMethod* method, const Instru case Instruction::ARRAY_LENGTH: { // The length access should crash. We currently do not do implicit checks on // the array access itself. - return (addr == 0u) || - (addr == mirror::Array::LengthOffset().Uint32Value()) || - (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value())); + return (addr == 0u) || (addr == mirror::Array::LengthOffset().Uint32Value()); } default: { diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 2a5198bf01..a5b0689473 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -1286,8 +1286,7 @@ JDWP::JdwpError Dbg::CreateObject(JDWP::RefTypeId class_id, JDWP::ObjectId* new_ if (c->IsStringClass()) { // Special case for java.lang.String. gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - new_object = mirror::String::Alloc<true>(self, 0, allocator_type, visitor); + new_object = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { new_object = c->AllocObject(self); } @@ -4059,7 +4058,7 @@ void Dbg::ExecuteMethodWithoutPendingException(ScopedObjectAccess& soa, DebugInv // Prepare JDWP ids for the reply. JDWP::JdwpTag result_tag = BasicTagFromDescriptor(m->GetShorty()); const bool is_object_result = (result_tag == JDWP::JT_OBJECT); - StackHandleScope<2> hs(soa.Self()); + StackHandleScope<3> hs(soa.Self()); Handle<mirror::Object> object_result = hs.NewHandle(is_object_result ? result.GetL() : nullptr); Handle<mirror::Throwable> exception = hs.NewHandle(soa.Self()->GetException()); soa.Self()->ClearException(); @@ -4098,10 +4097,17 @@ void Dbg::ExecuteMethodWithoutPendingException(ScopedObjectAccess& soa, DebugInv // unless we threw, in which case we return null. DCHECK_EQ(JDWP::JT_VOID, result_tag); if (exceptionObjectId == 0) { - // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the - // object registry. - result_value = GetObjectRegistry()->Add(pReq->receiver.Read()); - result_tag = TagFromObject(soa, pReq->receiver.Read()); + if (m->GetDeclaringClass()->IsStringClass()) { + // For string constructors, the new string is remapped to the receiver (stored in ref). + Handle<mirror::Object> decoded_ref = hs.NewHandle(soa.Self()->DecodeJObject(ref.get())); + result_value = gRegistry->Add(decoded_ref); + result_tag = TagFromObject(soa, decoded_ref.Get()); + } else { + // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the + // object registry. + result_value = GetObjectRegistry()->Add(pReq->receiver.Read()); + result_tag = TagFromObject(soa, pReq->receiver.Read()); + } } else { result_value = 0; result_tag = JDWP::JT_OBJECT; @@ -4327,10 +4333,16 @@ void Dbg::DdmSendThreadNotification(Thread* t, uint32_t type) { Handle<mirror::String> name(hs.NewHandle(t->GetThreadName(soa))); size_t char_count = (name.Get() != nullptr) ? name->GetLength() : 0; const jchar* chars = (name.Get() != nullptr) ? name->GetValue() : nullptr; + bool is_compressed = (name.Get() != nullptr) ? 
name->IsCompressed() : false; std::vector<uint8_t> bytes; JDWP::Append4BE(bytes, t->GetThreadId()); - JDWP::AppendUtf16BE(bytes, chars, char_count); + if (is_compressed) { + const uint8_t* chars_compressed = name->GetValueCompressed(); + JDWP::AppendUtf16CompressedBE(bytes, chars_compressed, char_count); + } else { + JDWP::AppendUtf16BE(bytes, chars, char_count); + } CHECK_EQ(bytes.size(), char_count*2 + sizeof(uint32_t)*2); Dbg::DdmSendChunk(type, bytes); } diff --git a/runtime/entrypoints/quick/quick_math_entrypoints.cc b/runtime/entrypoints/quick/quick_math_entrypoints.cc index 1c658b7057..51d2784fd8 100644 --- a/runtime/entrypoints/quick/quick_math_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_math_entrypoints.cc @@ -18,10 +18,8 @@ namespace art { -#if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wfloat-equal" -#endif int CmplFloat(float a, float b) { if (a == b) { @@ -67,9 +65,7 @@ int CmplDouble(double a, double b) { return -1; } -#if defined(__clang__) #pragma clang diagnostic pop -#endif extern "C" int64_t artLmul(int64_t a, int64_t b) { return a * b; diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h index 4cf5b4f643..9feaf415a5 100644 --- a/runtime/gc/accounting/space_bitmap-inl.h +++ b/runtime/gc/accounting/space_bitmap-inl.h @@ -36,7 +36,7 @@ inline bool SpaceBitmap<kAlignment>::AtomicTestAndSet(const mirror::Object* obj) const uintptr_t offset = addr - heap_begin_; const size_t index = OffsetToIndex(offset); const uintptr_t mask = OffsetToMask(offset); - Atomic<uintptr_t>* atomic_entry = reinterpret_cast<Atomic<uintptr_t>*>(&bitmap_begin_[index]); + Atomic<uintptr_t>* atomic_entry = &bitmap_begin_[index]; DCHECK_LT(index, bitmap_size_ / sizeof(intptr_t)) << " bitmap_size_ = " << bitmap_size_; uintptr_t old_word; do { @@ -58,7 +58,7 @@ inline bool SpaceBitmap<kAlignment>::Test(const mirror::Object* obj) const { DCHECK(bitmap_begin_ != nullptr); DCHECK_GE(addr, heap_begin_); const uintptr_t offset = addr - heap_begin_; - return (bitmap_begin_[OffsetToIndex(offset)] & OffsetToMask(offset)) != 0; + return (bitmap_begin_[OffsetToIndex(offset)].LoadRelaxed() & OffsetToMask(offset)) != 0; } template<size_t kAlignment> template<typename Visitor> @@ -116,7 +116,7 @@ inline void SpaceBitmap<kAlignment>::VisitMarkedRange(uintptr_t visit_begin, uin // Traverse the middle, full part. for (size_t i = index_start + 1; i < index_end; ++i) { - uintptr_t w = bitmap_begin_[i]; + uintptr_t w = bitmap_begin_[i].LoadRelaxed(); if (w != 0) { const uintptr_t ptr_base = IndexToOffset(i) + heap_begin_; do { @@ -164,8 +164,8 @@ inline bool SpaceBitmap<kAlignment>::Modify(const mirror::Object* obj) { const size_t index = OffsetToIndex(offset); const uintptr_t mask = OffsetToMask(offset); DCHECK_LT(index, bitmap_size_ / sizeof(intptr_t)) << " bitmap_size_ = " << bitmap_size_; - uintptr_t* address = &bitmap_begin_[index]; - uintptr_t old_word = *address; + Atomic<uintptr_t>* atomic_entry = &bitmap_begin_[index]; + uintptr_t old_word = atomic_entry->LoadRelaxed(); if (kSetBit) { // Check the bit before setting the word incase we are trying to mark a read only bitmap // like an image space bitmap. This bitmap is mapped as read only and will fault if we @@ -173,10 +173,10 @@ inline bool SpaceBitmap<kAlignment>::Modify(const mirror::Object* obj) { // occur if we check before setting the bit. This also prevents dirty pages that would // occur if the bitmap was read write and we did not check the bit. 
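Stepping back to the DdmSendThreadNotification change above: a compressed (8-bit) thread name is widened to UTF-16 big-endian on the wire, so the existing size check of char_count * 2 plus two 32-bit headers still holds. A plausible sketch of what a helper like JDWP::AppendUtf16CompressedBE has to do; the real implementation lives in jdwp_bits.h and may differ in detail:

#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-ins for the existing JDWP::Append4BE / Append2BE helpers.
static void Append4BE(std::vector<uint8_t>& bytes, uint32_t value) {
  bytes.push_back(static_cast<uint8_t>(value >> 24));
  bytes.push_back(static_cast<uint8_t>(value >> 16));
  bytes.push_back(static_cast<uint8_t>(value >> 8));
  bytes.push_back(static_cast<uint8_t>(value));
}
static void Append2BE(std::vector<uint8_t>& bytes, uint16_t value) {
  bytes.push_back(static_cast<uint8_t>(value >> 8));
  bytes.push_back(static_cast<uint8_t>(value));
}

// Write the 4-byte length, then widen each compressed character to big-endian
// UTF-16, producing char_count * 2 payload bytes just like the uncompressed path.
static void AppendUtf16CompressedBE(std::vector<uint8_t>& bytes,
                                    const uint8_t* chars, size_t char_count) {
  Append4BE(bytes, static_cast<uint32_t>(char_count));
  for (size_t i = 0; i < char_count; ++i) {
    Append2BE(bytes, static_cast<uint16_t>(chars[i]));
  }
}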
if ((old_word & mask) == 0) { - *address = old_word | mask; + atomic_entry->StoreRelaxed(old_word | mask); } } else { - *address = old_word & ~mask; + atomic_entry->StoreRelaxed(old_word & ~mask); } DCHECK_EQ(Test(obj), kSetBit); return (old_word & mask) != 0; diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc index b43f77f6b7..3df02ed443 100644 --- a/runtime/gc/accounting/space_bitmap.cc +++ b/runtime/gc/accounting/space_bitmap.cc @@ -51,7 +51,9 @@ SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::CreateFromMemMap( template<size_t kAlignment> SpaceBitmap<kAlignment>::SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin, size_t bitmap_size, const void* heap_begin) - : mem_map_(mem_map), bitmap_begin_(bitmap_begin), bitmap_size_(bitmap_size), + : mem_map_(mem_map), + bitmap_begin_(reinterpret_cast<Atomic<uintptr_t>*>(bitmap_begin)), + bitmap_size_(bitmap_size), heap_begin_(reinterpret_cast<uintptr_t>(heap_begin)), name_(name) { CHECK(bitmap_begin_ != nullptr); @@ -104,7 +106,12 @@ void SpaceBitmap<kAlignment>::Clear() { template<size_t kAlignment> void SpaceBitmap<kAlignment>::CopyFrom(SpaceBitmap* source_bitmap) { DCHECK_EQ(Size(), source_bitmap->Size()); - std::copy(source_bitmap->Begin(), source_bitmap->Begin() + source_bitmap->Size() / sizeof(intptr_t), Begin()); + const size_t count = source_bitmap->Size() / sizeof(intptr_t); + Atomic<uintptr_t>* const src = source_bitmap->Begin(); + Atomic<uintptr_t>* const dest = Begin(); + for (size_t i = 0; i < count; ++i) { + dest[i].StoreRelaxed(src[i].LoadRelaxed()); + } } template<size_t kAlignment> @@ -113,9 +120,9 @@ void SpaceBitmap<kAlignment>::Walk(ObjectCallback* callback, void* arg) { CHECK(callback != nullptr); uintptr_t end = OffsetToIndex(HeapLimit() - heap_begin_ - 1); - uintptr_t* bitmap_begin = bitmap_begin_; + Atomic<uintptr_t>* bitmap_begin = bitmap_begin_; for (uintptr_t i = 0; i <= end; ++i) { - uintptr_t w = bitmap_begin[i]; + uintptr_t w = bitmap_begin[i].LoadRelaxed(); if (w != 0) { uintptr_t ptr_base = IndexToOffset(i) + heap_begin_; do { @@ -160,10 +167,10 @@ void SpaceBitmap<kAlignment>::SweepWalk(const SpaceBitmap<kAlignment>& live_bitm size_t start = OffsetToIndex(sweep_begin - live_bitmap.heap_begin_); size_t end = OffsetToIndex(sweep_end - live_bitmap.heap_begin_ - 1); CHECK_LT(end, live_bitmap.Size() / sizeof(intptr_t)); - uintptr_t* live = live_bitmap.bitmap_begin_; - uintptr_t* mark = mark_bitmap.bitmap_begin_; + Atomic<uintptr_t>* live = live_bitmap.bitmap_begin_; + Atomic<uintptr_t>* mark = mark_bitmap.bitmap_begin_; for (size_t i = start; i <= end; i++) { - uintptr_t garbage = live[i] & ~mark[i]; + uintptr_t garbage = live[i].LoadRelaxed() & ~mark[i].LoadRelaxed(); if (UNLIKELY(garbage != 0)) { uintptr_t ptr_base = IndexToOffset(i) + live_bitmap.heap_begin_; do { @@ -251,7 +258,7 @@ void SpaceBitmap<kAlignment>::InOrderWalk(ObjectCallback* callback, void* arg) { uintptr_t end = Size() / sizeof(intptr_t); for (uintptr_t i = 0; i < end; ++i) { // Need uint for unsigned shift. 
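The SpaceBitmap hunks in this area replace raw uintptr_t words with Atomic<uintptr_t> and relaxed accesses. A minimal sketch of the same pattern with std::atomic, assuming ART's LoadRelaxed/StoreRelaxed map onto memory_order_relaxed; like SpaceBitmap::Modify() above, the setter is deliberately a plain read-then-write rather than an atomic read-modify-write:

#include <atomic>
#include <cstddef>
#include <cstdint>

// One word of a mark bitmap. Relaxed atomics rule out torn reads; any ordering
// that is needed comes from the surrounding GC phase transitions.
using BitmapWord = std::atomic<uintptr_t>;

inline bool TestBit(const BitmapWord* words, size_t index, uintptr_t mask) {
  return (words[index].load(std::memory_order_relaxed) & mask) != 0;
}

// Checks the bit before writing so that a read-only (image) bitmap page is
// never dirtied or faulted, mirroring the Modify() logic above.
inline void SetBitNonAtomic(BitmapWord* words, size_t index, uintptr_t mask) {
  const uintptr_t old_word = words[index].load(std::memory_order_relaxed);
  if ((old_word & mask) == 0) {
    words[index].store(old_word | mask, std::memory_order_relaxed);
  }
}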
- uintptr_t w = bitmap_begin_[i]; + uintptr_t w = bitmap_begin_[i].LoadRelaxed(); if (UNLIKELY(w != 0)) { uintptr_t ptr_base = IndexToOffset(i) + heap_begin_; while (w != 0) { diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h index b8ff471c69..829b1b1644 100644 --- a/runtime/gc/accounting/space_bitmap.h +++ b/runtime/gc/accounting/space_bitmap.h @@ -147,7 +147,7 @@ class SpaceBitmap { void CopyFrom(SpaceBitmap* source_bitmap); // Starting address of our internal storage. - uintptr_t* Begin() { + Atomic<uintptr_t>* Begin() { return bitmap_begin_; } @@ -215,7 +215,7 @@ class SpaceBitmap { std::unique_ptr<MemMap> mem_map_; // This bitmap itself, word sized for efficiency in scanning. - uintptr_t* const bitmap_begin_; + Atomic<uintptr_t>* const bitmap_begin_; // Size of this bitmap. size_t bitmap_size_; diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h index fb774a4d1e..76f500c204 100644 --- a/runtime/gc/collector/concurrent_copying-inl.h +++ b/runtime/gc/collector/concurrent_copying-inl.h @@ -34,32 +34,27 @@ inline mirror::Object* ConcurrentCopying::MarkUnevacFromSpaceRegion( // to gray even though the object has already been marked through. This happens if a mutator // thread gets preempted before the AtomicSetReadBarrierPointer below, GC marks through the // object (changes it from white to gray and back to white), and the thread runs and - // incorrectly changes it from white to gray. We need to detect such "false gray" cases and - // change the objects back to white at the end of marking. + // incorrectly changes it from white to gray. If this happens, the object will get added to the + // mark stack again and get changed back to white after it is processed. if (kUseBakerReadBarrier) { - // Test the bitmap first to reduce the chance of false gray cases. + // Test the bitmap first to avoid, most of the time, graying an object that has already + // been marked through. if (bitmap->Test(ref)) { return ref; } } // This may or may not succeed, which is ok because the object may already be gray. - bool cas_success = false; + bool success = false; if (kUseBakerReadBarrier) { - cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), - ReadBarrier::GrayPtr()); - } - if (bitmap->AtomicTestAndSet(ref)) { - // Already marked. - if (kUseBakerReadBarrier && - cas_success && - // The object could be white here if a thread gets preempted after a success at the - // above AtomicSetReadBarrierPointer, GC has marked through it, and the thread runs up - // to this point. - ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) { - // Register a "false-gray" object to change it from gray to white at the end of marking. - PushOntoFalseGrayStack(ref); - } + // GC will mark the bitmap when popping from mark stack. If only the GC is touching the bitmap + // we can avoid an expensive CAS. + // For the baker case, an object is marked if either its mark bit or its bitmap bit is + // set. + success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr()); } else { + success = !bitmap->AtomicTestAndSet(ref); + } + if (success) { // Newly marked.
if (kUseBakerReadBarrier) { DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr()); @@ -99,13 +94,16 @@ inline mirror::Object* ConcurrentCopying::MarkImmuneSpace(mirror::Object* ref) { return ref; } -template<bool kGrayImmuneObject> +template<bool kGrayImmuneObject, bool kFromGCThread> inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { if (from_ref == nullptr) { return nullptr; } DCHECK(heap_->collector_type_ == kCollectorTypeCC); - if (UNLIKELY(kUseBakerReadBarrier && !is_active_)) { + if (kFromGCThread) { + DCHECK(is_active_); + DCHECK_EQ(Thread::Current(), thread_running_gc_); + } else if (UNLIKELY(kUseBakerReadBarrier && !is_active_)) { // In the lock word forward address state, the read barrier bits // in the lock word are part of the stored forwarding address and // invalid. This is usually OK as the from-space copy of objects @@ -192,6 +190,16 @@ inline mirror::Object* ConcurrentCopying::GetFwdPtr(mirror::Object* from_ref) { } } +inline bool ConcurrentCopying::IsMarkedInUnevacFromSpace(mirror::Object* from_ref) { + // Use load acquire on the read barrier pointer to ensure that we never see a white read barrier + // pointer with an unmarked bit due to reordering. + DCHECK(region_space_->IsInUnevacFromSpace(from_ref)); + if (kUseBakerReadBarrier && from_ref->GetReadBarrierPointerAcquire() == ReadBarrier::GrayPtr()) { + return true; + } + return region_space_bitmap_->Test(from_ref); +} + } // namespace collector } // namespace gc } // namespace art diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 42816a04f1..651669e325 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -1302,8 +1302,19 @@ inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) { << " " << to_ref << " " << to_ref->GetReadBarrierPointer() << " is_marked=" << IsMarked(to_ref); } - // Scan ref fields. - Scan(to_ref); + bool add_to_live_bytes = false; + if (region_space_->IsInUnevacFromSpace(to_ref)) { + // Mark the bitmap only in the GC thread here so that we don't need a CAS. + if (!kUseBakerReadBarrier || !region_space_bitmap_->Set(to_ref)) { + // It may be already marked if we accidentally pushed the same object twice due to the racy + // bitmap read in MarkUnevacFromSpaceRegion. + Scan(to_ref); + // Only add to the live bytes if the object was not already marked. + add_to_live_bytes = true; + } + } else { + Scan(to_ref); + } if (kUseBakerReadBarrier) { DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) << " " << to_ref << " " << to_ref->GetReadBarrierPointer() @@ -1332,7 +1343,7 @@ inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) { DCHECK(!kUseBakerReadBarrier); #endif - if (region_space_->IsInUnevacFromSpace(to_ref)) { + if (add_to_live_bytes) { // Add to the live bytes per unevacuated from space. Note this code is always run by the // GC-running thread (no synchronization required). DCHECK(region_space_bitmap_->Test(to_ref)); @@ -1567,7 +1578,7 @@ void ConcurrentCopying::AssertToSpaceInvariant(mirror::Object* obj, MemberOffset // OK. return; } else if (region_space_->IsInUnevacFromSpace(ref)) { - CHECK(region_space_bitmap_->Test(ref)) << ref; + CHECK(IsMarkedInUnevacFromSpace(ref)) << ref; } else if (region_space_->IsInFromSpace(ref)) { // Not OK. Do extra logging. if (obj != nullptr) { @@ -1614,7 +1625,7 @@ void ConcurrentCopying::AssertToSpaceInvariant(GcRootSource* gc_root_source, // OK. 
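[Editor's sketch] A minimal illustration, with hypothetical names, of the ordering that the new IsMarkedInUnevacFromSpace depends on: the GC sets the bitmap bit and only afterwards release-stores the object back to white, so a reader that acquire-loads the color and finds it white is guaranteed to also observe the bitmap bit.

#include <atomic>
#include <cstdint>

enum Color : uint32_t { kWhite = 0, kGray = 1 };

struct MarkState {
  std::atomic<uint32_t> color{kGray};   // stands in for the read barrier state
  std::atomic<bool> bitmap_bit{false};  // stands in for the region space bitmap bit
};

void GcFinishMarking(MarkState* s) {
  s->bitmap_bit.store(true, std::memory_order_relaxed);
  s->color.store(kWhite, std::memory_order_release);  // publishes the bitmap write
}

bool IsMarkedSketch(MarkState* s) {
  if (s->color.load(std::memory_order_acquire) == kGray) {
    return true;  // still gray: marked (or being marked)
  }
  return s->bitmap_bit.load(std::memory_order_relaxed);
}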
return; } else if (region_space_->IsInUnevacFromSpace(ref)) { - CHECK(region_space_bitmap_->Test(ref)) << ref; + CHECK(IsMarkedInUnevacFromSpace(ref)) << ref; } else if (region_space_->IsInFromSpace(ref)) { // Not OK. Do extra logging. if (gc_root_source == nullptr) { @@ -1654,7 +1665,7 @@ void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset LOG(INFO) << "holder is in the to-space."; } else if (region_space_->IsInUnevacFromSpace(obj)) { LOG(INFO) << "holder is in the unevac from-space."; - if (region_space_bitmap_->Test(obj)) { + if (IsMarkedInUnevacFromSpace(obj)) { LOG(INFO) << "holder is marked in the region space bitmap."; } else { LOG(INFO) << "holder is not marked in the region space bitmap."; @@ -1783,7 +1794,7 @@ inline void ConcurrentCopying::Process(mirror::Object* obj, MemberOffset offset) DCHECK_EQ(Thread::Current(), thread_running_gc_); mirror::Object* ref = obj->GetFieldObject< mirror::Object, kVerifyNone, kWithoutReadBarrier, false>(offset); - mirror::Object* to_ref = Mark</*kGrayImmuneObject*/false>(ref); + mirror::Object* to_ref = Mark</*kGrayImmuneObject*/false, /*kFromGCThread*/true>(ref); if (to_ref == ref) { return; } @@ -2126,7 +2137,7 @@ mirror::Object* ConcurrentCopying::IsMarked(mirror::Object* from_ref) { heap_->non_moving_space_->HasAddress(to_ref)) << "from_ref=" << from_ref << " to_ref=" << to_ref; } else if (rtype == space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace) { - if (region_space_bitmap_->Test(from_ref)) { + if (IsMarkedInUnevacFromSpace(from_ref)) { to_ref = from_ref; } else { to_ref = nullptr; diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index 5b0e2d6274..97f45551cf 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -104,7 +104,7 @@ class ConcurrentCopying : public GarbageCollector { DCHECK(ref != nullptr); return IsMarked(ref) == ref; } - template<bool kGrayImmuneObject = true> + template<bool kGrayImmuneObject = true, bool kFromGCThread = false> ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); @@ -179,6 +179,8 @@ class ConcurrentCopying : public GarbageCollector { REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); virtual mirror::Object* IsMarked(mirror::Object* from_ref) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_); + bool IsMarkedInUnevacFromSpace(mirror::Object* from_ref) + SHARED_REQUIRES(Locks::mutator_lock_); virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_); void SweepSystemWeaks(Thread* self) diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index 4505c249fe..c87312b655 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -454,8 +454,7 @@ class ImageSpaceLoader { const std::string& image_filename, bool is_zygote, bool is_global_cache, - bool is_system, - bool relocated_version_used, + bool validate_oat_file, std::string* error_msg) SHARED_REQUIRES(Locks::mutator_lock_) { // Note that we must not use the file descriptor associated with @@ -483,7 +482,7 @@ class ImageSpaceLoader { // file name. 
return Init(image_filename.c_str(), image_location, - !(is_system || relocated_version_used), + validate_oat_file, /* oat_file */nullptr, error_msg); } @@ -1197,9 +1196,9 @@ class ImageSpaceLoader { for (int32_t i = 0, count = dex_caches->GetLength(); i < count; ++i) { mirror::DexCache* dex_cache = dex_caches->Get<kVerifyNone, kWithoutReadBarrier>(i); // Fix up dex cache pointers. - GcRoot<mirror::String>* strings = dex_cache->GetStrings(); + mirror::StringDexCacheType* strings = dex_cache->GetStrings(); if (strings != nullptr) { - GcRoot<mirror::String>* new_strings = fixup_adapter.ForwardObject(strings); + mirror::StringDexCacheType* new_strings = fixup_adapter.ForwardObject(strings); if (strings != new_strings) { dex_cache->SetStrings(new_strings); } @@ -1473,8 +1472,7 @@ std::unique_ptr<ImageSpace> ImageSpace::CreateBootImage(const char* image_locati cache_filename, is_zygote, is_global_cache, - /* is_system */ false, - /* relocated_version_used */ true, + /* validate_oat_file */ false, &local_error_msg); if (relocated_space != nullptr) { return relocated_space; @@ -1491,8 +1489,7 @@ std::unique_ptr<ImageSpace> ImageSpace::CreateBootImage(const char* image_locati cache_filename, is_zygote, is_global_cache, - /* is_system */ false, - /* relocated_version_used */ true, + /* validate_oat_file */ true, &local_error_msg); if (cache_space != nullptr) { return cache_space; @@ -1512,8 +1509,7 @@ std::unique_ptr<ImageSpace> ImageSpace::CreateBootImage(const char* image_locati system_filename, is_zygote, is_global_cache, - /* is_system */ true, - /* relocated_version_used */ false, + /* validate_oat_file */ false, &local_error_msg); if (system_space != nullptr) { return system_space; @@ -1538,8 +1534,7 @@ std::unique_ptr<ImageSpace> ImageSpace::CreateBootImage(const char* image_locati cache_filename, is_zygote, is_global_cache, - /* is_system */ false, - /* relocated_version_used */ true, + /* validate_oat_file */ false, &local_error_msg); if (patched_space != nullptr) { return patched_space; @@ -1568,8 +1563,7 @@ std::unique_ptr<ImageSpace> ImageSpace::CreateBootImage(const char* image_locati cache_filename, is_zygote, is_global_cache, - /* is_system */ false, - /* relocated_version_used */ true, + /* validate_oat_file */ false, &local_error_msg); if (compiled_space != nullptr) { return compiled_space; diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h index 716c23d1b0..40b71c40a4 100644 --- a/runtime/generated/asm_support_gen.h +++ b/runtime/generated/asm_support_gen.h @@ -70,6 +70,16 @@ DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_JNI_OFFSET_64), (static_cast<int DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_32), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k32).Int32Value()))) #define ART_METHOD_QUICK_CODE_OFFSET_64 48 DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_QUICK_CODE_OFFSET_64), (static_cast<int32_t>(art::ArtMethod:: EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k64).Int32Value()))) +#define ART_METHOD_DECLARING_CLASS_OFFSET 0 +DEFINE_CHECK_EQ(static_cast<int32_t>(ART_METHOD_DECLARING_CLASS_OFFSET), (static_cast<int32_t>(art::ArtMethod:: DeclaringClassOffset().Int32Value()))) +#define DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET 40 +DEFINE_CHECK_EQ(static_cast<int32_t>(DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET), (static_cast<int32_t>(art::mirror::Class:: DexCacheStringsOffset().Int32Value()))) +#define STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT 3 
+DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT), (static_cast<int32_t>(art::WhichPowerOf2(sizeof(art::mirror::StringDexCachePair))))) +#define STRING_DEX_CACHE_SIZE_MINUS_ONE 1023 +DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_SIZE_MINUS_ONE), (static_cast<int32_t>(art::mirror::DexCache::kDexCacheStringCacheSize - 1))) +#define STRING_DEX_CACHE_HASH_BITS 10 +DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_HASH_BITS), (static_cast<int32_t>(art::LeastSignificantBit(art::mirror::DexCache::kDexCacheStringCacheSize)))) #define MIN_LARGE_OBJECT_THRESHOLD 0x3000 DEFINE_CHECK_EQ(static_cast<size_t>(MIN_LARGE_OBJECT_THRESHOLD), (static_cast<size_t>(art::gc::Heap::kMinLargeObjectThreshold))) #define LOCK_WORD_STATE_SHIFT 30 diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc index 9895395169..4005f054d5 100644 --- a/runtime/hprof/hprof.cc +++ b/runtime/hprof/hprof.cc @@ -223,6 +223,12 @@ class EndianOutput { HandleU1List(values, count); length_ += count; } + void AddU1AsU2List(const uint8_t* values, size_t count) { + HandleU1AsU2List(values, count); + // Array of char from compressed String (8-bit) is added as 16-bit blocks + int ceil_count_to_even = count + ((count & 1) ? 1 : 0); + length_ += ceil_count_to_even * sizeof(uint8_t); + } void AddU2List(const uint16_t* values, size_t count) { HandleU2List(values, count); length_ += count * sizeof(uint16_t); @@ -268,6 +274,9 @@ class EndianOutput { virtual void HandleU1List(const uint8_t* values ATTRIBUTE_UNUSED, size_t count ATTRIBUTE_UNUSED) { } + virtual void HandleU1AsU2List(const uint8_t* values ATTRIBUTE_UNUSED, + size_t count ATTRIBUTE_UNUSED) { + } virtual void HandleU2List(const uint16_t* values ATTRIBUTE_UNUSED, size_t count ATTRIBUTE_UNUSED) { } @@ -308,6 +317,19 @@ class EndianOutputBuffered : public EndianOutput { buffer_.insert(buffer_.end(), values, values + count); } + void HandleU1AsU2List(const uint8_t* values, size_t count) OVERRIDE { + DCHECK_EQ(length_, buffer_.size()); + // All 8-bits are grouped in 2 to make 16-bit block like Java Char + if (count & 1) { + buffer_.push_back(0); + } + for (size_t i = 0; i < count; ++i) { + uint8_t value = *values; + buffer_.push_back(value); + values++; + } + } + void HandleU2List(const uint16_t* values, size_t count) OVERRIDE { DCHECK_EQ(length_, buffer_.size()); for (size_t i = 0; i < count; ++i) { @@ -1354,7 +1376,11 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { string_value = reinterpret_cast<mirror::Object*>( reinterpret_cast<uintptr_t>(s) + kObjectAlignment); } else { - string_value = reinterpret_cast<mirror::Object*>(s->GetValue()); + if (s->IsCompressed()) { + string_value = reinterpret_cast<mirror::Object*>(s->GetValueCompressed()); + } else { + string_value = reinterpret_cast<mirror::Object*>(s->GetValue()); + } } __ AddObjectId(string_value); } @@ -1369,12 +1395,18 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { CHECK_EQ(obj->IsString(), string_value != nullptr); if (string_value != nullptr) { mirror::String* s = obj->AsString(); + // Compressed string's (8-bit) length is ceil(length/2) in 16-bit blocks + int length_in_16_bit = (s->IsCompressed()) ? 
((s->GetLength() + 1) / 2) : s->GetLength(); __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP); __ AddObjectId(string_value); __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); - __ AddU4(s->GetLength()); + __ AddU4(length_in_16_bit); __ AddU1(hprof_basic_char); - __ AddU2List(s->GetValue(), s->GetLength()); + if (s->IsCompressed()) { + __ AddU1AsU2List(s->GetValueCompressed(), s->GetLength()); + } else { + __ AddU2List(s->GetValue(), s->GetLength()); + } } } diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc index eceb593e08..1940d67316 100644 --- a/runtime/intern_table.cc +++ b/runtime/intern_table.cc @@ -386,8 +386,23 @@ bool InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& a, if (a_length != b.GetUtf16Length()) { return false; } - const uint16_t* a_value = a_string->GetValue(); - return CompareModifiedUtf8ToUtf16AsCodePointValues(b.GetUtf8Data(), a_value, a_length) == 0; + if (a_string->IsCompressed()) { + size_t b_byte_count = strlen(b.GetUtf8Data()); + size_t b_utf8_length = CountModifiedUtf8Chars(b.GetUtf8Data(), b_byte_count); + // Modified UTF-8 single byte character range is 0x01 .. 0x7f + // The string compression occurs on regular ASCII with same exact range, + // not on extended ASCII which up to 0xff + const bool is_b_regular_ascii = (b_byte_count == b_utf8_length); + if (is_b_regular_ascii) { + return memcmp(b.GetUtf8Data(), + a_string->GetValueCompressed(), a_length * sizeof(uint8_t)) == 0; + } else { + return false; + } + } else { + const uint16_t* a_value = a_string->GetValue(); + return CompareModifiedUtf8ToUtf16AsCodePointValues(b.GetUtf8Data(), a_value, a_length) == 0; + } } size_t InternTable::Table::AddTableFromMemory(const uint8_t* ptr) { diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc index f1f7f42117..101c9a1438 100644 --- a/runtime/interpreter/interpreter.cc +++ b/runtime/interpreter/interpreter.cc @@ -20,6 +20,9 @@ #include "common_throws.h" #include "interpreter_common.h" +#include "interpreter_goto_table_impl.h" +#include "interpreter_mterp_impl.h" +#include "interpreter_switch_impl.h" #include "mirror/string-inl.h" #include "scoped_thread_state_change.h" #include "ScopedLocalRef.h" @@ -242,28 +245,6 @@ static std::ostream& operator<<(std::ostream& os, const InterpreterImplKind& rhs static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind; -#if defined(__clang__) -// Clang 3.4 fails to build the goto interpreter implementation. -template<bool do_access_check, bool transaction_active> -JValue ExecuteGotoImpl(Thread*, const DexFile::CodeItem*, ShadowFrame&, JValue) { - LOG(FATAL) << "UNREACHABLE"; - UNREACHABLE(); -} -// Explicit definitions of ExecuteGotoImpl. 
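[Editor's sketch] The StringHashEquals change above compares a modified-UTF-8 key against a compressed (8-bit) string with memcmp only when the key is pure ASCII. A standalone sketch of that fast path, assuming the caller has already verified that the UTF-16 lengths match; the helper name is made up.

#include <cstdint>
#include <cstring>

bool EqualsCompressedAscii(const char* utf8_key, const uint8_t* compressed, size_t char_count) {
  // A byte count equal to the (matching) character count means every code point
  // is a single byte, i.e. the 0x01..0x7f range that string compression also uses.
  const size_t byte_count = strlen(utf8_key);
  if (byte_count != char_count) {
    return false;  // multi-byte sequences present: cannot equal an ASCII-only string
  }
  return memcmp(utf8_key, compressed, char_count) == 0;
}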
-template<> SHARED_REQUIRES(Locks::mutator_lock_) -JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item, - ShadowFrame& shadow_frame, JValue result_register); -template<> SHARED_REQUIRES(Locks::mutator_lock_) -JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item, - ShadowFrame& shadow_frame, JValue result_register); -template<> SHARED_REQUIRES(Locks::mutator_lock_) -JValue ExecuteGotoImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item, - ShadowFrame& shadow_frame, JValue result_register); -template<> SHARED_REQUIRES(Locks::mutator_lock_) -JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item, - ShadowFrame& shadow_frame, JValue result_register); -#endif - static inline JValue Execute( Thread* self, const DexFile::CodeItem* code_item, diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index 4fd1514e39..7b38473dd7 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -23,6 +23,7 @@ #include <iostream> #include <sstream> +#include <atomic> #include "art_field-inl.h" #include "art_method-inl.h" @@ -37,6 +38,8 @@ #include "handle_scope-inl.h" #include "jit/jit.h" #include "mirror/class-inl.h" +#include "mirror/dex_cache.h" +#include "mirror/method.h" #include "mirror/object-inl.h" #include "mirror/object_array-inl.h" #include "mirror/string-inl.h" @@ -62,21 +65,6 @@ using ::art::mirror::Throwable; namespace art { namespace interpreter { -// External references to all interpreter implementations. - -template<bool do_access_check, bool transaction_active> -extern JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, - ShadowFrame& shadow_frame, JValue result_register, - bool interpret_one_instruction); - -template<bool do_access_check, bool transaction_active> -extern JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, - ShadowFrame& shadow_frame, JValue result_register); - -// Mterp does not support transactions or access check, thus no templated versions. -extern "C" bool ExecuteMterpImpl(Thread* self, const DexFile::CodeItem* code_item, - ShadowFrame* shadow_frame, JValue* result_register); - void ThrowNullPointerExceptionFromInterpreter() SHARED_REQUIRES(Locks::mutator_lock_); @@ -264,15 +252,20 @@ static inline String* ResolveString(Thread* self, ShadowFrame& shadow_frame, uin ArtMethod* method = shadow_frame.GetMethod(); mirror::Class* declaring_class = method->GetDeclaringClass(); // MethodVerifier refuses methods with string_idx out of bounds. 
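[Editor's sketch] The ResolveString change just below reads strings through a fixed-size, index-tagged cache rather than a per-dex-file array. A simplified standalone sketch of that lookup/store discipline, using plain pointers and a made-up Pair type instead of GcRoot and StringDexCachePair; the real pair is packed to 8 bytes so the atomic slot is lock-free.

#include <atomic>
#include <cstdint>

struct Pair {
  void* pointer;   // resolved object, or nullptr
  uint32_t index;  // dex string index the pointer belongs to
};

constexpr uint32_t kCacheSize = 1024;  // power of two, so modulo is a cheap mask

void* Lookup(std::atomic<Pair>* cache, uint32_t string_idx) {
  const Pair p = cache[string_idx & (kCacheSize - 1)].load(std::memory_order_relaxed);
  return (p.index == string_idx) ? p.pointer : nullptr;  // miss: fall back to slow resolution
}

void Store(std::atomic<Pair>* cache, uint32_t string_idx, void* resolved) {
  cache[string_idx & (kCacheSize - 1)].store(Pair{resolved, string_idx},
                                             std::memory_order_relaxed);
}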
- DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings()); - mirror::String* s = declaring_class->GetDexCacheStrings()[string_idx].Read(); - if (UNLIKELY(s == nullptr)) { + DCHECK_LT(string_idx % mirror::DexCache::kDexCacheStringCacheSize, + declaring_class->GetDexFile().NumStringIds()); + mirror::String* string_ptr = + mirror::StringDexCachePair::LookupString(declaring_class->GetDexCacheStrings(), + string_idx, + mirror::DexCache::kDexCacheStringCacheSize).Read(); + if (UNLIKELY(string_ptr == nullptr)) { StackHandleScope<1> hs(self); Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache())); - s = Runtime::Current()->GetClassLinker()->ResolveString(*method->GetDexFile(), string_idx, - dex_cache); + string_ptr = Runtime::Current()->GetClassLinker()->ResolveString(*method->GetDexFile(), + string_idx, + dex_cache); } - return s; + return string_ptr; } // Handles div-int, div-int/2addr, div-int/li16 and div-int/lit8 instructions. @@ -442,7 +435,7 @@ static inline void TraceExecution(const ShadowFrame& shadow_frame, const Instruc oss << StringPrintf(" vreg%u=0x%08X", i, raw_value); if (ref_value != nullptr) { if (ref_value->GetClass()->IsStringClass() && - ref_value->AsString()->GetValue() != nullptr) { + !ref_value->AsString()->IsValueNull()) { oss << "/java.lang.String \"" << ref_value->AsString()->ToModifiedUtf8() << "\""; } else { oss << "/" << PrettyTypeOf(ref_value); diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index 43b27781e4..37dd63b4d8 100644 --- a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -14,18 +14,29 @@ * limitations under the License. */ -#if !defined(__clang__) -// Clang 3.4 fails to build the goto interpreter implementation. +#include "interpreter_goto_table_impl.h" +// Common includes +#include "base/logging.h" +#include "base/macros.h" +#include "base/mutex.h" +#include "stack.h" +#include "thread.h" +// Clang compiles the GOTO interpreter very slowly. So we skip it. These are the implementation +// details only necessary when compiling it. +#if !defined(__clang__) #include "experimental_flags.h" #include "interpreter_common.h" #include "jit/jit.h" #include "safe_math.h" +#endif namespace art { namespace interpreter { +#if !defined(__clang__) + // In the following macros, we expect the following local variables exist: // - "self": the current Thread*. // - "inst" : the current Instruction*. @@ -530,8 +541,7 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF if (LIKELY(c != nullptr)) { if (UNLIKELY(c->IsStringClass())) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - obj = String::Alloc<true>(self, 0, allocator_type, visitor); + obj = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { obj = AllocObjectFromCode<do_access_check, true>( inst->VRegB_21c(), shadow_frame.GetMethod(), self, @@ -2558,20 +2568,40 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF } // NOLINT(readability/fn_size) // Explicit definitions of ExecuteGotoImpl. 
-template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR +template HOT_ATTR JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame, JValue result_register); -template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR +template HOT_ATTR JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame, JValue result_register); -template SHARED_REQUIRES(Locks::mutator_lock_) +template JValue ExecuteGotoImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame, JValue result_register); -template SHARED_REQUIRES(Locks::mutator_lock_) +template JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame, JValue result_register); -} // namespace interpreter -} // namespace art +#else +template<bool do_access_check, bool transaction_active> +JValue ExecuteGotoImpl(Thread*, const DexFile::CodeItem*, ShadowFrame&, JValue) { + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); +} +// Explicit definitions of ExecuteGotoImpl. +template<> +JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item, + ShadowFrame& shadow_frame, JValue result_register); +template<> +JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item, + ShadowFrame& shadow_frame, JValue result_register); +template<> +JValue ExecuteGotoImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item, + ShadowFrame& shadow_frame, JValue result_register); +template<> +JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item, + ShadowFrame& shadow_frame, JValue result_register); #endif + +} // namespace interpreter +} // namespace art diff --git a/runtime/interpreter/interpreter_goto_table_impl.h b/runtime/interpreter/interpreter_goto_table_impl.h new file mode 100644 index 0000000000..bb9be881fe --- /dev/null +++ b/runtime/interpreter/interpreter_goto_table_impl.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_ +#define ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_ + +#include "base/macros.h" +#include "base/mutex.h" +#include "dex_file.h" +#include "jvalue.h" + +namespace art { + +class ShadowFrame; +class Thread; + +namespace interpreter { + +template<bool do_access_check, bool transaction_active> +JValue ExecuteGotoImpl(Thread* self, + const DexFile::CodeItem* code_item, + ShadowFrame& shadow_frame, + JValue result_register) SHARED_REQUIRES(Locks::mutator_lock_); + +} // namespace interpreter +} // namespace art + +#endif // ART_RUNTIME_INTERPRETER_INTERPRETER_GOTO_TABLE_IMPL_H_ diff --git a/runtime/interpreter/interpreter_mterp_impl.h b/runtime/interpreter/interpreter_mterp_impl.h new file mode 100644 index 0000000000..322df4e9e0 --- /dev/null +++ b/runtime/interpreter/interpreter_mterp_impl.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_ +#define ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_ + +#include "base/macros.h" +#include "base/mutex.h" +#include "dex_file.h" +#include "jvalue.h" + +namespace art { + +class ShadowFrame; +class Thread; + +namespace interpreter { + +// Mterp does not support transactions or access check, thus no templated versions. +extern "C" bool ExecuteMterpImpl(Thread* self, + const DexFile::CodeItem* code_item, + ShadowFrame* shadow_frame, + JValue* result_register) SHARED_REQUIRES(Locks::mutator_lock_); + +} // namespace interpreter +} // namespace art + +#endif // ART_RUNTIME_INTERPRETER_INTERPRETER_MTERP_IMPL_H_ diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index a6349fcf88..227130e7d8 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#include "interpreter_switch_impl.h" + #include "base/enums.h" #include "experimental_flags.h" #include "interpreter_common.h" @@ -477,8 +479,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (LIKELY(c != nullptr)) { if (UNLIKELY(c->IsStringClass())) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - obj = String::Alloc<true>(self, 0, allocator_type, visitor); + obj = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { obj = AllocObjectFromCode<do_access_check, true>( inst->VRegB_21c(), shadow_frame.GetMethod(), self, @@ -622,10 +623,8 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, break; } -#if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wfloat-equal" -#endif case Instruction::CMPL_FLOAT: { PREAMBLE(); @@ -693,9 +692,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, break; } -#if defined(__clang__) #pragma clang diagnostic pop -#endif case Instruction::CMP_LONG: { PREAMBLE(); @@ -2337,19 +2334,19 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, } // NOLINT(readability/fn_size) // Explicit definitions of ExecuteSwitchImpl. -template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR +template HOT_ATTR JValue ExecuteSwitchImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame, JValue result_register, bool interpret_one_instruction); -template SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR +template HOT_ATTR JValue ExecuteSwitchImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame, JValue result_register, bool interpret_one_instruction); -template SHARED_REQUIRES(Locks::mutator_lock_) +template JValue ExecuteSwitchImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame, JValue result_register, bool interpret_one_instruction); -template SHARED_REQUIRES(Locks::mutator_lock_) +template JValue ExecuteSwitchImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame, JValue result_register, bool interpret_one_instruction); diff --git a/runtime/interpreter/interpreter_switch_impl.h b/runtime/interpreter/interpreter_switch_impl.h new file mode 100644 index 0000000000..90ec908678 --- /dev/null +++ b/runtime/interpreter/interpreter_switch_impl.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_ +#define ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_ + +#include "base/macros.h" +#include "base/mutex.h" +#include "dex_file.h" +#include "jvalue.h" + +namespace art { + +class ShadowFrame; +class Thread; + +namespace interpreter { + +template<bool do_access_check, bool transaction_active> +JValue ExecuteSwitchImpl(Thread* self, + const DexFile::CodeItem* code_item, + ShadowFrame& shadow_frame, + JValue result_register, + bool interpret_one_instruction) SHARED_REQUIRES(Locks::mutator_lock_); + +} // namespace interpreter +} // namespace art + +#endif // ART_RUNTIME_INTERPRETER_INTERPRETER_SWITCH_IMPL_H_ diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index c25cd78309..20a0753dd9 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -358,8 +358,7 @@ extern "C" size_t MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint if (LIKELY(c != nullptr)) { if (UNLIKELY(c->IsStringClass())) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - obj = String::Alloc<true>(self, 0, allocator_type, visitor); + obj = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { obj = AllocObjectFromCode<false, true>( inst->VRegB_21c(), shadow_frame->GetMethod(), self, diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc index 7e1f7950eb..c3246008a1 100644 --- a/runtime/interpreter/unstarted_runtime_test.cc +++ b/runtime/interpreter/unstarted_runtime_test.cc @@ -401,8 +401,23 @@ TEST_F(UnstartedRuntimeTest, StringInit) { interpreter::DoCall<false, false>(method, self, *shadow_frame, inst, inst_data[0], &result); mirror::String* string_result = reinterpret_cast<mirror::String*>(result.GetL()); EXPECT_EQ(string_arg->GetLength(), string_result->GetLength()); - EXPECT_EQ(memcmp(string_arg->GetValue(), string_result->GetValue(), - string_arg->GetLength() * sizeof(uint16_t)), 0); + + if (string_arg->IsCompressed() && string_result->IsCompressed()) { + EXPECT_EQ(memcmp(string_arg->GetValueCompressed(), string_result->GetValueCompressed(), + string_arg->GetLength() * sizeof(uint8_t)), 0); + } else if (!string_arg->IsCompressed() && !string_result->IsCompressed()) { + EXPECT_EQ(memcmp(string_arg->GetValue(), string_result->GetValue(), + string_arg->GetLength() * sizeof(uint16_t)), 0); + } else { + bool equal = true; + for (int i = 0; i < string_arg->GetLength(); ++i) { + if (string_arg->CharAt(i) != string_result->CharAt(i)) { + equal = false; + break; + } + } + EXPECT_EQ(equal, true); + } ShadowFrame::DeleteDeoptimizedFrame(shadow_frame); } diff --git a/runtime/jdwp/jdwp_bits.h b/runtime/jdwp/jdwp_bits.h index f9cf9ca0d9..33b98f3efe 100644 --- a/runtime/jdwp/jdwp_bits.h +++ b/runtime/jdwp/jdwp_bits.h @@ -59,13 +59,22 @@ static inline void Append8BE(std::vector<uint8_t>& bytes, uint64_t value) { bytes.push_back(static_cast<uint8_t>(value)); } -static inline void AppendUtf16BE(std::vector<uint8_t>& bytes, const uint16_t* chars, size_t char_count) { +static inline void AppendUtf16BE(std::vector<uint8_t>& bytes, const uint16_t* chars, + size_t char_count) { Append4BE(bytes, char_count); for (size_t i = 0; i < char_count; ++i) { Append2BE(bytes, chars[i]); } } +static inline void AppendUtf16CompressedBE(std::vector<uint8_t>& bytes, + const uint8_t* chars, size_t char_count) { + Append4BE(bytes, char_count); + for (size_t 
i = 0; i < char_count; ++i) { + Append2BE(bytes, static_cast<uint16_t>(chars[i])); + } +} + // @deprecated static inline void Set1(uint8_t* buf, uint8_t val) { *buf = val; diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc index b35c958b0b..927681c8d1 100644 --- a/runtime/jit/profile_saver.cc +++ b/runtime/jit/profile_saver.cc @@ -63,19 +63,6 @@ ProfileSaver::ProfileSaver(const ProfileSaverOptions& options, options_(options) { DCHECK(options_.IsEnabled()); AddTrackedLocations(output_filename, app_data_dir, code_paths); - if (!app_data_dir.empty()) { - // The application directory is used to determine which dex files are owned by app. - // Since it could be a symlink (e.g. /data/data instead of /data/user/0), and we - // don't have control over how the dex files are actually loaded (symlink or canonical path), - // store it's canonical form to be sure we use the same base when comparing. - UniqueCPtr<const char[]> app_data_dir_real_path(realpath(app_data_dir.c_str(), nullptr)); - if (app_data_dir_real_path != nullptr) { - app_data_dirs_.emplace(app_data_dir_real_path.get()); - } else { - LOG(WARNING) << "Failed to get the real path for app dir: " << app_data_dir - << ". The app dir will not be used to determine which dex files belong to the app"; - } - } } void ProfileSaver::Run() { @@ -498,12 +485,18 @@ void ProfileSaver::AddTrackedLocations(const std::string& output_filename, if (it == tracked_dex_base_locations_.end()) { tracked_dex_base_locations_.Put(output_filename, std::set<std::string>(code_paths.begin(), code_paths.end())); - app_data_dirs_.insert(app_data_dir); + if (!app_data_dir.empty()) { + app_data_dirs_.insert(app_data_dir); + } } else { it->second.insert(code_paths.begin(), code_paths.end()); } } +// TODO(calin): This may lead to several calls to realpath. +// Consider moving the logic to the saver thread (i.e. when notified, +// only cache the location, and then wake up the saver thread to do the +// comparisons with the real file paths and to create the markers). void ProfileSaver::NotifyDexUse(const std::string& dex_location) { if (!ShouldProfileLocation(dex_location)) { return; @@ -536,63 +529,32 @@ void ProfileSaver::NotifyDexUse(const std::string& dex_location) { } } -bool ProfileSaver::MaybeRecordDexUseInternal( - const std::string& dex_location, - const std::set<std::string>& app_code_paths, - const std::string& foreign_dex_profile_path, - const std::set<std::string>& app_data_dirs) { - if (dex_location.empty()) { - LOG(WARNING) << "Asked to record foreign dex use with an empty dex location."; - return false; - } - if (foreign_dex_profile_path.empty()) { - LOG(WARNING) << "Asked to record foreign dex use without a valid profile path "; - return false; - } - - UniqueCPtr<const char[]> dex_location_real_path(realpath(dex_location.c_str(), nullptr)); - if (dex_location_real_path == nullptr) { - PLOG(WARNING) << "Could not get realpath for " << dex_location; - } - std::string dex_location_real_path_str((dex_location_real_path == nullptr) - ? dex_location.c_str() - : dex_location_real_path.get()); - - if (app_data_dirs.find(dex_location_real_path_str) != app_data_dirs.end()) { - // The dex location is under the application folder. Nothing to record. - return false; - } - - if (app_code_paths.find(dex_location) != app_code_paths.end()) { - // The dex location belongs to the application code paths. Nothing to record. - return false; - } - // Do another round of checks with the real paths. 
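[Editor's sketch] The refactor below splits the realpath containment check and the marker creation into helpers. This sketch shows only the marker naming convention they implement: the canonical dex path with '/' replaced by '@', created as an empty file under the profile directory. The mode and error handling here are simplified assumptions, not the exact ART code.

#include <fcntl.h>
#include <unistd.h>
#include <algorithm>
#include <string>

bool CreateForeignDexMarkerSketch(const std::string& foreign_dex_profile_path,
                                  std::string canonical_dex_path) {
  std::replace(canonical_dex_path.begin(), canonical_dex_path.end(), '/', '@');
  const std::string flag_path = foreign_dex_profile_path + "/" + canonical_dex_path;
  // O_RDONLY | O_CREAT: the marker only needs to exist, it is never read.
  const int fd = open(flag_path.c_str(), O_CREAT | O_RDONLY, 0644);
  if (fd == -1) {
    return false;
  }
  close(fd);
  return true;
}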
- // Note that we could cache all the real locations in the saver (since it's an expensive - // operation). However we expect that app_code_paths is small (usually 1 element), and - // NotifyDexUse is called just a few times in the app lifetime. So we make the compromise - // to save some bytes of memory usage. - for (const auto& app_code_location : app_code_paths) { - UniqueCPtr<const char[]> real_app_code_location(realpath(app_code_location.c_str(), nullptr)); - if (real_app_code_location == nullptr) { - PLOG(WARNING) << "Could not get realpath for " << app_code_location; +static bool CheckContainsWithRealPath(const std::set<std::string>& paths_set, + const std::string& path_to_check) { + for (const auto& path : paths_set) { + UniqueCPtr<const char[]> real_path(realpath(path.c_str(), nullptr)); + if (real_path == nullptr) { + PLOG(WARNING) << "Could not get realpath for " << path; + continue; } - std::string real_app_code_location_str((real_app_code_location == nullptr) - ? app_code_location.c_str() - : real_app_code_location.get()); - if (real_app_code_location_str == dex_location_real_path_str) { - // The dex location belongs to the application code paths. Nothing to record. - return false; + std::string real_path_str(real_path.get()); + if (real_path_str == path_to_check) { + return true; } } + return false; +} +// After the call, dex_location_real_path will contain the marker's name. +static bool CreateForeignDexMarker(const std::string& foreign_dex_profile_path, + /*in-out*/ std::string* dex_location_real_path) { // For foreign dex files we record a flag on disk. PackageManager will (potentially) take this // into account when deciding how to optimize the loaded dex file. // The expected flag name is the canonical path of the apk where '/' is substituted to '@'. // (it needs to be kept in sync with // frameworks/base/services/core/java/com/android/server/pm/PackageDexOptimizer.java) - std::replace(dex_location_real_path_str.begin(), dex_location_real_path_str.end(), '/', '@'); - std::string flag_path = foreign_dex_profile_path + "/" + dex_location_real_path_str; + std::replace(dex_location_real_path->begin(), dex_location_real_path->end(), '/', '@'); + std::string flag_path = foreign_dex_profile_path + "/" + *dex_location_real_path; // We use O_RDONLY as the access mode because we must supply some access // mode, and there is no access mode that means 'create but do not read' the // file. We will not not actually read from the file. @@ -614,6 +576,57 @@ bool ProfileSaver::MaybeRecordDexUseInternal( } } +bool ProfileSaver::MaybeRecordDexUseInternal( + const std::string& dex_location, + const std::set<std::string>& app_code_paths, + const std::string& foreign_dex_profile_path, + const std::set<std::string>& app_data_dirs) { + if (dex_location.empty()) { + LOG(WARNING) << "Asked to record foreign dex use with an empty dex location."; + return false; + } + if (foreign_dex_profile_path.empty()) { + LOG(WARNING) << "Asked to record foreign dex use without a valid profile path "; + return false; + } + + if (app_code_paths.find(dex_location) != app_code_paths.end()) { + // The dex location belongs to the application code paths. Nothing to record. + return false; + } + + if (app_data_dirs.find(dex_location) != app_data_dirs.end()) { + // The dex location is under the application folder. Nothing to record. + return false; + } + + // Do another round of checks with the real paths. + // Application directory could be a symlink (e.g. 
/data/data instead of /data/user/0), and we + // don't have control over how the dex files are actually loaded (symlink or canonical path), + + // Note that we could cache all the real locations in the saver (since it's an expensive + // operation). However we expect that app_code_paths is small (usually 1 element), and + // NotifyDexUse is called just a few times in the app lifetime. So we make the compromise + // to save some bytes of memory usage. + + UniqueCPtr<const char[]> dex_location_real_path(realpath(dex_location.c_str(), nullptr)); + if (dex_location_real_path == nullptr) { + PLOG(WARNING) << "Could not get realpath for " << dex_location; + return false; + } + std::string dex_location_real_path_str(dex_location_real_path.get()); + + if (CheckContainsWithRealPath(app_code_paths, dex_location_real_path_str)) { + return false; + } + + if (CheckContainsWithRealPath(app_data_dirs, dex_location_real_path_str)) { + return false; + } + + return CreateForeignDexMarker(foreign_dex_profile_path, &dex_location_real_path_str); +} + void ProfileSaver::DumpInstanceInfo(std::ostream& os) { MutexLock mu(Thread::Current(), *Locks::profiler_lock_); if (instance_ != nullptr) { diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc index c3224757d8..7bcadd8b78 100644 --- a/runtime/jni_internal.cc +++ b/runtime/jni_internal.cc @@ -592,9 +592,8 @@ class JNI { } if (c->IsStringClass()) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - return soa.AddLocalReference<jobject>(mirror::String::Alloc<true>(soa.Self(), 0, - allocator_type, visitor)); + return soa.AddLocalReference<jobject>(mirror::String::AllocEmptyString<true>(soa.Self(), + allocator_type)); } return soa.AddLocalReference<jobject>(c->AllocObject(soa.Self())); } @@ -1673,8 +1672,14 @@ class JNI { ThrowSIOOBE(soa, start, length, s->GetLength()); } else { CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf); - const jchar* chars = s->GetValue(); - memcpy(buf, chars + start, length * sizeof(jchar)); + if (s->IsCompressed()) { + for (int i = 0; i < length; ++i) { + buf[i] = static_cast<jchar>(s->CharAt(start+i)); + } + } else { + const jchar* chars = static_cast<jchar*>(s->GetValue()); + memcpy(buf, chars + start, length * sizeof(jchar)); + } } } @@ -1687,9 +1692,15 @@ class JNI { ThrowSIOOBE(soa, start, length, s->GetLength()); } else { CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf); - const jchar* chars = s->GetValue(); - size_t bytes = CountUtf8Bytes(chars + start, length); - ConvertUtf16ToModifiedUtf8(buf, bytes, chars + start, length); + if (s->IsCompressed()) { + for (int i = 0; i < length; ++i) { + buf[i] = s->CharAt(start+i); + } + } else { + const jchar* chars = s->GetValue(); + size_t bytes = CountUtf8Bytes(chars + start, length); + ConvertUtf16ToModifiedUtf8(buf, bytes, chars + start, length); + } } } @@ -1698,9 +1709,16 @@ class JNI { ScopedObjectAccess soa(env); mirror::String* s = soa.Decode<mirror::String*>(java_string); gc::Heap* heap = Runtime::Current()->GetHeap(); - if (heap->IsMovableObject(s)) { + if (heap->IsMovableObject(s) || s->IsCompressed()) { jchar* chars = new jchar[s->GetLength()]; - memcpy(chars, s->GetValue(), sizeof(jchar) * s->GetLength()); + if (s->IsCompressed()) { + int32_t length = s->GetLength(); + for (int i = 0; i < length; ++i) { + chars[i] = s->CharAt(i); + } + } else { + memcpy(chars, s->GetValue(), sizeof(jchar) * s->GetLength()); + } if (is_copy != nullptr) { *is_copy = JNI_TRUE; } @@ -1716,7 +1734,7 @@ class JNI { 
CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string); ScopedObjectAccess soa(env); mirror::String* s = soa.Decode<mirror::String*>(java_string); - if (chars != s->GetValue()) { + if (s->IsCompressed() || (s->IsCompressed() == false && chars != s->GetValue())) { delete[] chars; } } @@ -1737,15 +1755,27 @@ class JNI { heap->IncrementDisableThreadFlip(soa.Self()); } } - if (is_copy != nullptr) { - *is_copy = JNI_FALSE; + if (s->IsCompressed()) { + if (is_copy != nullptr) { + *is_copy = JNI_TRUE; + } + int32_t length = s->GetLength(); + jchar* chars = new jchar[length]; + for (int i = 0; i < length; ++i) { + chars[i] = s->CharAt(i); + } + return chars; + } else { + if (is_copy != nullptr) { + *is_copy = JNI_FALSE; + } + return static_cast<jchar*>(s->GetValue()); } - return static_cast<jchar*>(s->GetValue()); } static void ReleaseStringCritical(JNIEnv* env, jstring java_string, - const jchar* chars ATTRIBUTE_UNUSED) { + const jchar* chars) { CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string); ScopedObjectAccess soa(env); gc::Heap* heap = Runtime::Current()->GetHeap(); @@ -1757,6 +1787,9 @@ class JNI { heap->DecrementDisableThreadFlip(soa.Self()); } } + if (s->IsCompressed() || (s->IsCompressed() == false && s->GetValue() != chars)) { + delete[] chars; + } } static const char* GetStringUTFChars(JNIEnv* env, jstring java_string, jboolean* is_copy) { @@ -1771,8 +1804,14 @@ class JNI { size_t byte_count = s->GetUtfLength(); char* bytes = new char[byte_count + 1]; CHECK(bytes != nullptr); // bionic aborts anyway. - const uint16_t* chars = s->GetValue(); - ConvertUtf16ToModifiedUtf8(bytes, byte_count, chars, s->GetLength()); + if (s->IsCompressed()) { + for (size_t i = 0; i < byte_count; ++i) { + bytes[i] = s->CharAt(i); + } + } else { + const uint16_t* chars = s->GetValue(); + ConvertUtf16ToModifiedUtf8(bytes, byte_count, chars, s->GetLength()); + } bytes[byte_count] = '\0'; return bytes; } diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc index 04ba8dfc64..64954743d4 100644 --- a/runtime/jni_internal_test.cc +++ b/runtime/jni_internal_test.cc @@ -880,8 +880,15 @@ TEST_F(JniInternalTest, FromReflectedField_ToReflectedField) { ASSERT_NE(fid2, nullptr); // Make sure we can actually use it. jstring s = env_->NewStringUTF("poop"); - ASSERT_EQ(4, env_->GetIntField(s, fid2)); - + if (mirror::kUseStringCompression) { + // Negative because s is compressed (first bit is 1) + ASSERT_EQ(-2147483644, env_->GetIntField(s, fid2)); + // Create incompressible string + jstring s_16 = env_->NewStringUTF("\u0444\u0444"); + ASSERT_EQ(2, env_->GetIntField(s_16, fid2)); + } else { + ASSERT_EQ(4, env_->GetIntField(s, fid2)); + } // Bad arguments. 
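[Editor's sketch] The -2147483644 expectation in the test above implies how the compression flag is stored in this version: the 32-bit count field keeps the length in the low bits and the flag in the sign bit (0x80000000 | 4 for the 4-character "poop"). The encoding below is inferred from the test values, not copied from mirror::String.

#include <cstdint>

constexpr int32_t kCompressedFlag = INT32_MIN;  // 0x80000000

constexpr int32_t EncodeCount(int32_t length, bool compressed) {
  return compressed ? (length | kCompressedFlag) : length;
}

constexpr int32_t DecodeLength(int32_t count) {
  return count & ~kCompressedFlag;
}

static_assert(EncodeCount(4, true) == -2147483644, "matches the compressed \"poop\" test");
static_assert(DecodeLength(EncodeCount(4, true)) == 4, "length round-trips");
static_assert(EncodeCount(2, false) == 2, "incompressible two-char string reads back as 2");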
GetFromReflectedField_ToReflectedFieldBadArgumentTest(false); GetFromReflectedField_ToReflectedFieldBadArgumentTest(true); @@ -1632,13 +1639,28 @@ TEST_F(JniInternalTest, GetStringCritical_ReleaseStringCritical) { jboolean is_copy = JNI_TRUE; chars = env_->GetStringCritical(s, &is_copy); - EXPECT_EQ(JNI_FALSE, is_copy); + if (mirror::kUseStringCompression) { + // is_copy has to be JNI_TRUE because "hello" is all-ASCII + EXPECT_EQ(JNI_TRUE, is_copy); + } else { + EXPECT_EQ(JNI_FALSE, is_copy); + } EXPECT_EQ(expected[0], chars[0]); EXPECT_EQ(expected[1], chars[1]); EXPECT_EQ(expected[2], chars[2]); EXPECT_EQ(expected[3], chars[3]); EXPECT_EQ(expected[4], chars[4]); env_->ReleaseStringCritical(s, chars); + + if (mirror::kUseStringCompression) { + // is_copy has to be JNI_FALSE because "\xed\xa0\x81\xed\xb0\x80" is incompressible + jboolean is_copy_16 = JNI_TRUE; + jstring s_16 = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80"); + chars = env_->GetStringCritical(s_16, &is_copy_16); + EXPECT_EQ(2, env_->GetStringLength(s_16)); + EXPECT_EQ(4, env_->GetStringUTFLength(s_16)); + env_->ReleaseStringCritical(s_16, chars); + } } TEST_F(JniInternalTest, GetObjectArrayElement_SetObjectArrayElement) { diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h index 8ad47eb799..0f2aac2790 100644 --- a/runtime/mirror/class-inl.h +++ b/runtime/mirror/class-inl.h @@ -26,7 +26,6 @@ #include "base/length_prefixed_array.h" #include "class_loader.h" #include "common_throws.h" -#include "dex_cache.h" #include "dex_file.h" #include "gc/heap-inl.h" #include "iftable.h" @@ -899,12 +898,12 @@ inline uint32_t Class::NumDirectInterfaces() { } } -inline void Class::SetDexCacheStrings(GcRoot<String>* new_dex_cache_strings) { +inline void Class::SetDexCacheStrings(StringDexCacheType* new_dex_cache_strings) { SetFieldPtr<false>(DexCacheStringsOffset(), new_dex_cache_strings); } -inline GcRoot<String>* Class::GetDexCacheStrings() { - return GetFieldPtr<GcRoot<String>*>(DexCacheStringsOffset()); +inline StringDexCacheType* Class::GetDexCacheStrings() { + return GetFieldPtr64<StringDexCacheType*>(DexCacheStringsOffset()); } template<ReadBarrierOption kReadBarrierOption, class Visitor> @@ -1058,8 +1057,8 @@ inline void Class::FixupNativePointers(mirror::Class* dest, dest->SetMethodsPtrInternal(new_methods); } // Update dex cache strings. 
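[Editor's sketch] The JNI changes earlier in this patch (GetStringRegion, GetStringChars, GetStringCritical, GetStringUTFChars) all follow the same copy-out pattern for compressed strings: widen the 8-bit values one by one and hand the caller a copy, versus a plain memcpy of the 16-bit data. A standalone sketch with hypothetical types:

#include <cstdint>
#include <cstring>
#include <vector>

std::vector<uint16_t> CopyOutChars(bool compressed, const void* data, size_t length) {
  std::vector<uint16_t> out(length);
  if (compressed) {
    const uint8_t* src = static_cast<const uint8_t*>(data);
    for (size_t i = 0; i < length; ++i) {
      out[i] = src[i];  // zero-extend each 8-bit value to a UTF-16 code unit
    }
  } else {
    std::memcpy(out.data(), data, length * sizeof(uint16_t));
  }
  return out;
}

This is also why the test above expects is_copy == JNI_TRUE for an all-ASCII string: a compressed backing array can never be handed out directly as a jchar*.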
- GcRoot<mirror::String>* strings = GetDexCacheStrings(); - GcRoot<mirror::String>* new_strings = visitor(strings); + StringDexCacheType* strings = GetDexCacheStrings(); + StringDexCacheType* new_strings = visitor(strings); if (strings != new_strings) { dest->SetDexCacheStrings(new_strings); } diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h index 978fc4cbbf..e2cd649d99 100644 --- a/runtime/mirror/class.h +++ b/runtime/mirror/class.h @@ -54,6 +54,9 @@ class Constructor; class DexCache; class IfTable; class Method; +struct StringDexCachePair; + +using StringDexCacheType = std::atomic<mirror::StringDexCachePair>; // C++ mirror of java.lang.Class class MANAGED Class FINAL : public Object { @@ -1219,8 +1222,8 @@ class MANAGED Class FINAL : public Object { bool GetSlowPathEnabled() SHARED_REQUIRES(Locks::mutator_lock_); void SetSlowPath(bool enabled) SHARED_REQUIRES(Locks::mutator_lock_); - GcRoot<String>* GetDexCacheStrings() SHARED_REQUIRES(Locks::mutator_lock_); - void SetDexCacheStrings(GcRoot<String>* new_dex_cache_strings) + StringDexCacheType* GetDexCacheStrings() SHARED_REQUIRES(Locks::mutator_lock_); + void SetDexCacheStrings(StringDexCacheType* new_dex_cache_strings) SHARED_REQUIRES(Locks::mutator_lock_); static MemberOffset DexCacheStringsOffset() { return OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_strings_); diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h index 84469ea868..a3071b7f63 100644 --- a/runtime/mirror/dex_cache-inl.h +++ b/runtime/mirror/dex_cache-inl.h @@ -27,6 +27,8 @@ #include "mirror/class.h" #include "runtime.h" +#include <atomic> + namespace art { namespace mirror { @@ -35,15 +37,18 @@ inline uint32_t DexCache::ClassSize(PointerSize pointer_size) { return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size); } -inline String* DexCache::GetResolvedString(uint32_t string_idx) { - DCHECK_LT(string_idx, NumStrings()); - return GetStrings()[string_idx].Read(); +inline mirror::String* DexCache::GetResolvedString(uint32_t string_idx) { + DCHECK_LT(string_idx, GetDexFile()->NumStringIds()); + return StringDexCachePair::LookupString(GetStrings(), string_idx, NumStrings()).Read(); } -inline void DexCache::SetResolvedString(uint32_t string_idx, String* resolved) { - DCHECK_LT(string_idx, NumStrings()); +inline void DexCache::SetResolvedString(uint32_t string_idx, mirror::String* resolved) { + DCHECK_LT(string_idx % NumStrings(), NumStrings()); // TODO default transaction support. - GetStrings()[string_idx] = GcRoot<String>(resolved); + StringDexCachePair idx_ptr; + idx_ptr.string_index = string_idx; + idx_ptr.string_pointer = GcRoot<String>(resolved); + GetStrings()[string_idx % NumStrings()].store(idx_ptr, std::memory_order_relaxed); // TODO: Fine-grained marking, so that we don't need to go through all arrays in full. Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(this); } @@ -131,9 +136,16 @@ inline void DexCache::VisitReferences(mirror::Class* klass, const Visitor& visit VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor); // Visit arrays after. 
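[Editor's sketch] The VisitReferences and FixupStrings loops that follow share one pattern for the atomic string slots: load the packed entry, let the visitor produce a possibly moved pointer, and write the slot back only when it changed. A simplified standalone version with no read barriers and made-up types:

#include <atomic>
#include <cstddef>
#include <cstdint>

struct Entry {
  void* pointer;
  uint32_t index;
};

template <typename Visitor>
void VisitSlots(std::atomic<Entry>* slots, size_t count, const Visitor& visitor) {
  for (size_t i = 0; i < count; ++i) {
    Entry e = slots[i].load(std::memory_order_relaxed);
    void* const moved = visitor(e.pointer);
    if (moved != e.pointer) {
      e.pointer = moved;
      slots[i].store(e, std::memory_order_relaxed);  // keep the index, update the pointer
    }
  }
}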
if (kVisitNativeRoots) { - GcRoot<mirror::String>* strings = GetStrings(); + mirror::StringDexCacheType* strings = GetStrings(); for (size_t i = 0, num_strings = NumStrings(); i != num_strings; ++i) { - visitor.VisitRootIfNonNull(strings[i].AddressWithoutBarrier()); + StringDexCachePair source = strings[i].load(std::memory_order_relaxed); + mirror::String* before = source.string_pointer.Read<kReadBarrierOption>(); + GcRoot<mirror::String> root(before); + visitor.VisitRootIfNonNull(root.AddressWithoutBarrier()); + if (root.Read() != before) { + source.string_pointer = GcRoot<String>(root.Read()); + strings[i].store(source, std::memory_order_relaxed); + } } GcRoot<mirror::Class>* resolved_types = GetResolvedTypes(); for (size_t i = 0, num_types = NumResolvedTypes(); i != num_types; ++i) { @@ -143,12 +155,14 @@ inline void DexCache::VisitReferences(mirror::Class* klass, const Visitor& visit } template <ReadBarrierOption kReadBarrierOption, typename Visitor> -inline void DexCache::FixupStrings(GcRoot<mirror::String>* dest, const Visitor& visitor) { - GcRoot<mirror::String>* src = GetStrings(); +inline void DexCache::FixupStrings(mirror::StringDexCacheType* dest, const Visitor& visitor) { + mirror::StringDexCacheType* src = GetStrings(); for (size_t i = 0, count = NumStrings(); i < count; ++i) { - mirror::String* source = src[i].Read<kReadBarrierOption>(); - mirror::String* new_source = visitor(source); - dest[i] = GcRoot<mirror::String>(new_source); + StringDexCachePair source = src[i].load(std::memory_order_relaxed); + mirror::String* ptr = source.string_pointer.Read<kReadBarrierOption>(); + mirror::String* new_source = visitor(ptr); + source.string_pointer = GcRoot<String>(new_source); + dest[i].store(source, std::memory_order_relaxed); } } diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc index 57066d8376..cfcec9cd3a 100644 --- a/runtime/mirror/dex_cache.cc +++ b/runtime/mirror/dex_cache.cc @@ -33,7 +33,7 @@ namespace mirror { void DexCache::Init(const DexFile* dex_file, String* location, - GcRoot<String>* strings, + StringDexCacheType* strings, uint32_t num_strings, GcRoot<Class>* resolved_types, uint32_t num_resolved_types, diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h index d02a0d8e2f..4ddfc7bde9 100644 --- a/runtime/mirror/dex_cache.h +++ b/runtime/mirror/dex_cache.h @@ -35,12 +35,61 @@ namespace mirror { class String; +struct PACKED(8) StringDexCachePair { + GcRoot<String> string_pointer; + uint32_t string_index; + // The array is initially [ {0,0}, {0,0}, {0,0} ... ] + // We maintain the invariant that once a dex cache entry is populated, + // the pointer is always non-0 + // Any given entry would thus be: + // {non-0, non-0} OR {0,0} + // + // It's generally sufficiently enough then to check if the + // lookup string index matches the stored string index (for a >0 string index) + // because if it's true the pointer is also non-null. + // + // For the 0th entry which is a special case, the value is either + // {0,0} (initial state) or {non-0, 0} which indicates + // that a valid string is stored at that index for a dex string id of 0. + // + // As an optimization, we want to avoid branching on the string pointer since + // it's always non-null if the string id branch succeeds (except for the 0th string id). + // Set the initial state for the 0th entry to be {0,1} which is guaranteed to fail + // the lookup string id == stored id branch. 
+ static void Initialize(StringDexCacheType* strings) { + mirror::StringDexCachePair first_elem; + first_elem.string_pointer = GcRoot<String>(nullptr); + first_elem.string_index = 1; + strings[0].store(first_elem, std::memory_order_relaxed); + } + static GcRoot<String> LookupString(StringDexCacheType* dex_cache, + uint32_t string_idx, + uint32_t cache_size) { + StringDexCachePair index_string = dex_cache[string_idx % cache_size] + .load(std::memory_order_relaxed); + if (string_idx != index_string.string_index) return GcRoot<String>(nullptr); + DCHECK(!index_string.string_pointer.IsNull()); + return index_string.string_pointer; + } +}; +using StringDexCacheType = std::atomic<StringDexCachePair>; + + // C++ mirror of java.lang.DexCache. class MANAGED DexCache FINAL : public Object { public: // Size of java.lang.DexCache.class. static uint32_t ClassSize(PointerSize pointer_size); + // Size of string dex cache. Needs to be a power of 2 for entrypoint assumptions to hold. + static constexpr size_t kDexCacheStringCacheSize = 1024; + static_assert(IsPowerOfTwo(kDexCacheStringCacheSize), + "String dex cache size is not a power of 2."); + + static constexpr size_t StaticStringSize() { + return kDexCacheStringCacheSize; + } + // Size of an instance of java.lang.DexCache not including referenced values. static constexpr uint32_t InstanceSize() { return sizeof(DexCache); @@ -48,7 +97,7 @@ class MANAGED DexCache FINAL : public Object { void Init(const DexFile* dex_file, String* location, - GcRoot<String>* strings, + StringDexCacheType* strings, uint32_t num_strings, GcRoot<Class>* resolved_types, uint32_t num_resolved_types, @@ -62,7 +111,7 @@ class MANAGED DexCache FINAL : public Object { SHARED_REQUIRES(Locks::mutator_lock_); template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor> - void FixupStrings(GcRoot<mirror::String>* dest, const Visitor& visitor) + void FixupStrings(StringDexCacheType* dest, const Visitor& visitor) SHARED_REQUIRES(Locks::mutator_lock_); template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor> @@ -109,10 +158,10 @@ class MANAGED DexCache FINAL : public Object { return OFFSET_OF_OBJECT_MEMBER(DexCache, num_resolved_methods_); } - String* GetResolvedString(uint32_t string_idx) ALWAYS_INLINE + mirror::String* GetResolvedString(uint32_t string_idx) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_); - void SetResolvedString(uint32_t string_idx, String* resolved) ALWAYS_INLINE + void SetResolvedString(uint32_t string_idx, mirror::String* resolved) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_); Class* GetResolvedType(uint32_t type_idx) SHARED_REQUIRES(Locks::mutator_lock_); @@ -135,11 +184,11 @@ class MANAGED DexCache FINAL : public Object { ALWAYS_INLINE void SetResolvedField(uint32_t idx, ArtField* field, PointerSize ptr_size) SHARED_REQUIRES(Locks::mutator_lock_); - GcRoot<String>* GetStrings() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) { - return GetFieldPtr<GcRoot<String>*>(StringsOffset()); + StringDexCacheType* GetStrings() ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) { + return GetFieldPtr64<StringDexCacheType*>(StringsOffset()); } - void SetStrings(GcRoot<String>* strings) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) { + void SetStrings(StringDexCacheType* strings) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) { SetFieldPtr<false>(StringsOffset(), strings); } @@ -224,7 +273,8 @@ class MANAGED DexCache FINAL : public Object { uint64_t resolved_fields_; // ArtField*, array with 
num_resolved_fields_ elements. uint64_t resolved_methods_; // ArtMethod*, array with num_resolved_methods_ elements. uint64_t resolved_types_; // GcRoot<Class>*, array with num_resolved_types_ elements. - uint64_t strings_; // GcRoot<String>*, array with num_strings_ elements. + uint64_t strings_; // std::atomic<StringDexCachePair>*, + // array with num_strings_ elements. uint32_t num_resolved_fields_; // Number of elements in the resolved_fields_ array. uint32_t num_resolved_methods_; // Number of elements in the resolved_methods_ array. uint32_t num_resolved_types_; // Number of elements in the resolved_types_ array. diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc index 48f2ca59e8..175997c2dc 100644 --- a/runtime/mirror/dex_cache_test.cc +++ b/runtime/mirror/dex_cache_test.cc @@ -22,6 +22,7 @@ #include "common_runtime_test.h" #include "linear_alloc.h" #include "mirror/class_loader-inl.h" +#include "mirror/dex_cache-inl.h" #include "handle_scope-inl.h" #include "scoped_thread_state_change.h" @@ -40,7 +41,8 @@ TEST_F(DexCacheTest, Open) { Runtime::Current()->GetLinearAlloc()))); ASSERT_TRUE(dex_cache.Get() != nullptr); - EXPECT_EQ(java_lang_dex_file_->NumStringIds(), dex_cache->NumStrings()); + EXPECT_TRUE(dex_cache->StaticStringSize() == dex_cache->NumStrings() + || java_lang_dex_file_->NumStringIds() == dex_cache->NumStrings()); EXPECT_EQ(java_lang_dex_file_->NumTypeIds(), dex_cache->NumResolvedTypes()); EXPECT_EQ(java_lang_dex_file_->NumMethodIds(), dex_cache->NumResolvedMethods()); EXPECT_EQ(java_lang_dex_file_->NumFieldIds(), dex_cache->NumResolvedFields()); diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h index 0495c957c6..27f8bd72d6 100644 --- a/runtime/mirror/object-inl.h +++ b/runtime/mirror/object-inl.h @@ -147,6 +147,18 @@ inline Object* Object::GetReadBarrierPointer() { #endif } +inline Object* Object::GetReadBarrierPointerAcquire() { +#ifdef USE_BAKER_READ_BARRIER + DCHECK(kUseBakerReadBarrier); + LockWord lw(GetFieldAcquire<uint32_t>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_))); + return reinterpret_cast<Object*>(lw.ReadBarrierState()); +#else + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +#endif +} + + inline uint32_t Object::GetMarkBit() { #ifdef USE_READ_BARRIER return GetLockWord(false).MarkBitState(); @@ -814,6 +826,13 @@ inline kSize Object::GetField(MemberOffset field_offset) { } } +template<typename kSize> +inline kSize Object::GetFieldAcquire(MemberOffset field_offset) { + const uint8_t* raw_addr = reinterpret_cast<const uint8_t*>(this) + field_offset.Int32Value(); + const kSize* addr = reinterpret_cast<const kSize*>(raw_addr); + return reinterpret_cast<const Atomic<kSize>*>(addr)->LoadAcquire(); +} + template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags> inline bool Object::CasFieldWeakSequentiallyConsistent64(MemberOffset field_offset, int64_t old_value, int64_t new_value) { diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h index 5b129bf2ba..864929444a 100644 --- a/runtime/mirror/object.h +++ b/runtime/mirror/object.h @@ -93,9 +93,12 @@ class MANAGED LOCKABLE Object { template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_); - // TODO: Clean this up and change to return int32_t + // TODO: Clean these up and change to return int32_t Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_); + // Get the read barrier pointer with release semantics, only supported for baker. 
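The GetFieldAcquire() / GetReadBarrierPointerAcquire() additions above boil down to an acquire load of the 32-bit monitor word. A rough standalone sketch of that pattern, with a plain std::atomic<uint32_t> in place of ART's Atomic<> and LockWord, and with made-up bit masks:

#include <atomic>
#include <cstdint>
#include <cstdio>

// Stand-in for an object header whose monitor word carries extra state bits.
struct FakeObject {
  std::atomic<uint32_t> monitor{0};

  // Relaxed read: cheapest, no ordering with the data it guards.
  uint32_t GetMonitor() const { return monitor.load(std::memory_order_relaxed); }

  // Acquire read: anything published before the releasing store to `monitor`
  // is visible after this load returns (the GetFieldAcquire() pattern).
  uint32_t GetMonitorAcquire() const { return monitor.load(std::memory_order_acquire); }

  // Hypothetical "state" field packed in the low 2 bits of the word.
  uint32_t GetStateAcquire() const { return GetMonitorAcquire() & 0x3u; }
};

int main() {
  FakeObject o;
  // A writer publishes with a release store, e.g.:
  o.monitor.store((42u << 2) | 0x1u, std::memory_order_release);
  std::printf("state=%u payload=%u\n", o.GetStateAcquire(), o.GetMonitorAcquire() >> 2);
}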
+ Object* GetReadBarrierPointerAcquire() SHARED_REQUIRES(Locks::mutator_lock_); + #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER NO_RETURN #endif @@ -574,6 +577,10 @@ class MANAGED LOCKABLE Object { template<typename kSize, bool kIsVolatile> ALWAYS_INLINE kSize GetField(MemberOffset field_offset) SHARED_REQUIRES(Locks::mutator_lock_); + // Get a field with acquire semantics. + template<typename kSize> + ALWAYS_INLINE kSize GetFieldAcquire(MemberOffset field_offset) + SHARED_REQUIRES(Locks::mutator_lock_); // Verify the type correctness of stores to fields. // TODO: This can cause thread suspension and isn't moving GC safe. diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc index 00342202cb..b35a479914 100644 --- a/runtime/mirror/object_test.cc +++ b/runtime/mirror/object_test.cc @@ -62,7 +62,7 @@ class ObjectTest : public CommonRuntimeTest { Handle<String> string( hs.NewHandle(String::AllocFromModifiedUtf8(self, expected_utf16_length, utf8_in))); ASSERT_EQ(expected_utf16_length, string->GetLength()); - ASSERT_TRUE(string->GetValue() != nullptr); + ASSERT_EQ(string->IsValueNull(), false); // strlen is necessary because the 1-character string "\x00\x00" is interpreted as "" ASSERT_TRUE(string->Equals(utf8_in) || (expected_utf16_length == 1 && strlen(utf8_in) == 0)); ASSERT_TRUE(string->Equals(StringPiece(utf8_in)) || diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h index d3660e5615..bc39ea86f7 100644 --- a/runtime/mirror/string-inl.h +++ b/runtime/mirror/string-inl.h @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef ART_RUNTIME_MIRROR_STRING_INL_H_ #define ART_RUNTIME_MIRROR_STRING_INL_H_ @@ -49,6 +48,7 @@ class SetStringCountVisitor { // Avoid AsString as object is not yet in live bitmap or allocation stack. String* string = down_cast<String*>(obj); string->SetCount(count_); + DCHECK(!string->IsCompressed() || kUseStringCompression); } private: @@ -68,10 +68,19 @@ class SetStringCountAndBytesVisitor { // Avoid AsString as object is not yet in live bitmap or allocation stack. 
String* string = down_cast<String*>(obj); string->SetCount(count_); - uint16_t* value = string->GetValue(); + DCHECK(!string->IsCompressed() || kUseStringCompression); + int32_t length = String::GetLengthFromCount(count_); const uint8_t* const src = reinterpret_cast<uint8_t*>(src_array_->GetData()) + offset_; - for (int i = 0; i < count_; i++) { - value[i] = high_byte_ + (src[i] & 0xFF); + if (string->IsCompressed()) { + uint8_t* valueCompressed = string->GetValueCompressed(); + for (int i = 0; i < length; i++) { + valueCompressed[i] = (src[i] & 0xFF); + } + } else { + uint16_t* value = string->GetValue(); + for (int i = 0; i < length; i++) { + value[i] = high_byte_ + (src[i] & 0xFF); + } } } @@ -96,7 +105,16 @@ class SetStringCountAndValueVisitorFromCharArray { String* string = down_cast<String*>(obj); string->SetCount(count_); const uint16_t* const src = src_array_->GetData() + offset_; - memcpy(string->GetValue(), src, count_ * sizeof(uint16_t)); + const int32_t length = String::GetLengthFromCount(count_); + bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_); + DCHECK(!compressible || kUseStringCompression); + if (compressible) { + for (int i = 0; i < length; ++i) { + string->GetValueCompressed()[i] = static_cast<uint8_t>(src[i]); + } + } else { + memcpy(string->GetValue(), src, length * sizeof(uint16_t)); + } } private: @@ -118,8 +136,22 @@ class SetStringCountAndValueVisitorFromString { // Avoid AsString as object is not yet in live bitmap or allocation stack. String* string = down_cast<String*>(obj); string->SetCount(count_); - const uint16_t* const src = src_string_->GetValue() + offset_; - memcpy(string->GetValue(), src, count_ * sizeof(uint16_t)); + const int32_t length = String::GetLengthFromCount(count_); + bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_); + DCHECK(!compressible || kUseStringCompression); + if (src_string_->IsCompressed()) { + const uint8_t* const src = src_string_->GetValueCompressed() + offset_; + memcpy(string->GetValueCompressed(), src, length * sizeof(uint8_t)); + } else { + const uint16_t* const src = src_string_->GetValue() + offset_; + if (compressible) { + for (int i = 0; i < length; ++i) { + string->GetValueCompressed()[i] = static_cast<uint8_t>(src[i]); + } + } else { + memcpy(string->GetValue(), src, length * sizeof(uint16_t)); + } + } } private: @@ -133,17 +165,38 @@ inline String* String::Intern() { } inline uint16_t String::CharAt(int32_t index) { - int32_t count = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_)); + int32_t count = GetLength(); if (UNLIKELY((index < 0) || (index >= count))) { ThrowStringIndexOutOfBoundsException(index, count); return 0; } - return GetValue()[index]; + if (IsCompressed()) { + return GetValueCompressed()[index]; + } else { + return GetValue()[index]; + } +} + +template <typename MemoryType> +int32_t String::FastIndexOf(MemoryType* chars, int32_t ch, int32_t start) { + const MemoryType* p = chars + start; + const MemoryType* end = chars + GetLength(); + while (p < end) { + if (*p++ == ch) { + return (p - 1) - chars; + } + } + return -1; } template<VerifyObjectFlags kVerifyFlags> inline size_t String::SizeOf() { - size_t size = sizeof(String) + (sizeof(uint16_t) * GetLength<kVerifyFlags>()); + size_t size = sizeof(String); + if (IsCompressed()) { + size += (sizeof(uint8_t) * GetLength<kVerifyFlags>()); + } else { + size += (sizeof(uint16_t) * GetLength<kVerifyFlags>()); + } // String.equals() intrinsics assume zero-padding up to 
kObjectAlignment, // so make sure the zero-padding is actually copied around if GC compaction // chooses to copy only SizeOf() bytes. @@ -152,31 +205,35 @@ inline size_t String::SizeOf() { } template <bool kIsInstrumented, typename PreFenceVisitor> -inline String* String::Alloc(Thread* self, int32_t utf16_length, gc::AllocatorType allocator_type, +inline String* String::Alloc(Thread* self, int32_t utf16_length_with_flag, + gc::AllocatorType allocator_type, const PreFenceVisitor& pre_fence_visitor) { constexpr size_t header_size = sizeof(String); - static_assert(sizeof(utf16_length) <= sizeof(size_t), + const bool compressible = kUseStringCompression && + String::GetCompressionFlagFromCount(utf16_length_with_flag); + const size_t block_size = (compressible) ? sizeof(uint8_t) : sizeof(uint16_t); + size_t length = String::GetLengthFromCount(utf16_length_with_flag); + static_assert(sizeof(length) <= sizeof(size_t), "static_cast<size_t>(utf16_length) must not lose bits."); - size_t length = static_cast<size_t>(utf16_length); - size_t data_size = sizeof(uint16_t) * length; + size_t data_size = block_size * length; size_t size = header_size + data_size; // String.equals() intrinsics assume zero-padding up to kObjectAlignment, // so make sure the allocator clears the padding as well. // http://b/23528461 size_t alloc_size = RoundUp(size, kObjectAlignment); - Class* string_class = GetJavaLangString(); + Class* string_class = GetJavaLangString(); // Check for overflow and throw OutOfMemoryError if this was an unreasonable request. // Do this by comparing with the maximum length that will _not_ cause an overflow. - constexpr size_t overflow_length = (-header_size) / sizeof(uint16_t); // Unsigned arithmetic. - constexpr size_t max_alloc_length = overflow_length - 1u; + const size_t overflow_length = (-header_size) / block_size; // Unsigned arithmetic. + const size_t max_alloc_length = overflow_length - 1u; static_assert(IsAligned<sizeof(uint16_t)>(kObjectAlignment), "kObjectAlignment must be at least as big as Java char alignment"); - constexpr size_t max_length = RoundDown(max_alloc_length, kObjectAlignment / sizeof(uint16_t)); + const size_t max_length = RoundDown(max_alloc_length, kObjectAlignment / block_size); if (UNLIKELY(length > max_length)) { self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow", PrettyDescriptor(string_class).c_str(), - utf16_length).c_str()); + static_cast<int>(length)).c_str()); return nullptr; } @@ -187,11 +244,22 @@ inline String* String::Alloc(Thread* self, int32_t utf16_length, gc::AllocatorTy } template <bool kIsInstrumented> +inline String* String::AllocEmptyString(Thread* self, gc::AllocatorType allocator_type) { + SetStringCountVisitor visitor(0); + return Alloc<kIsInstrumented>(self, 0, allocator_type, visitor); +} + +template <bool kIsInstrumented> inline String* String::AllocFromByteArray(Thread* self, int32_t byte_length, Handle<ByteArray> array, int32_t offset, int32_t high_byte, gc::AllocatorType allocator_type) { - SetStringCountAndBytesVisitor visitor(byte_length, array, offset, high_byte << 8); - String* string = Alloc<kIsInstrumented>(self, byte_length, allocator_type, visitor); + const uint8_t* const src = reinterpret_cast<uint8_t*>(array->GetData()) + offset; + const bool compressible = kUseStringCompression && String::AllASCII<uint8_t>(src, byte_length) + && (high_byte == 0); + const int32_t length_with_flag = (compressible) ? 
String::GetFlaggedCount(byte_length) + : byte_length; + SetStringCountAndBytesVisitor visitor(length_with_flag, array, offset, high_byte << 8); + String* string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor); return string; } @@ -201,16 +269,24 @@ inline String* String::AllocFromCharArray(Thread* self, int32_t count, gc::AllocatorType allocator_type) { // It is a caller error to have a count less than the actual array's size. DCHECK_GE(array->GetLength(), count); - SetStringCountAndValueVisitorFromCharArray visitor(count, array, offset); - String* new_string = Alloc<kIsInstrumented>(self, count, allocator_type, visitor); + const bool compressible = kUseStringCompression && + String::AllASCII<uint16_t>(array->GetData() + offset, count); + const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(count) : count; + SetStringCountAndValueVisitorFromCharArray visitor(length_with_flag, array, offset); + String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor); return new_string; } template <bool kIsInstrumented> inline String* String::AllocFromString(Thread* self, int32_t string_length, Handle<String> string, int32_t offset, gc::AllocatorType allocator_type) { - SetStringCountAndValueVisitorFromString visitor(string_length, string, offset); - String* new_string = Alloc<kIsInstrumented>(self, string_length, allocator_type, visitor); + const bool compressible = kUseStringCompression && + ((string->IsCompressed()) ? true : String::AllASCII<uint16_t>(string->GetValue() + offset, + string_length)); + const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(string_length) + : string_length; + SetStringCountAndValueVisitorFromString visitor(length_with_flag, string, offset); + String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor); return new_string; } @@ -219,11 +295,28 @@ inline int32_t String::GetHashCode() { if (UNLIKELY(result == 0)) { result = ComputeHashCode(); } - DCHECK(result != 0 || ComputeUtf16Hash(GetValue(), GetLength()) == 0) - << ToModifiedUtf8() << " " << result; + if (kIsDebugBuild) { + if (IsCompressed()) { + DCHECK(result != 0 || ComputeUtf16Hash(GetValueCompressed(), GetLength()) == 0) + << ToModifiedUtf8() << " " << result; + } else { + DCHECK(result != 0 || ComputeUtf16Hash(GetValue(), GetLength()) == 0) + << ToModifiedUtf8() << " " << result; + } + } return result; } +template<typename MemoryType> +bool String::AllASCII(const MemoryType* const chars, const int length) { + for (int i = 0; i < length; ++i) { + if (chars[i] > 0x80) { + return false; + } + } + return true; +} + } // namespace mirror } // namespace art diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc index 33aca0304c..46caa4d73f 100644 --- a/runtime/mirror/string.cc +++ b/runtime/mirror/string.cc @@ -41,15 +41,11 @@ int32_t String::FastIndexOf(int32_t ch, int32_t start) { } else if (start > count) { start = count; } - const uint16_t* chars = GetValue(); - const uint16_t* p = chars + start; - const uint16_t* end = chars + count; - while (p < end) { - if (*p++ == ch) { - return (p - 1) - chars; - } + if (IsCompressed()) { + return FastIndexOf<uint8_t>(GetValueCompressed(), ch, start); + } else { + return FastIndexOf<uint16_t>(GetValue(), ch, start); } - return -1; } void String::SetClass(Class* java_lang_String) { @@ -65,45 +61,91 @@ void String::ResetClass() { } int String::ComputeHashCode() { - const int32_t hash_code = ComputeUtf16Hash(GetValue(), GetLength()); + 
int32_t hash_code = 0; + if (IsCompressed()) { + hash_code = ComputeUtf16Hash(GetValueCompressed(), GetLength()); + } else { + hash_code = ComputeUtf16Hash(GetValue(), GetLength()); + } SetHashCode(hash_code); return hash_code; } int32_t String::GetUtfLength() { - return CountUtf8Bytes(GetValue(), GetLength()); + if (IsCompressed()) { + return GetLength(); + } else { + return CountUtf8Bytes(GetValue(), GetLength()); + } } void String::SetCharAt(int32_t index, uint16_t c) { - DCHECK((index >= 0) && (index < count_)); - GetValue()[index] = c; + DCHECK((index >= 0) && (index < GetLength())); + if (IsCompressed()) { + // TODO: Handle the case where String is compressed and c is non-ASCII + GetValueCompressed()[index] = static_cast<uint8_t>(c); + } else { + GetValue()[index] = c; + } } String* String::AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) { int32_t length = string->GetLength(); int32_t length2 = string2->GetLength(); gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - SetStringCountVisitor visitor(length + length2); - String* new_string = Alloc<true>(self, length + length2, allocator_type, visitor); + const bool compressible = kUseStringCompression && (string->IsCompressed() && string2->IsCompressed()); + const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(length + length2) + : (length + length2); + + SetStringCountVisitor visitor(length_with_flag); + String* new_string = Alloc<true>(self, length_with_flag, allocator_type, visitor); if (UNLIKELY(new_string == nullptr)) { return nullptr; } - uint16_t* new_value = new_string->GetValue(); - memcpy(new_value, string->GetValue(), length * sizeof(uint16_t)); - memcpy(new_value + length, string2->GetValue(), length2 * sizeof(uint16_t)); + if (compressible) { + uint8_t* new_value = new_string->GetValueCompressed(); + memcpy(new_value, string->GetValueCompressed(), length * sizeof(uint8_t)); + memcpy(new_value + length, string2->GetValueCompressed(), length2 * sizeof(uint8_t)); + } else { + uint16_t* new_value = new_string->GetValue(); + if (string->IsCompressed()) { + for (int i = 0; i < length; ++i) { + new_value[i] = string->CharAt(i); + } + } else { + memcpy(new_value, string->GetValue(), length * sizeof(uint16_t)); + } + if (string2->IsCompressed()) { + for (int i = 0; i < length2; ++i) { + new_value[i+length] = string2->CharAt(i); + } + } else { + memcpy(new_value + length, string2->GetValue(), length2 * sizeof(uint16_t)); + } + } return new_string; } String* String::AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_t* utf16_data_in) { CHECK(utf16_data_in != nullptr || utf16_length == 0); gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - SetStringCountVisitor visitor(utf16_length); - String* string = Alloc<true>(self, utf16_length, allocator_type, visitor); + const bool compressible = kUseStringCompression && + String::AllASCII<uint16_t>(utf16_data_in, utf16_length); + int32_t length_with_flag = (compressible) ? 
String::GetFlaggedCount(utf16_length) + : utf16_length; + SetStringCountVisitor visitor(length_with_flag); + String* string = Alloc<true>(self, length_with_flag, allocator_type, visitor); if (UNLIKELY(string == nullptr)) { return nullptr; } - uint16_t* array = string->GetValue(); - memcpy(array, utf16_data_in, utf16_length * sizeof(uint16_t)); + if (compressible) { + for (int i = 0; i < utf16_length; ++i) { + string->GetValueCompressed()[i] = static_cast<uint8_t>(utf16_data_in[i]); + } + } else { + uint16_t* array = string->GetValue(); + memcpy(array, utf16_data_in, utf16_length * sizeof(uint16_t)); + } return string; } @@ -121,13 +163,20 @@ String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in, int32_t utf8_length) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - SetStringCountVisitor visitor(utf16_length); - String* string = Alloc<true>(self, utf16_length, allocator_type, visitor); + const bool compressible = kUseStringCompression && (utf16_length == utf8_length); + const int32_t utf16_length_with_flag = (compressible) ? String::GetFlaggedCount(utf16_length) + : utf16_length; + SetStringCountVisitor visitor(utf16_length_with_flag); + String* string = Alloc<true>(self, utf16_length_with_flag, allocator_type, visitor); if (UNLIKELY(string == nullptr)) { return nullptr; } - uint16_t* utf16_data_out = string->GetValue(); - ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length); + if (compressible) { + memcpy(string->GetValueCompressed(), utf8_data_in, utf16_length * sizeof(uint8_t)); + } else { + uint16_t* utf16_data_out = string->GetValue(); + ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length); + } return string; } @@ -219,10 +268,16 @@ bool String::Equals(const StringPiece& modified_utf8) { // Create a modified UTF-8 encoded std::string from a java/lang/String object. std::string String::ToModifiedUtf8() { - const uint16_t* chars = GetValue(); size_t byte_count = GetUtfLength(); std::string result(byte_count, static_cast<char>(0)); - ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength()); + if (IsCompressed()) { + for (size_t i = 0; i < byte_count; ++i) { + result[i] = static_cast<char>(CharAt(i)); + } + } else { + const uint16_t* chars = GetValue(); + ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength()); + } return result; } @@ -242,11 +297,24 @@ int32_t String::CompareTo(String* rhs) { int32_t rhsCount = rhs->GetLength(); int32_t countDiff = lhsCount - rhsCount; int32_t minCount = (countDiff < 0) ? 
lhsCount : rhsCount; - const uint16_t* lhsChars = lhs->GetValue(); - const uint16_t* rhsChars = rhs->GetValue(); - int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount); - if (otherRes != 0) { - return otherRes; + if (lhs->IsCompressed() && rhs->IsCompressed()) { + int32_t comparison = memcmp(lhs->GetValueCompressed(), rhs->GetValueCompressed(), minCount * sizeof(uint8_t)); + if (comparison != 0) { + return comparison; + } + } else if (lhs->IsCompressed() || rhs->IsCompressed()) { + for (int32_t i = 0; i < minCount; ++i) { + if (lhs->CharAt(i) != rhs->CharAt(i)) { + return static_cast<int32_t>(lhs->CharAt(i)) - static_cast<int32_t>(rhs->CharAt(i)); + } + } + } else { + const uint16_t* lhsChars = lhs->GetValue(); + const uint16_t* rhsChars = rhs->GetValue(); + int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount); + if (otherRes != 0) { + return otherRes; + } } return countDiff; } @@ -260,7 +328,14 @@ CharArray* String::ToCharArray(Thread* self) { Handle<String> string(hs.NewHandle(this)); CharArray* result = CharArray::Alloc(self, GetLength()); if (result != nullptr) { - memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t)); + if (string->IsCompressed()) { + int32_t length = string->GetLength(); + for (int i = 0; i < length; ++i) { + result->GetData()[i] = string->CharAt(i); + } + } else { + memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t)); + } } else { self->AssertPendingOOMException(); } @@ -269,8 +344,18 @@ CharArray* String::ToCharArray(Thread* self) { void String::GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index) { uint16_t* data = array->GetData() + index; - uint16_t* value = GetValue() + start; - memcpy(data, value, (end - start) * sizeof(uint16_t)); + if (IsCompressed()) { + for (int i = start; i < end; ++i) { + data[i-start] = CharAt(i); + } + } else { + uint16_t* value = GetValue() + start; + memcpy(data, value, (end - start) * sizeof(uint16_t)); + } +} + +bool String::IsValueNull() { + return (IsCompressed()) ? (GetValueCompressed() == nullptr) : (GetValue() == nullptr); } } // namespace mirror diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h index d492ba3162..8695fe84f4 100644 --- a/runtime/mirror/string.h +++ b/runtime/mirror/string.h @@ -31,6 +31,9 @@ class StubTest_ReadBarrierForRoot_Test; namespace mirror { +// String Compression +static constexpr bool kUseStringCompression = false; + // C++ mirror of java.lang.String class MANAGED String FINAL : public Object { public: @@ -54,18 +57,28 @@ class MANAGED String FINAL : public Object { return &value_[0]; } + uint8_t* GetValueCompressed() SHARED_REQUIRES(Locks::mutator_lock_) { + return &value_compressed_[0]; + } + template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> size_t SizeOf() SHARED_REQUIRES(Locks::mutator_lock_); + // Taking out the first/uppermost bit because it is not part of actual length value template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> int32_t GetLength() SHARED_REQUIRES(Locks::mutator_lock_) { + return GetLengthFromCount(GetCount<kVerifyFlags>()); + } + + template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> + int32_t GetCount() SHARED_REQUIRES(Locks::mutator_lock_) { return GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(String, count_)); } void SetCount(int32_t new_count) SHARED_REQUIRES(Locks::mutator_lock_) { // Count is invariant so use non-transactional mode. Also disable check as we may run inside // a transaction. 
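String::CompareTo() above now takes one of three paths depending on whether each operand is compressed. A minimal sketch of that comparison over raw arrays; SimpleString is a hypothetical stand-in for mirror::String, and the both-uncompressed MemCmp16 fast path is folded into the generic loop:

#include <cstdint>
#include <cstring>
#include <cstdio>

// Either 8-bit (compressed ASCII) or 16-bit (UTF-16) storage, plus a length.
struct SimpleString {
  bool compressed;
  const uint8_t* data8;    // valid when compressed
  const uint16_t* data16;  // valid when !compressed
  int32_t length;

  uint16_t CharAt(int32_t i) const { return compressed ? data8[i] : data16[i]; }
};

int32_t CompareTo(const SimpleString& lhs, const SimpleString& rhs) {
  int32_t count_diff = lhs.length - rhs.length;
  int32_t min_count = (count_diff < 0) ? lhs.length : rhs.length;
  if (lhs.compressed && rhs.compressed) {
    // Both ASCII: bytewise memcmp orders the same way as UTF-16 code units.
    int cmp = std::memcmp(lhs.data8, rhs.data8, static_cast<size_t>(min_count));
    if (cmp != 0) return cmp;
  } else {
    // Mixed (or both 16-bit): compare code unit by code unit.
    for (int32_t i = 0; i < min_count; ++i) {
      int32_t diff = static_cast<int32_t>(lhs.CharAt(i)) - static_cast<int32_t>(rhs.CharAt(i));
      if (diff != 0) return diff;
    }
  }
  return count_diff;  // Equal prefix: shorter string sorts first.
}

int main() {
  const uint8_t a8[] = {'a', 'b', 'c'};
  const uint16_t b16[] = {'a', 'b', 'd'};
  SimpleString a{true, a8, nullptr, 3};
  SimpleString b{false, nullptr, b16, 3};
  std::printf("%d\n", CompareTo(a, b));  // negative: 'c' < 'd'
}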
- DCHECK_LE(0, new_count); + DCHECK_LE(0, (new_count & INT32_MAX)); SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count); } @@ -82,12 +95,6 @@ class MANAGED String FINAL : public Object { String* Intern() SHARED_REQUIRES(Locks::mutator_lock_); - template <bool kIsInstrumented, typename PreFenceVisitor> - ALWAYS_INLINE static String* Alloc(Thread* self, int32_t utf16_length, - gc::AllocatorType allocator_type, - const PreFenceVisitor& pre_fence_visitor) - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); - template <bool kIsInstrumented> ALWAYS_INLINE static String* AllocFromByteArray(Thread* self, int32_t byte_length, Handle<ByteArray> array, int32_t offset, @@ -107,6 +114,11 @@ class MANAGED String FINAL : public Object { gc::AllocatorType allocator_type) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); + template <bool kIsInstrumented> + ALWAYS_INLINE static String* AllocEmptyString(Thread* self, + gc::AllocatorType allocator_type) + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); + static String* AllocFromStrings(Thread* self, Handle<String> string, Handle<String> string2) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); @@ -149,6 +161,10 @@ class MANAGED String FINAL : public Object { int32_t FastIndexOf(int32_t ch, int32_t start) SHARED_REQUIRES(Locks::mutator_lock_); + template <typename MemoryType> + int32_t FastIndexOf(MemoryType* chars, int32_t ch, int32_t start) + SHARED_REQUIRES(Locks::mutator_lock_); + int32_t CompareTo(String* other) SHARED_REQUIRES(Locks::mutator_lock_); CharArray* ToCharArray(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) @@ -157,6 +173,28 @@ class MANAGED String FINAL : public Object { void GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index) SHARED_REQUIRES(Locks::mutator_lock_); + template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> + bool IsCompressed() SHARED_REQUIRES(Locks::mutator_lock_) { + return kUseStringCompression && GetCompressionFlagFromCount(GetCount()); + } + + bool IsValueNull() SHARED_REQUIRES(Locks::mutator_lock_); + + template<typename MemoryType> + static bool AllASCII(const MemoryType* const chars, const int length); + + ALWAYS_INLINE static bool GetCompressionFlagFromCount(const int32_t count) { + return kUseStringCompression && ((count & (1u << 31)) != 0); + } + + ALWAYS_INLINE static int32_t GetLengthFromCount(const int32_t count) { + return kUseStringCompression ? (count & INT32_MAX) : count; + } + + ALWAYS_INLINE static int32_t GetFlaggedCount(const int32_t count) { + return kUseStringCompression ? (count | (1u << 31)) : count; + } + static Class* GetJavaLangString() SHARED_REQUIRES(Locks::mutator_lock_) { DCHECK(!java_lang_String_.IsNull()); return java_lang_String_.Read(); @@ -174,12 +212,24 @@ class MANAGED String FINAL : public Object { SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), new_hash_code); } + template <bool kIsInstrumented, typename PreFenceVisitor> + ALWAYS_INLINE static String* Alloc(Thread* self, int32_t utf16_length_with_flag, + gc::AllocatorType allocator_type, + const PreFenceVisitor& pre_fence_visitor) + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); + // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses". 
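GetCompressionFlagFromCount(), GetLengthFromCount() and GetFlaggedCount() above pack the compression flag into bit 31 of the count field. A compact model of that encoding; note the sketch turns the feature on, whereas the change itself ships with kUseStringCompression = false:

#include <cstdint>
#include <cstdio>

// Bit 31 of the count = "compressed" flag, low 31 bits = length.
constexpr bool kUseCompression = true;  // enabled here only so the asserts exercise both paths

constexpr int32_t FlagCount(int32_t length) {
  return kUseCompression
      ? static_cast<int32_t>(static_cast<uint32_t>(length) | (1u << 31))
      : length;
}
constexpr int32_t LengthFromCount(int32_t count) {
  return kUseCompression ? (count & INT32_MAX) : count;
}
constexpr bool IsCompressedCount(int32_t count) {
  return kUseCompression && (static_cast<uint32_t>(count) & (1u << 31)) != 0;
}

static_assert(LengthFromCount(FlagCount(5)) == 5, "length survives flagging");
static_assert(IsCompressedCount(FlagCount(5)), "flag is set");
static_assert(!IsCompressedCount(5), "plain count is uncompressed");

int main() {
  int32_t count = FlagCount(11);
  std::printf("count=0x%08x length=%d compressed=%d\n",
              static_cast<unsigned int>(count), LengthFromCount(count), IsCompressedCount(count));
}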
+ // First bit (uppermost/leftmost) is taken out for Compressed/Uncompressed flag + // [0] Uncompressed: string uses 16-bit memory | [1] Compressed: 8-bit memory int32_t count_; uint32_t hash_code_; - uint16_t value_[0]; + // Compression of all-ASCII into 8-bit memory leads to usage one of these fields + union { + uint16_t value_[0]; + uint8_t value_compressed_[0]; + }; static GcRoot<Class> java_lang_String_; diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc index 6d5e7c7705..d4e54cfa34 100644 --- a/runtime/native/java_lang_Class.cc +++ b/runtime/native/java_lang_Class.cc @@ -198,12 +198,25 @@ ALWAYS_INLINE static inline ArtField* FindFieldByName( } size_t low = 0; size_t high = fields->size(); - const uint16_t* const data = name->GetValue(); + const bool is_name_compressed = name->IsCompressed(); + const uint16_t* const data = (is_name_compressed) ? nullptr : name->GetValue(); + const uint8_t* const data_compressed = (is_name_compressed) ? name->GetValueCompressed() + : nullptr; const size_t length = name->GetLength(); while (low < high) { auto mid = (low + high) / 2; ArtField& field = fields->At(mid); - int result = CompareModifiedUtf8ToUtf16AsCodePointValues(field.GetName(), data, length); + int result = 0; + if (is_name_compressed) { + size_t field_length = strlen(field.GetName()); + size_t min_size = (length < field_length) ? length : field_length; + result = memcmp(field.GetName(), data_compressed, min_size); + if (result == 0) { + result = field_length - length; + } + } else { + result = CompareModifiedUtf8ToUtf16AsCodePointValues(field.GetName(), data, length); + } // Alternate approach, only a few % faster at the cost of more allocations. // int result = field->GetStringName(self, true)->CompareTo(name); if (result < 0) { @@ -636,8 +649,7 @@ static jobject Class_newInstance(JNIEnv* env, jobject javaThis) { // Invoke the string allocator to return an empty string for the string class. 
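The FindFieldByName() change above compares a compressed (8-bit ASCII) name against modified-UTF-8 field names with memcmp over the common prefix and breaks ties by length. A standalone sketch of that comparator and the surrounding binary search, using plain std::string and std::vector instead of ART's LengthPrefixedArray<ArtField>:

#include <cstdint>
#include <cstring>
#include <cstdio>
#include <string>
#include <vector>

// Compare a NUL-terminated candidate name against an 8-bit (ASCII) search key
// of known length: memcmp over the common prefix, then shorter-sorts-first.
static int CompareName(const char* candidate, const uint8_t* key, size_t key_len) {
  size_t cand_len = std::strlen(candidate);
  size_t min_len = (key_len < cand_len) ? key_len : cand_len;
  int result = std::memcmp(candidate, key, min_len);
  if (result == 0) {
    result = static_cast<int>(cand_len) - static_cast<int>(key_len);
  }
  return result;
}

// Binary search over names sorted with the same comparator.
static int FindByName(const std::vector<std::string>& sorted_names,
                      const uint8_t* key, size_t key_len) {
  size_t low = 0;
  size_t high = sorted_names.size();
  while (low < high) {
    size_t mid = (low + high) / 2;
    int result = CompareName(sorted_names[mid].c_str(), key, key_len);
    if (result < 0) {
      low = mid + 1;
    } else if (result > 0) {
      high = mid;
    } else {
      return static_cast<int>(mid);
    }
  }
  return -1;
}

int main() {
  std::vector<std::string> names = {"count", "hash", "hashCode", "value"};
  const uint8_t key[] = {'h', 'a', 's', 'h'};
  std::printf("%d\n", FindByName(names, key, sizeof(key)));  // 1
}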
if (klass->IsStringClass()) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); - mirror::SetStringCountVisitor visitor(0); - mirror::Object* obj = mirror::String::Alloc<true>(soa.Self(), 0, allocator_type, visitor); + mirror::Object* obj = mirror::String::AllocEmptyString<true>(soa.Self(), allocator_type); if (UNLIKELY(soa.Self()->IsExceptionPending())) { return nullptr; } else { diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc index 994ccb1ad9..f0140a303b 100644 --- a/runtime/native/java_lang_DexCache.cc +++ b/runtime/native/java_lang_DexCache.cc @@ -59,7 +59,7 @@ static jobject DexCache_getResolvedType(JNIEnv* env, jobject javaDexCache, jint static jobject DexCache_getResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index) { ScopedFastNativeObjectAccess soa(env); mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache); - CHECK_LT(static_cast<size_t>(string_index), dex_cache->NumStrings()); + CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds()); return soa.AddLocalReference<jobject>(dex_cache->GetResolvedString(string_index)); } @@ -75,7 +75,7 @@ static void DexCache_setResolvedString(JNIEnv* env, jobject javaDexCache, jint s jobject string) { ScopedFastNativeObjectAccess soa(env); mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache); - CHECK_LT(static_cast<size_t>(string_index), dex_cache->NumStrings()); + CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds()); dex_cache->SetResolvedString(string_index, soa.Decode<mirror::String*>(string)); } diff --git a/runtime/native/libcore_util_CharsetUtils.cc b/runtime/native/libcore_util_CharsetUtils.cc index 1216824b5a..64d56f6b26 100644 --- a/runtime/native/libcore_util_CharsetUtils.cc +++ b/runtime/native/libcore_util_CharsetUtils.cc @@ -165,10 +165,9 @@ static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, ji return nullptr; } - const jchar* src = &(string->GetValue()[offset]); jbyte* dst = &bytes[0]; - for (int i = length - 1; i >= 0; --i) { - jchar ch = *src++; + for (int i = 0; i < length; ++i) { + jchar ch = string->CharAt(offset + i); if (ch > maxValidChar) { ch = '?'; } diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk index a34a84100a..e39af2dcd9 100644 --- a/runtime/simulator/Android.mk +++ b/runtime/simulator/Android.mk @@ -22,6 +22,9 @@ LIBART_SIMULATOR_SRC_FILES := \ code_simulator.cc \ code_simulator_arm64.cc +LIBART_SIMULATOR_CFLAGS := \ + -DVIXL_INCLUDE_SIMULATOR_AARCH64 + # $(1): target or host # $(2): ndebug or debug define build-libart-simulator @@ -54,6 +57,7 @@ define build-libart-simulator LOCAL_MODULE_CLASS := SHARED_LIBRARIES LOCAL_SRC_FILES := $$(LIBART_SIMULATOR_SRC_FILES) + LOCAL_CFLAGS := $$(LIBART_SIMULATOR_CFLAGS) ifeq ($$(art_target_or_host),target) $(call set-target-local-clang-vars) diff --git a/runtime/stack.h b/runtime/stack.h index cf33ae1a6a..850d2a4fa8 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -224,7 +224,6 @@ class ShadowFrame { int64_t GetVRegLong(size_t i) const { DCHECK_LT(i, NumberOfVRegs()); const uint32_t* vreg = &vregs_[i]; - // Alignment attribute required for GCC 4.8 typedef const int64_t unaligned_int64 __attribute__ ((aligned (4))); return *reinterpret_cast<unaligned_int64*>(vreg); } @@ -232,7 +231,6 @@ class ShadowFrame { double GetVRegDouble(size_t i) const { DCHECK_LT(i, NumberOfVRegs()); const uint32_t* vreg = &vregs_[i]; - // Alignment 
attribute required for GCC 4.8 typedef const double unaligned_double __attribute__ ((aligned (4))); return *reinterpret_cast<unaligned_double*>(vreg); } @@ -289,7 +287,6 @@ class ShadowFrame { void SetVRegLong(size_t i, int64_t val) { DCHECK_LT(i, NumberOfVRegs()); uint32_t* vreg = &vregs_[i]; - // Alignment attribute required for GCC 4.8 typedef int64_t unaligned_int64 __attribute__ ((aligned (4))); *reinterpret_cast<unaligned_int64*>(vreg) = val; // This is needed for moving collectors since these can update the vreg references if they @@ -303,7 +300,6 @@ class ShadowFrame { void SetVRegDouble(size_t i, double val) { DCHECK_LT(i, NumberOfVRegs()); uint32_t* vreg = &vregs_[i]; - // Alignment attribute required for GCC 4.8 typedef double unaligned_double __attribute__ ((aligned (4))); *reinterpret_cast<unaligned_double*>(vreg) = val; // This is needed for moving collectors since these can update the vreg references if they diff --git a/runtime/thread.cc b/runtime/thread.cc index 0457ba0d26..79b9f02991 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -1431,6 +1431,12 @@ struct StackDumpVisitor : public StackVisitor { if (o == nullptr) { os << "an unknown object"; } else { + if (kUseReadBarrier && Thread::Current()->GetIsGcMarking()) { + // We may call Thread::Dump() in the middle of the CC thread flip and this thread's stack + // may have not been flipped yet and "o" may be a from-space (stale) ref, in which case the + // IdentityHashCode call below will crash. So explicitly mark/forward it here. + o = ReadBarrier::Mark(o); + } if ((o->GetLockWord(false).GetState() == LockWord::kThinLocked) && Locks::mutator_lock_->IsExclusiveHeld(Thread::Current())) { // Getting the identity hashcode here would result in lock inflation and suspension of the diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index 688514cd76..ab1f198646 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -60,7 +60,8 @@ static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000; // Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for // some history. -static constexpr bool kDumpUnattachedThreadNativeStack = true; +// Turned off again. 
b/29248079 +static constexpr bool kDumpUnattachedThreadNativeStack = false; ThreadList::ThreadList() : suspend_all_count_(0), diff --git a/runtime/utf.cc b/runtime/utf.cc index 5e9fdf7fc8..7e06482635 100644 --- a/runtime/utf.cc +++ b/runtime/utf.cc @@ -170,14 +170,6 @@ void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count, } } -int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count) { - uint32_t hash = 0; - while (char_count--) { - hash = hash * 31 + *chars++; - } - return static_cast<int32_t>(hash); -} - int32_t ComputeUtf16HashFromModifiedUtf8(const char* utf8, size_t utf16_length) { uint32_t hash = 0; while (utf16_length != 0u) { diff --git a/runtime/utf.h b/runtime/utf.h index 27d2fd5f56..7c9c333126 100644 --- a/runtime/utf.h +++ b/runtime/utf.h @@ -82,7 +82,16 @@ void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count, */ int32_t ComputeUtf16Hash(mirror::CharArray* chars, int32_t offset, size_t char_count) SHARED_REQUIRES(Locks::mutator_lock_); -int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count); + +template<typename MemoryType> +int32_t ComputeUtf16Hash(const MemoryType* chars, size_t char_count) { + uint32_t hash = 0; + while (char_count--) { + hash = hash * 31 + *chars++; + } + return static_cast<int32_t>(hash); +} + int32_t ComputeUtf16HashFromModifiedUtf8(const char* utf8, size_t utf16_length); // Compute a hash code of a modified UTF-8 string. Not the standard java hash since it returns a diff --git a/runtime/utils.h b/runtime/utils.h index 84079e2fb2..693e0b87ee 100644 --- a/runtime/utils.h +++ b/runtime/utils.h @@ -380,21 +380,7 @@ int64_t GetFileSizeBytes(const std::string& filename); NO_RETURN void SleepForever(); inline void FlushInstructionCache(char* begin, char* end) { - // Only use __builtin___clear_cache with Clang or with GCC >= 4.3.0 - // (__builtin___clear_cache was introduced in GCC 4.3.0). -#if defined(__clang__) || GCC_VERSION >= 40300 __builtin___clear_cache(begin, end); -#else - // Only warn on non-Intel platforms, as x86 and x86-64 do not need - // cache flush instructions, as long as the "code uses the same - // linear address for modifying and fetching the instruction". See - // "Intel(R) 64 and IA-32 Architectures Software Developer's Manual - // Volume 3A: System Programming Guide, Part 1", section 11.6 - // "Self-Modifying Code". -#if !defined(__i386__) && !defined(__x86_64__) - UNIMPLEMENTED(WARNING) << "cache flush"; -#endif -#endif } } // namespace art diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h index 7733a51aa3..4c63156939 100644 --- a/runtime/utils/dex_cache_arrays_layout-inl.h +++ b/runtime/utils/dex_cache_arrays_layout-inl.h @@ -23,6 +23,7 @@ #include "base/logging.h" #include "gc_root.h" #include "globals.h" +#include "mirror/dex_cache.h" #include "primitive.h" namespace art { @@ -45,12 +46,11 @@ inline DexCacheArraysLayout::DexCacheArraysLayout(PointerSize pointer_size, cons : DexCacheArraysLayout(pointer_size, dex_file->GetHeader()) { } -inline size_t DexCacheArraysLayout::Alignment() const { +inline constexpr size_t DexCacheArraysLayout::Alignment() { // GcRoot<> alignment is 4, i.e. lower than or equal to the pointer alignment. static_assert(alignof(GcRoot<mirror::Class>) == 4, "Expecting alignof(GcRoot<>) == 4"); - static_assert(alignof(GcRoot<mirror::String>) == 4, "Expecting alignof(GcRoot<>) == 4"); - // Pointer alignment is the same as pointer size. 
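The templated ComputeUtf16Hash() moved into utf.h above applies the same 31-based polynomial to either a uint8_t or a uint16_t array, so the compressed and uncompressed forms of an ASCII string hash identically. A quick standalone check of that property (HashChars is a local name for the same formula):

#include <cstdint>
#include <cstdio>

// Same polynomial as the templated ComputeUtf16Hash: hash = hash * 31 + code unit.
template <typename MemoryType>
int32_t HashChars(const MemoryType* chars, size_t char_count) {
  uint32_t hash = 0;
  while (char_count--) {
    hash = hash * 31 + *chars++;
  }
  return static_cast<int32_t>(hash);
}

int main() {
  const uint8_t compressed[] = {'A', 'R', 'T'};
  const uint16_t uncompressed[] = {'A', 'R', 'T'};
  std::printf("%d %d\n",
              HashChars(compressed, 3),
              HashChars(uncompressed, 3));  // identical for ASCII content
}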
- return static_cast<size_t>(pointer_size_); + static_assert(alignof(mirror::StringDexCacheType) == 8, "Expecting alignof(StringDexCacheType) == 8"); + return alignof(mirror::StringDexCacheType); } template <typename T> @@ -87,15 +87,22 @@ inline size_t DexCacheArraysLayout::MethodsAlignment() const { } inline size_t DexCacheArraysLayout::StringOffset(uint32_t string_idx) const { - return strings_offset_ + ElementOffset(GcRootAsPointerSize<mirror::String>(), string_idx); + return strings_offset_ + ElementOffset(PointerSize::k64, + string_idx % mirror::DexCache::kDexCacheStringCacheSize); } inline size_t DexCacheArraysLayout::StringsSize(size_t num_elements) const { - return ArraySize(GcRootAsPointerSize<mirror::String>(), num_elements); + size_t cache_size = mirror::DexCache::kDexCacheStringCacheSize; + if (num_elements < cache_size) { + cache_size = num_elements; + } + return ArraySize(PointerSize::k64, cache_size); } inline size_t DexCacheArraysLayout::StringsAlignment() const { - return alignof(GcRoot<mirror::String>); + static_assert(alignof(mirror::StringDexCacheType) == 8, + "Expecting alignof(StringDexCacheType) == 8"); + return alignof(mirror::StringDexCacheType); } inline size_t DexCacheArraysLayout::FieldOffset(uint32_t field_idx) const { diff --git a/runtime/utils/dex_cache_arrays_layout.h b/runtime/utils/dex_cache_arrays_layout.h index f2437fa551..20ffa90592 100644 --- a/runtime/utils/dex_cache_arrays_layout.h +++ b/runtime/utils/dex_cache_arrays_layout.h @@ -52,7 +52,7 @@ class DexCacheArraysLayout { return size_; } - size_t Alignment() const; + static constexpr size_t Alignment(); size_t TypesOffset() const { return types_offset_; diff --git a/test/020-string/expected.txt b/test/020-string/expected.txt index 76b8929bd7..83a0835718 100644 --- a/test/020-string/expected.txt +++ b/test/020-string/expected.txt @@ -1,6 +1,6 @@ testStr is 'This is a very nice string' This is a very nice string -Compare result is 32 +Compare result is greater than zero Compare unicode: -65302 Got expected exception subStr is 'uick brown fox jumps over the lazy ' diff --git a/test/020-string/src/Main.java b/test/020-string/src/Main.java index 710808255c..ccf94aabcc 100644 --- a/test/020-string/src/Main.java +++ b/test/020-string/src/Main.java @@ -45,7 +45,14 @@ public class Main { if (testStr.length() != testStr2.length()) System.out.println("WARNING: stringTest length mismatch"); - System.out.println("Compare result is " + testStr.compareTo(testStr2)); + int compareResult = testStr.compareTo(testStr2); + if (compareResult > 0) { + System.out.println("Compare result is greater than zero"); + } else if (compareResult == 0) { + System.out.println("Compare result is equal to zero"); + } else { + System.out.println("Compare result is less than zero"); + } // expected: -65302 String s1 = "\u0c6d\u0cb6\u0d00\u0000\u0080\u0080\u0080\u0000\u0002\u0002\u0002\u0000\u00e9\u00e9\u00e9"; diff --git a/test/031-class-attributes/expected.txt b/test/031-class-attributes/expected.txt index de99872b52..72656ae916 100644 --- a/test/031-class-attributes/expected.txt +++ b/test/031-class-attributes/expected.txt @@ -84,7 +84,7 @@ abstract final [LClassAttrs$PackagePrivateInnerInterface; enclosingCon: null enclosingMeth: null modifiers: 1 - package: package otherpackage + package: package otherpackage, Unknown, version 0.0 declaredClasses: [0] member classes: [0] isAnnotation: false diff --git a/test/439-npe/expected.txt b/test/439-npe/expected.txt index 271d40daed..34855ee91d 100644 --- a/test/439-npe/expected.txt +++ 
b/test/439-npe/expected.txt @@ -1,18 +1,54 @@ -$opt$setObjectField -$opt$setIntField -$opt$setFloatField -$opt$setLongField -$opt$setDoubleField -$opt$setByteField -$opt$setBooleanField -$opt$setCharField -$opt$setShortField -$opt$getObjectField -$opt$getIntField -$opt$getFloatField -$opt$getLongField -$opt$getDoubleField -$opt$getByteField -$opt$getBooleanField -$opt$getCharField -$opt$getShortField +$opt$noinline$setObjectField +$opt$noinline$setIntField +$opt$noinline$setFloatField +$opt$noinline$setLongField +$opt$noinline$setDoubleField +$opt$noinline$setByteField +$opt$noinline$setBooleanField +$opt$noinline$setCharField +$opt$noinline$setShortField +$opt$noinline$getObjectField +$opt$noinline$getIntField +$opt$noinline$getFloatField +$opt$noinline$getLongField +$opt$noinline$getDoubleField +$opt$noinline$getByteField +$opt$noinline$getBooleanField +$opt$noinline$getCharField +$opt$noinline$getShortField +$opt$noinline$setVolatileObjectField +$opt$noinline$setVolatileIntField +$opt$noinline$setVolatileFloatField +$opt$noinline$setVolatileLongField +$opt$noinline$setVolatileDoubleField +$opt$noinline$setVolatileByteField +$opt$noinline$setVolatileBooleanField +$opt$noinline$setVolatileCharField +$opt$noinline$setVolatileShortField +$opt$noinline$getVolatileObjectField +$opt$noinline$getVolatileIntField +$opt$noinline$getVolatileFloatField +$opt$noinline$getVolatileLongField +$opt$noinline$getVolatileDoubleField +$opt$noinline$getVolatileByteField +$opt$noinline$getVolatileBooleanField +$opt$noinline$getVolatileCharField +$opt$noinline$getVolatileShortField +$opt$noinline$setObjectElement +$opt$noinline$setIntElement +$opt$noinline$setFloatElement +$opt$noinline$setLongElement +$opt$noinline$setDoubleElement +$opt$noinline$setByteElement +$opt$noinline$setBooleanElement +$opt$noinline$setCharElement +$opt$noinline$setShortElement +$opt$noinline$getObjectElement +$opt$noinline$getIntElement +$opt$noinline$getFloatElement +$opt$noinline$getLongElement +$opt$noinline$getDoubleElement +$opt$noinline$getByteElement +$opt$noinline$getBooleanElement +$opt$noinline$getCharElement +$opt$noinline$getShortElement diff --git a/test/439-npe/src/Main.java b/test/439-npe/src/Main.java index 40c2645796..8f66da04de 100644 --- a/test/439-npe/src/Main.java +++ b/test/439-npe/src/Main.java @@ -15,199 +15,624 @@ */ public class Main { + public static boolean doThrow = false; - private volatile Object objectField; - private volatile int intField; - private volatile float floatField; - private volatile long longField; - private volatile double doubleField; - private volatile byte byteField; - private volatile boolean booleanField; - private volatile char charField; - private volatile short shortField; - - public static void $opt$setObjectField(Main m) { + private Object objectField; + private int intField; + private float floatField; + private long longField; + private double doubleField; + private byte byteField; + private boolean booleanField; + private char charField; + private short shortField; + + private volatile Object volatileObjectField; + private volatile int volatileIntField; + private volatile float volatileFloatField; + private volatile long volatileLongField; + private volatile double volatileDoubleField; + private volatile byte volatileByteField; + private volatile boolean volatileBooleanField; + private volatile char volatileCharField; + private volatile short volatileShortField; + + public static void $opt$noinline$setObjectField(Main m) { + if (doThrow) { throw new Error(); } m.objectField 
= null; } - public static void $opt$setIntField(Main m) { + public static void $opt$noinline$setIntField(Main m) { + if (doThrow) { throw new Error(); } m.intField = 0; } - public static void $opt$setFloatField(Main m) { + public static void $opt$noinline$setFloatField(Main m) { + if (doThrow) { throw new Error(); } m.floatField = 0; } - public static void $opt$setLongField(Main m) { + public static void $opt$noinline$setLongField(Main m) { + if (doThrow) { throw new Error(); } m.longField = 0; } - public static void $opt$setDoubleField(Main m) { + public static void $opt$noinline$setDoubleField(Main m) { + if (doThrow) { throw new Error(); } m.doubleField = 0; } - public static void $opt$setByteField(Main m) { + public static void $opt$noinline$setByteField(Main m) { + if (doThrow) { throw new Error(); } m.byteField = 0; } - public static void $opt$setBooleanField(Main m) { + public static void $opt$noinline$setBooleanField(Main m) { + if (doThrow) { throw new Error(); } m.booleanField = false; } - public static void $opt$setCharField(Main m) { + public static void $opt$noinline$setCharField(Main m) { + if (doThrow) { throw new Error(); } m.charField = 0; } - public static void $opt$setShortField(Main m) { + public static void $opt$noinline$setShortField(Main m) { + if (doThrow) { throw new Error(); } m.shortField = 0; } - public static Object $opt$getObjectField(Main m) { + public static Object $opt$noinline$getObjectField(Main m) { + if (doThrow) { throw new Error(); } return m.objectField; } - public static int $opt$getIntField(Main m) { + public static int $opt$noinline$getIntField(Main m) { + if (doThrow) { throw new Error(); } return m.intField; } - public static float $opt$getFloatField(Main m) { + public static float $opt$noinline$getFloatField(Main m) { + if (doThrow) { throw new Error(); } return m.floatField; } - public static long $opt$getLongField(Main m) { + public static long $opt$noinline$getLongField(Main m) { + if (doThrow) { throw new Error(); } return m.longField; } - public static double $opt$getDoubleField(Main m) { + public static double $opt$noinline$getDoubleField(Main m) { + if (doThrow) { throw new Error(); } return m.doubleField; } - public static byte $opt$getByteField(Main m) { + public static byte $opt$noinline$getByteField(Main m) { + if (doThrow) { throw new Error(); } return m.byteField; } - public static boolean $opt$getBooleanField(Main m) { + public static boolean $opt$noinline$getBooleanField(Main m) { + if (doThrow) { throw new Error(); } return m.booleanField; } - public static char $opt$getCharField(Main m) { + public static char $opt$noinline$getCharField(Main m) { + if (doThrow) { throw new Error(); } return m.charField; } - public static short $opt$getShortField(Main m) { + public static short $opt$noinline$getShortField(Main m) { + if (doThrow) { throw new Error(); } return m.shortField; } + public static void $opt$noinline$setVolatileObjectField(Main m) { + if (doThrow) { throw new Error(); } + m.volatileObjectField = null; + } + + public static void $opt$noinline$setVolatileIntField(Main m) { + if (doThrow) { throw new Error(); } + m.volatileIntField = 0; + } + + public static void $opt$noinline$setVolatileFloatField(Main m) { + if (doThrow) { throw new Error(); } + m.volatileFloatField = 0; + } + + public static void $opt$noinline$setVolatileLongField(Main m) { + if (doThrow) { throw new Error(); } + m.volatileLongField = 0; + } + + public static void $opt$noinline$setVolatileDoubleField(Main m) { + if (doThrow) { throw new Error(); } + 
m.volatileDoubleField = 0; + } + + public static void $opt$noinline$setVolatileByteField(Main m) { + if (doThrow) { throw new Error(); } + m.volatileByteField = 0; + } + + public static void $opt$noinline$setVolatileBooleanField(Main m) { + if (doThrow) { throw new Error(); } + m.volatileBooleanField = false; + } + + public static void $opt$noinline$setVolatileCharField(Main m) { + if (doThrow) { throw new Error(); } + m.volatileCharField = 0; + } + + public static void $opt$noinline$setVolatileShortField(Main m) { + if (doThrow) { throw new Error(); } + m.volatileShortField = 0; + } + + public static Object $opt$noinline$getVolatileObjectField(Main m) { + if (doThrow) { throw new Error(); } + return m.volatileObjectField; + } + + public static int $opt$noinline$getVolatileIntField(Main m) { + if (doThrow) { throw new Error(); } + return m.volatileIntField; + } + + public static float $opt$noinline$getVolatileFloatField(Main m) { + if (doThrow) { throw new Error(); } + return m.volatileFloatField; + } + + public static long $opt$noinline$getVolatileLongField(Main m) { + if (doThrow) { throw new Error(); } + return m.volatileLongField; + } + + public static double $opt$noinline$getVolatileDoubleField(Main m) { + if (doThrow) { throw new Error(); } + return m.volatileDoubleField; + } + + public static byte $opt$noinline$getVolatileByteField(Main m) { + if (doThrow) { throw new Error(); } + return m.volatileByteField; + } + + public static boolean $opt$noinline$getVolatileBooleanField(Main m) { + if (doThrow) { throw new Error(); } + return m.volatileBooleanField; + } + + public static char $opt$noinline$getVolatileCharField(Main m) { + if (doThrow) { throw new Error(); } + return m.volatileCharField; + } + + public static short $opt$noinline$getVolatileShortField(Main m) { + if (doThrow) { throw new Error(); } + return m.volatileShortField; + } + + public static void $opt$noinline$setObjectElement(Object[] a) { + if (doThrow) { throw new Error(); } + a[0] = null; + } + + public static void $opt$noinline$setIntElement(int[] a) { + if (doThrow) { throw new Error(); } + a[0] = 0; + } + + public static void $opt$noinline$setFloatElement(float[] a) { + if (doThrow) { throw new Error(); } + a[0] = 0; + } + + public static void $opt$noinline$setLongElement(long[] a) { + if (doThrow) { throw new Error(); } + a[0] = 0; + } + + public static void $opt$noinline$setDoubleElement(double[] a) { + if (doThrow) { throw new Error(); } + a[0] = 0; + } + + public static void $opt$noinline$setByteElement(byte[] a) { + if (doThrow) { throw new Error(); } + a[0] = 0; + } + + public static void $opt$noinline$setBooleanElement(boolean[] a) { + if (doThrow) { throw new Error(); } + a[0] = false; + } + + public static void $opt$noinline$setCharElement(char[] a) { + if (doThrow) { throw new Error(); } + a[0] = 0; + } + + public static void $opt$noinline$setShortElement(short[] a) { + if (doThrow) { throw new Error(); } + a[0] = 0; + } + + public static Object $opt$noinline$getObjectElement(Object[] a) { + if (doThrow) { throw new Error(); } + return a[0]; + } + + public static int $opt$noinline$getIntElement(int[] a) { + if (doThrow) { throw new Error(); } + return a[0]; + } + + public static float $opt$noinline$getFloatElement(float[] a) { + if (doThrow) { throw new Error(); } + return a[0]; + } + + public static long $opt$noinline$getLongElement(long[] a) { + if (doThrow) { throw new Error(); } + return a[0]; + } + + public static double $opt$noinline$getDoubleElement(double[] a) { + if (doThrow) { throw new 
Error(); } + return a[0]; + } + + public static byte $opt$noinline$getByteElement(byte[] a) { + if (doThrow) { throw new Error(); } + return a[0]; + } + + public static boolean $opt$noinline$getBooleanElement(boolean[] a) { + if (doThrow) { throw new Error(); } + return a[0]; + } + + public static char $opt$noinline$getCharElement(char[] a) { + if (doThrow) { throw new Error(); } + return a[0]; + } + + public static short $opt$noinline$getShortElement(short[] a) { + if (doThrow) { throw new Error(); } + return a[0]; + } + public static void main(String[] args) { - int methodLine = 30; - int thisLine = 103; + int methodLine = 42; + int thisLine = 312; + try { + $opt$noinline$setObjectField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 2, methodLine, "$opt$noinline$setObjectField"); + } + try { + $opt$noinline$setIntField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setIntField"); + } + try { + $opt$noinline$setFloatField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setFloatField"); + } + try { + $opt$noinline$setLongField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setLongField"); + } + try { + $opt$noinline$setDoubleField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setDoubleField"); + } + try { + $opt$noinline$setByteField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setByteField"); + } + try { + $opt$noinline$setBooleanField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setBooleanField"); + } + try { + $opt$noinline$setCharField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setCharField"); + } + try { + $opt$noinline$setShortField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setShortField"); + } + try { + $opt$noinline$getObjectField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getObjectField"); + } + try { + $opt$noinline$getIntField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getIntField"); + } + try { + $opt$noinline$getFloatField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getFloatField"); + } + try { + $opt$noinline$getLongField(null); + throw new 
RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getLongField"); + } + try { + $opt$noinline$getDoubleField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getDoubleField"); + } + try { + $opt$noinline$getByteField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getByteField"); + } + try { + $opt$noinline$getBooleanField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getBooleanField"); + } + try { + $opt$noinline$getCharField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getCharField"); + } + try { + $opt$noinline$getShortField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getShortField"); + } + try { + $opt$noinline$setVolatileObjectField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setVolatileObjectField"); + } + try { + $opt$noinline$setVolatileIntField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setVolatileIntField"); + } + try { + $opt$noinline$setVolatileFloatField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setVolatileFloatField"); + } + try { + $opt$noinline$setVolatileLongField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setVolatileLongField"); + } + try { + $opt$noinline$setVolatileDoubleField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setVolatileDoubleField"); + } + try { + $opt$noinline$setVolatileByteField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setVolatileByteField"); + } + try { + $opt$noinline$setVolatileBooleanField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setVolatileBooleanField"); + } + try { + $opt$noinline$setVolatileCharField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setVolatileCharField"); + } + try { + $opt$noinline$setVolatileShortField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, 
thisLine += 6, methodLine += 5, "$opt$noinline$setVolatileShortField"); + } + try { + $opt$noinline$getVolatileObjectField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getVolatileObjectField"); + } + try { + $opt$noinline$getVolatileIntField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getVolatileIntField"); + } + try { + $opt$noinline$getVolatileFloatField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getVolatileFloatField"); + } + try { + $opt$noinline$getVolatileLongField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getVolatileLongField"); + } + try { + $opt$noinline$getVolatileDoubleField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getVolatileDoubleField"); + } + try { + $opt$noinline$getVolatileByteField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getVolatileByteField"); + } + try { + $opt$noinline$getVolatileBooleanField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getVolatileBooleanField"); + } + try { + $opt$noinline$getVolatileCharField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getVolatileCharField"); + } + try { + $opt$noinline$getVolatileShortField(null); + throw new RuntimeException("Failed to throw NullPointerException."); + } catch (NullPointerException npe) { + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getVolatileShortField"); + } try { - $opt$setObjectField(null); + $opt$noinline$setObjectElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 2, methodLine, "$opt$setObjectField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setObjectElement"); } try { - $opt$setIntField(null); + $opt$noinline$setIntElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$setIntField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setIntElement"); } try { - $opt$setFloatField(null); + $opt$noinline$setFloatElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$setFloatField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setFloatElement"); } try { - $opt$setLongField(null); + $opt$noinline$setLongElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, 
"$opt$setLongField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setLongElement"); } try { - $opt$setDoubleField(null); + $opt$noinline$setDoubleElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$setDoubleField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setDoubleElement"); } try { - $opt$setByteField(null); + $opt$noinline$setByteElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$setByteField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setByteElement"); } try { - $opt$setBooleanField(null); + $opt$noinline$setBooleanElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$setBooleanField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setBooleanElement"); } try { - $opt$setCharField(null); + $opt$noinline$setCharElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$setCharField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setCharElement"); } try { - $opt$setShortField(null); + $opt$noinline$setShortElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$setShortField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$setShortElement"); } try { - $opt$getObjectField(null); + $opt$noinline$getObjectElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$getObjectField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getObjectElement"); } try { - $opt$getIntField(null); + $opt$noinline$getIntElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$getIntField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getIntElement"); } try { - $opt$getFloatField(null); + $opt$noinline$getFloatElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$getFloatField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getFloatElement"); } try { - $opt$getLongField(null); + $opt$noinline$getLongElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$getLongField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getLongElement"); } try { - $opt$getDoubleField(null); + $opt$noinline$getDoubleElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$getDoubleField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getDoubleElement"); } try { - $opt$getByteField(null); + $opt$noinline$getByteElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException 
npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$getByteField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getByteElement"); } try { - $opt$getBooleanField(null); + $opt$noinline$getBooleanElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$getBooleanField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getBooleanElement"); } try { - $opt$getCharField(null); + $opt$noinline$getCharElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$getCharField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getCharElement"); } try { - $opt$getShortField(null); + $opt$noinline$getShortElement(null); throw new RuntimeException("Failed to throw NullPointerException."); } catch (NullPointerException npe) { - check(npe, thisLine += 6, methodLine += 4, "$opt$getShortField"); + check(npe, thisLine += 6, methodLine += 5, "$opt$noinline$getShortElement"); } } diff --git a/test/537-checker-arraycopy/src/Main.java b/test/537-checker-arraycopy/src/Main.java index 7c124caa8e..95a11ca010 100644 --- a/test/537-checker-arraycopy/src/Main.java +++ b/test/537-checker-arraycopy/src/Main.java @@ -51,10 +51,10 @@ public class Main { /// CHECK-START-X86_64: void Main.arraycopy() disassembly (after) /// CHECK: InvokeStaticOrDirect intrinsic:SystemArrayCopy - /// CHECK-NOT: test + /// CHECK-NOT: test {{^[^\[].*}}, {{^[^\[].*}} /// CHECK-NOT: call /// CHECK: ReturnVoid - // Checks that the call is intrinsified and that there is no test instruction + // Checks that the call is intrinsified and that there is no register test instruction // when we know the source and destination are not null. public static void arraycopy() { Object[] obj = new Object[4]; diff --git a/test/551-implicit-null-checks/expected.txt b/test/551-implicit-null-checks/expected.txt index e69de29bb2..49b3771e4e 100644 --- a/test/551-implicit-null-checks/expected.txt +++ b/test/551-implicit-null-checks/expected.txt @@ -0,0 +1,4 @@ +NPE from GetLong +NPE from PutLong +NPE from GetDouble +NPE from PutDouble diff --git a/test/551-implicit-null-checks/info.txt b/test/551-implicit-null-checks/info.txt index bdd066bec3..bd3ecfd5ec 100644 --- a/test/551-implicit-null-checks/info.txt +++ b/test/551-implicit-null-checks/info.txt @@ -1 +1 @@ -Test that implicit null checks are recorded correctly for longs.
\ No newline at end of file +Test that implicit null checks are recorded correctly for longs and doubles. diff --git a/test/551-implicit-null-checks/src/Main.java b/test/551-implicit-null-checks/src/Main.java index 677e8d34ca..3586a29d48 100644 --- a/test/551-implicit-null-checks/src/Main.java +++ b/test/551-implicit-null-checks/src/Main.java @@ -18,6 +18,7 @@ public class Main { private class Inner { private long i1; + private double i2; } private Inner inst; @@ -26,12 +27,22 @@ public class Main { try { m.$opt$noinline$testGetLong(); } catch (NullPointerException ex) { - // good + System.out.println("NPE from GetLong"); } try { m.$opt$noinline$testPutLong(778899112233L); } catch (NullPointerException ex) { - // good + System.out.println("NPE from PutLong"); + } + try { + m.$opt$noinline$testGetDouble(); + } catch (NullPointerException ex) { + System.out.println("NPE from GetDouble"); + } + try { + m.$opt$noinline$testPutDouble(1.0); + } catch (NullPointerException ex) { + System.out.println("NPE from PutDouble"); } } @@ -44,4 +55,14 @@ public class Main { inst.i1 = a; throw new Exception(); // prevent inline } + + public void $opt$noinline$testGetDouble() throws Exception { + double result = inst.i2; + throw new Exception(); // prevent inline + } + + public void $opt$noinline$testPutDouble(double a) throws Exception { + inst.i2 = a; + throw new Exception(); // prevent inline + } } diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index 65debc9a53..75c4f34073 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -225,9 +225,11 @@ ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), # Disable 149-suspend-all-stress, its output is flaky (b/28988206). # Disable 577-profile-foreign-dex (b/27454772). +# Disable 552-checker-sharpening, until compiler component of new string dex cache is added (@cwadsworth, @vmarko) TEST_ART_BROKEN_ALL_TARGET_TESTS := \ 149-suspend-all-stress \ 577-profile-foreign-dex \ + 552-checker-sharpening \ ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ @@ -557,16 +559,25 @@ TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS := # more parallel moves on x86, thus some Checker assertions may fail. # 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress # instruction yet (b/26601270). -# 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are -# not yet handled in the read barrier configuration. TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \ 484-checker-register-hints \ - 527-checker-array-access-split \ - 537-checker-arraycopy + 527-checker-array-access-split # Tests that should fail in the read barrier configuration with JIT (Optimizing compiler). TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS := +# Tests failing in non-Baker read barrier configurations with the Optimizing compiler (AOT). +# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet +# handled in non-Baker read barrier configurations. +TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS := \ + 537-checker-arraycopy + +# Tests failing in non-Baker read barrier configurations with JIT (Optimizing compiler). +# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet +# handled in non-Baker read barrier configurations. 
+TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS := \ + 537-checker-arraycopy + ifeq ($(ART_USE_READ_BARRIER),true) ifneq (,$(filter interpreter,$(COMPILER_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ @@ -577,9 +588,15 @@ ifeq ($(ART_USE_READ_BARRIER),true) ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ - $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \ - $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ + $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES), \ + $(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) + ifneq ($(ART_READ_BARRIER_TYPE),BAKER) + ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ + $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES), \ + $(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ + $(TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) + endif endif ifneq (,$(filter jit,$(COMPILER_TYPES))) @@ -587,6 +604,12 @@ ifeq ($(ART_USE_READ_BARRIER),true) $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \ $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ $(TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) + ifneq ($(ART_READ_BARRIER_TYPE),BAKER) + ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ + $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \ + $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ + $(TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) + endif endif endif diff --git a/tools/cpp-define-generator/constant_dexcache.def b/tools/cpp-define-generator/constant_dexcache.def new file mode 100644 index 0000000000..fd197f2b33 --- /dev/null +++ b/tools/cpp-define-generator/constant_dexcache.def @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined(DEFINE_INCLUDE_DEPENDENCIES) +#include "mirror/dex_cache.h" // art::mirror::DexCache, StringDexCachePair +#endif + +DEFINE_EXPR(STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT, int32_t, art::WhichPowerOf2(sizeof(art::mirror::StringDexCachePair))) +DEFINE_EXPR(STRING_DEX_CACHE_SIZE_MINUS_ONE, int32_t, art::mirror::DexCache::kDexCacheStringCacheSize - 1) +DEFINE_EXPR(STRING_DEX_CACHE_HASH_BITS, int32_t, + art::LeastSignificantBit(art::mirror::DexCache::kDexCacheStringCacheSize))
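The three DEFINE_EXPR lines in the new constant_dexcache.def boil down to simple power-of-two arithmetic over the string dex cache geometry. As a rough sketch of that arithmetic (Python; the 1024-entry cache size and 8-byte StringDexCachePair size are assumptions for illustration only, the authoritative values come from art::mirror::DexCache in mirror/dex_cache.h):

    # Sketch only: the cache size and pair size below are illustrative assumptions,
    # not values read from mirror/dex_cache.h.
    def bit_index(value):
        # For a power-of-two value, art::WhichPowerOf2 and art::LeastSignificantBit
        # both reduce to the index of its single set bit.
        assert value > 0 and (value & (value - 1)) == 0
        return value.bit_length() - 1

    K_DEX_CACHE_STRING_CACHE_SIZE = 1024  # assumed power-of-two entry count
    SIZEOF_STRING_DEX_CACHE_PAIR = 8      # assumed sizeof(StringDexCachePair)

    STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT = bit_index(SIZEOF_STRING_DEX_CACHE_PAIR)   # 3
    STRING_DEX_CACHE_SIZE_MINUS_ONE = K_DEX_CACHE_STRING_CACHE_SIZE - 1             # 1023
    STRING_DEX_CACHE_HASH_BITS = bit_index(K_DEX_CACHE_STRING_CACHE_SIZE)           # 10

    # Presumably consumed by hand-written stubs roughly as:
    #   byte_offset = (string_index & STRING_DEX_CACHE_SIZE_MINUS_ONE) << STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT

Under those assumptions, SIZE_MINUS_ONE acts as an index mask and ELEMENT_SIZE_SHIFT scales the masked index to a byte offset, which is the usual reason such constants are exported to assembly via the cpp-define-generator.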
\ No newline at end of file diff --git a/tools/cpp-define-generator/offset_dexcache.def b/tools/cpp-define-generator/offset_dexcache.def index 3b26518788..4b9d481eb1 100644 --- a/tools/cpp-define-generator/offset_dexcache.def +++ b/tools/cpp-define-generator/offset_dexcache.def @@ -19,16 +19,27 @@ #if defined(DEFINE_INCLUDE_DEPENDENCIES) #include "art_method.h" // art::ArtMethod #include "base/enums.h" // PointerSize +#include "mirror/dex_cache.h" // art::DexCache #endif -#define DEFINE_ART_METHOD_OFFSET(field_name, method_name) \ +#define DEFINE_ART_METHOD_OFFSET_SIZED(field_name, method_name) \ DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET_32, int32_t, art::ArtMethod::method_name##Offset(art::PointerSize::k32).Int32Value()) \ DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET_64, int32_t, art::ArtMethod::method_name##Offset(art::PointerSize::k64).Int32Value()) +#define DEFINE_ART_METHOD_OFFSET(field_name, method_name) \ + DEFINE_EXPR(ART_METHOD_ ## field_name ## _OFFSET, int32_t, art::ArtMethod::method_name##Offset().Int32Value()) + +#define DEFINE_DECLARING_CLASS_OFFSET(field_name, method_name) \ + DEFINE_EXPR(DECLARING_CLASS_ ## field_name ## _OFFSET, int32_t, art::mirror::Class::method_name##Offset().Int32Value()) + // New macro suffix Method Name (of the Offset method) -DEFINE_ART_METHOD_OFFSET(DEX_CACHE_METHODS, DexCacheResolvedMethods) -DEFINE_ART_METHOD_OFFSET(DEX_CACHE_TYPES, DexCacheResolvedTypes) -DEFINE_ART_METHOD_OFFSET(JNI, EntryPointFromJni) -DEFINE_ART_METHOD_OFFSET(QUICK_CODE, EntryPointFromQuickCompiledCode) +DEFINE_ART_METHOD_OFFSET_SIZED(DEX_CACHE_METHODS, DexCacheResolvedMethods) +DEFINE_ART_METHOD_OFFSET_SIZED(DEX_CACHE_TYPES, DexCacheResolvedTypes) +DEFINE_ART_METHOD_OFFSET_SIZED(JNI, EntryPointFromJni) +DEFINE_ART_METHOD_OFFSET_SIZED(QUICK_CODE, EntryPointFromQuickCompiledCode) +DEFINE_ART_METHOD_OFFSET(DECLARING_CLASS, DeclaringClass) +DEFINE_DECLARING_CLASS_OFFSET(DEX_CACHE_STRINGS, DexCacheStrings) #undef DEFINE_ART_METHOD_OFFSET +#undef DEFINE_ART_METHOD_OFFSET_32 +#undef DEFINE_DECLARING_CLASS_OFFSET diff --git a/tools/cpp-define-generator/offsets_all.def b/tools/cpp-define-generator/offsets_all.def index d2d8777100..13371a1f71 100644 --- a/tools/cpp-define-generator/offsets_all.def +++ b/tools/cpp-define-generator/offsets_all.def @@ -48,6 +48,7 @@ // TODO: MIRROR_*_ARRAY offsets (depends on header size) // TODO: MIRROR_STRING offsets (depends on header size) #include "offset_dexcache.def" +#include "constant_dexcache.def" #include "constant_heap.def" #include "constant_lockword.def" #include "constant_globals.def" diff --git a/tools/javafuzz/README.md b/tools/javafuzz/README.md index ca8532ae72..68fc171aa9 100644 --- a/tools/javafuzz/README.md +++ b/tools/javafuzz/README.md @@ -1,14 +1,14 @@ JavaFuzz ======== -JavaFuzz is tool for generating random Java programs with the objective of -fuzz testing the ART infrastructure. Each randomly generated Java program +JavaFuzz is a tool for generating random Java programs with the objective +of fuzz testing the ART infrastructure. Each randomly generated Java program can be run under various modes of execution, such as using the interpreter, using the optimizing compiler, using an external reference implementation, or using various target architectures. Any difference between the outputs -(a divergence) may indicate a bug in one of the execution modes. +(**divergence**) may indicate a bug in one of the execution modes. -JavaFuzz can be combined with dexfuzz to get multilayered fuzz testing. 
+JavaFuzz can be combined with dexfuzz to get multi-layered fuzz testing. How to run JavaFuzz =================== @@ -36,6 +36,25 @@ a fixed testing class named Test. So a typical test run looks as follows. jack -cp ${JACK_CLASSPATH} --output-dex . Test.java art -classpath classes.dex Test +How to start the JavaFuzz tests +=============================== + + run_java_fuzz_test.py [--num_tests] + [--device] + [--mode1=mode] [--mode2=mode] + +where + + --num_tests : number of tests to run (10000 by default) + --device : target device serial number (passed to adb -s) + --mode1 : m1 + --mode2 : m2, with m1 != m2, and values one of + ri = reference implementation on host (default for m1) + hint = Art interpreter on host + hopt = Art optimizing on host (default for m2) + tint = Art interpreter on target + topt = Art optimizing on target + Background ========== @@ -49,14 +68,15 @@ Still, any test suite leaves the developer wondering whether undetected bugs and flaws still linger in the system. Over the years, fuzz testing has gained popularity as a testing technique for -discovering such lingering bugs, including bugs that can bring down a system in -an unexpected way. Fuzzing refers to feeding a large amount of random data as -input to a system in an attempt to find bugs or make it crash. Mutation-based -fuzz testing is a special form of fuzzing that applies small random changes to -existing inputs in order to detect shortcomings in a system. Profile-guided or -coverage-guided fuzzing adds a direction to the way these random changes are -applied. Multilayer approaches generate random inputs that are subsequently -mutated at various stages of execution. +discovering such lingering bugs, including bugs that can bring down a system +in an unexpected way. Fuzzing refers to feeding a large amount of random data +as input to a system in an attempt to find bugs or make it crash. Generation- +based fuzz testing constructs random, but properly formatted input data. +Mutation-based fuzz testing applies small random changes to existing inputs +in order to detect shortcomings in a system. Profile-guided or coverage-guided +fuzzing adds a direction to the way these random changes are applied. Multi- +layered approaches generate random inputs that are subsequently mutated at +various stages of execution. The randomness of fuzz testing implies that the size and scope of testing is no longer bounded. Every new run can potentially discover bugs and crashes that were diff --git a/tools/javafuzz/javafuzz.cc b/tools/javafuzz/javafuzz.cc index 4e6e978043..161ae0a178 100644 --- a/tools/javafuzz/javafuzz.cc +++ b/tools/javafuzz/javafuzz.cc @@ -53,7 +53,9 @@ static constexpr const char* kRelOps[] = { "==", "!=", ">", ">=", "<", "<=" * to preserve the property that a given version of JavaFuzz yields the same * fuzzed Java program for a deterministic random seed. */ -const char* VERSION = "1.0"; +const char* VERSION = "1.1"; + +static const uint32_t MAX_DIMS[11] = { 0, 1000, 32, 10, 6, 4, 3, 3, 2, 2, 2 }; /** * A class that generates a random Java program that compiles correctly. The program @@ -83,8 +85,8 @@ class JavaFuzz { fuzz_loop_nest_(loop_nest), return_type_(randomType()), array_type_(randomType()), - array_dim_(random1(3)), - array_size_(random1(10)), + array_dim_(random1(10)), + array_size_(random1(MAX_DIMS[array_dim_])), indentation_(0), expr_depth_(0), stmt_length_(0), @@ -169,7 +171,7 @@ class JavaFuzz { // Emit an unary operator (same type in-out). 
void emitUnaryOp(Type tp) { if (tp == kBoolean) { - fputs("!", out_); + fputc('!', out_); } else if (isInteger(tp)) { EMIT(kIntUnaryOps); } else { // isFP(tp) @@ -239,16 +241,21 @@ class JavaFuzz { case 6: fputs("(long)(int)(long)", out_); return kLong; } } else if (tp == kFloat) { - switch (random1(3)) { + switch (random1(4)) { case 1: fputs("(float)", out_); return kInt; case 2: fputs("(float)", out_); return kLong; case 3: fputs("(float)", out_); return kDouble; + // Narrowing-widening. + case 4: fputs("(float)(int)(float)", out_); return kFloat; } } else if (tp == kDouble) { - switch (random1(3)) { + switch (random1(5)) { case 1: fputs("(double)", out_); return kInt; case 2: fputs("(double)", out_); return kLong; case 3: fputs("(double)", out_); return kFloat; + // Narrowing-widening. + case 4: fputs("(double)(int)(double)", out_); return kDouble; + case 5: fputs("(double)(float)(double)", out_); return kDouble; } } return tp; // nothing suitable, just keep type @@ -273,15 +280,17 @@ class JavaFuzz { // Emit an unary intrinsic (out type given, new suitable in type picked). Type emitIntrinsic1(Type tp) { if (tp == kBoolean) { - switch (random1(4)) { + switch (random1(6)) { case 1: fputs("Float.isNaN", out_); return kFloat; - case 2: fputs("Float.isInfinite", out_); return kFloat; - case 3: fputs("Double.isNaN", out_); return kDouble; - case 4: fputs("Double.isInfinite", out_); return kDouble; + case 2: fputs("Float.isFinite", out_); return kFloat; + case 3: fputs("Float.isInfinite", out_); return kFloat; + case 4: fputs("Double.isNaN", out_); return kDouble; + case 5: fputs("Double.isFinite", out_); return kDouble; + case 6: fputs("Double.isInfinite", out_); return kDouble; } } else if (isInteger(tp)) { const char* prefix = tp == kLong ? "Long" : "Integer"; - switch (random1(9)) { + switch (random1(13)) { case 1: fprintf(out_, "%s.highestOneBit", prefix); break; case 2: fprintf(out_, "%s.lowestOneBit", prefix); break; case 3: fprintf(out_, "%s.numberOfLeadingZeros", prefix); break; @@ -290,15 +299,27 @@ class JavaFuzz { case 6: fprintf(out_, "%s.signum", prefix); break; case 7: fprintf(out_, "%s.reverse", prefix); break; case 8: fprintf(out_, "%s.reverseBytes", prefix); break; - case 9: fputs("Math.abs", out_); break; + case 9: fputs("Math.incrementExact", out_); break; + case 10: fputs("Math.decrementExact", out_); break; + case 11: fputs("Math.negateExact", out_); break; + case 12: fputs("Math.abs", out_); break; + case 13: fputs("Math.round", out_); + return tp == kLong ? kDouble : kFloat; } } else { // isFP(tp) - switch (random1(5)) { + switch (random1(6)) { case 1: fputs("Math.abs", out_); break; case 2: fputs("Math.ulp", out_); break; case 3: fputs("Math.signum", out_); break; case 4: fputs("Math.nextUp", out_); break; case 5: fputs("Math.nextDown", out_); break; + case 6: if (tp == kDouble) { + fputs("Double.longBitsToDouble", out_); + return kLong; + } else { + fputs("Float.intBitsToFloat", out_); + return kInt; + } } } return tp; // same type in-out @@ -314,15 +335,27 @@ class JavaFuzz { } } else if (isInteger(tp)) { const char* prefix = tp == kLong ? 
"Long" : "Integer"; - switch (random1(3)) { + switch (random1(11)) { case 1: fprintf(out_, "%s.compare", prefix); break; - case 2: fputs("Math.min", out_); break; - case 3: fputs("Math.max", out_); break; + case 2: fprintf(out_, "%s.sum", prefix); break; + case 3: fprintf(out_, "%s.min", prefix); break; + case 4: fprintf(out_, "%s.max", prefix); break; + case 5: fputs("Math.min", out_); break; + case 6: fputs("Math.max", out_); break; + case 7: fputs("Math.floorDiv", out_); break; + case 8: fputs("Math.floorMod", out_); break; + case 9: fputs("Math.addExact", out_); break; + case 10: fputs("Math.subtractExact", out_); break; + case 11: fputs("Math.multiplyExact", out_); break; } } else { // isFP(tp) - switch (random1(2)) { - case 1: fputs("Math.min", out_); break; - case 2: fputs("Math.max", out_); break; + const char* prefix = tp == kDouble ? "Double" : "Float"; + switch (random1(5)) { + case 1: fprintf(out_, "%s.sum", prefix); break; + case 2: fprintf(out_, "%s.min", prefix); break; + case 3: fprintf(out_, "%s.max", prefix); break; + case 4: fputs("Math.min", out_); break; + case 5: fputs("Math.max", out_); break; } } return tp; // same type in-out @@ -358,12 +391,24 @@ class JavaFuzz { // Emit miscellaneous constructs. void emitMisc(Type tp) { - switch (tp) { - case kBoolean: fputs("this instanceof Test", out_); break; - case kInt: fputs("mArray.length", out_); break; - case kLong: fputs("Long.MAX_VALUE", out_); break; - case kFloat: fputs("Float.MAX_VALUE", out_); break; - case kDouble: fputs("Double.MAX_VALUE", out_); break; + if (tp == kBoolean) { + fputs("this instanceof Test", out_); + } else if (isInteger(tp)) { + const char* prefix = tp == kLong ? "Long" : "Integer"; + switch (random1(2)) { + case 1: fprintf(out_, "%s.MIN_VALUE", prefix); break; + case 2: fprintf(out_, "%s.MAX_VALUE", prefix); break; + } + } else { // isFP(tp) + const char* prefix = tp == kDouble ? "Double" : "Float"; + switch (random1(6)) { + case 1: fprintf(out_, "%s.MIN_NORMAL", prefix); break; + case 2: fprintf(out_, "%s.MIN_VALUE", prefix); break; + case 3: fprintf(out_, "%s.MAX_VALUE", prefix); break; + case 4: fprintf(out_, "%s.POSITIVE_INFINITY", prefix); break; + case 5: fprintf(out_, "%s.NEGATIVE_INFINITY", prefix); break; + case 6: fprintf(out_, "%s.NaN", prefix); break; + } } } @@ -412,10 +457,10 @@ class JavaFuzz { void emitLiteral(Type tp) { switch (tp) { case kBoolean: fputs(random1(2) == 1 ? "true" : "false", out_); break; - case kInt: fprintf(out_, "%d", random0(100)); break; - case kLong: fprintf(out_, "%dL", random0(100)); break; - case kFloat: fprintf(out_, "%d.0f", random0(100)); break; - case kDouble: fprintf(out_, "%d.0", random0(100)); break; + case kInt: fprintf(out_, "%d", random()); break; + case kLong: fprintf(out_, "%dL", random()); break; + case kFloat: fprintf(out_, "%d.0f", random()); break; + case kDouble: fprintf(out_, "%d.0", random()); break; } } @@ -433,17 +478,6 @@ class JavaFuzz { return false; } - // Emit a loop variable, if available. - bool emitLoopVariable(Type tp) { - if (tp == kInt) { - if (loop_nest_ > 0) { - fprintf(out_, "i%u", random0(loop_nest_)); - return true; - } - } - return false; - } - // Emit a local variable, if available. 
bool emitLocalVariable(Type tp) { uint32_t locals = adjustLocal(tp, 0); @@ -483,10 +517,6 @@ class JavaFuzz { if (emitLocalVariable(tp)) return; // FALL-THROUGH - case 3: - if (emitLoopVariable(tp)) - return; - // FALL-THROUGH default: emitFieldVariable(tp); break; @@ -510,8 +540,9 @@ class JavaFuzz { fputc('(', out_); switch (random1(12)) { // favor binary operations case 1: - // Unary operator: ~x + // Unary operator: ~ x emitUnaryOp(tp); + fputc(' ', out_); emitExpression(tp); break; case 2: @@ -761,7 +792,7 @@ class JavaFuzz { bool mayFollow = false; fputs("switch (", out_); - emitExpression(kInt); + emitArrayIndex(); // restrict its range fputs(") {\n", out_); ++if_nest_; @@ -771,7 +802,7 @@ class JavaFuzz { for (uint32_t i = 0; i < 2; i++) { emitIndentation(); if (i == 0) { - fprintf(out_, "case %d: {\n", random0(100)); + fprintf(out_, "case %u: {\n", random0(array_size_)); } else { fprintf(out_, "default: {\n"); } @@ -977,6 +1008,11 @@ class JavaFuzz { // Random integers. // + // Return random integer. + int32_t random() { + return fuzz_random_engine_(); + } + // Return random integer in range [0,max). uint32_t random0(uint32_t max) { std::uniform_int_distribution<uint32_t> gen(0, max - 1); @@ -1025,7 +1061,7 @@ int32_t main(int32_t argc, char** argv) { // Defaults. uint32_t seed = time(NULL); uint32_t expr_depth = 1; - uint32_t stmt_length = 4; + uint32_t stmt_length = 8; uint32_t if_nest = 2; uint32_t loop_nest = 3; diff --git a/tools/javafuzz/run_java_fuzz_test.py b/tools/javafuzz/run_java_fuzz_test.py new file mode 100755 index 0000000000..5f527b804b --- /dev/null +++ b/tools/javafuzz/run_java_fuzz_test.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python2 +# +# Copyright (C) 2016 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc +import argparse +import subprocess +import sys +import os + +from tempfile import mkdtemp +from threading import Timer + +# Normalized return codes. +EXIT_SUCCESS = 0 +EXIT_TIMEOUT = 1 +EXIT_NOTCOMPILED = 2 +EXIT_NOTRUN = 3 + +# +# Utility methods. +# + +def RunCommand(cmd, args, out, err, timeout = 5): + """Executes a command, and returns its return code. 
+ + Args: + cmd: string, a command to execute + args: string, arguments to pass to command (or None) + out: string, file name to open for stdout (or None) + err: string, file name to open for stderr (or None) + timeout: int, time out in seconds + Returns: + return code of running command (forced EXIT_TIMEOUT on timeout) + """ + cmd = 'exec ' + cmd # preserve pid + if args != None: + cmd = cmd + ' ' + args + outf = None + if out != None: + outf = open(out, mode='w') + errf = None + if err != None: + errf = open(err, mode='w') + proc = subprocess.Popen(cmd, stdout=outf, stderr=errf, shell=True) + timer = Timer(timeout, proc.kill) # enforces timeout + timer.start() + proc.communicate() + if timer.is_alive(): + timer.cancel() + returncode = proc.returncode + else: + returncode = EXIT_TIMEOUT + if outf != None: + outf.close() + if errf != None: + errf.close() + return returncode + +def GetJackClassPath(): + """Returns Jack's classpath.""" + top = os.environ.get('ANDROID_BUILD_TOP') + if top == None: + raise FatalError('Cannot find AOSP build top') + libdir = top + '/out/host/common/obj/JAVA_LIBRARIES' + return libdir + '/core-libart-hostdex_intermediates/classes.jack:' \ + + libdir + '/core-oj-hostdex_intermediates/classes.jack' + +def GetExecutionModeRunner(device, mode): + """Returns a runner for the given execution mode. + + Args: + device: string, target device serial number (or None) + mode: string, execution mode + Returns: + TestRunner with given execution mode + Raises: + FatalError: error for unknown execution mode + """ + if mode == 'ri': + return TestRunnerRIOnHost() + if mode == 'hint': + return TestRunnerArtOnHost(True) + if mode == 'hopt': + return TestRunnerArtOnHost(False) + if mode == 'tint': + return TestRunnerArtOnTarget(device, True) + if mode == 'topt': + return TestRunnerArtOnTarget(device, False) + raise FatalError('Unknown execution mode') + +def GetReturnCode(retc): + """Returns a string representation of the given normalized return code. + Args: + retc: int, normalized return code + Returns: + string representation of normalized return code + Raises: + FatalError: error for unknown normalized return code + """ + if retc == EXIT_SUCCESS: + return 'SUCCESS' + if retc == EXIT_TIMEOUT: + return 'TIMED-OUT' + if retc == EXIT_NOTCOMPILED: + return 'NOT-COMPILED' + if retc == EXIT_NOTRUN: + return 'NOT-RUN' + raise FatalError('Unknown normalized return code') + +# +# Execution mode classes. +# + +class TestRunner(object): + """Abstraction for running a test in a particular execution mode.""" + __meta_class__ = abc.ABCMeta + + def GetDescription(self): + """Returns a description string of the execution mode.""" + return self._description + + def GetId(self): + """Returns a short string that uniquely identifies the execution mode.""" + return self._id + + @abc.abstractmethod + def CompileAndRunTest(self): + """Compile and run the generated test. + + Ensures that the current Test.java in the temporary directory is compiled + and executed under the current execution mode. On success, transfers the + generated output to the file GetId()_out.txt in the temporary directory. + Cleans up after itself. + + Most nonzero return codes are assumed non-divergent, since systems may + exit in different ways. This is enforced by normalizing return codes. 
+ + Returns: + normalized return code + """ + pass + +class TestRunnerRIOnHost(TestRunner): + """Concrete test runner of the reference implementation on host.""" + + def __init__(self): + """Constructor for the RI tester.""" + self._description = 'RI on host' + self._id = 'RI' + + def CompileAndRunTest(self): + if RunCommand('javac', 'Test.java', + out=None, err=None, timeout=30) == EXIT_SUCCESS: + retc = RunCommand('java', 'Test', 'RI_run_out.txt', err=None) + if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT: + retc = EXIT_NOTRUN + else: + retc = EXIT_NOTCOMPILED + # Cleanup and return. + RunCommand('rm', '-f Test.class', out=None, err=None) + return retc + +class TestRunnerArtOnHost(TestRunner): + """Concrete test runner of Art on host (interpreter or optimizing).""" + + def __init__(self, interpreter): + """Constructor for the Art on host tester. + + Args: + interpreter: boolean, selects between interpreter or optimizing + """ + self._art_args = '-cp classes.dex Test' + if interpreter: + self._description = 'Art interpreter on host' + self._id = 'HInt' + self._art_args = '-Xint ' + self._art_args + else: + self._description = 'Art optimizing on host' + self._id = 'HOpt' + self._jack_args = '-cp ' + GetJackClassPath() + ' --output-dex . Test.java' + + def CompileAndRunTest(self): + if RunCommand('jack', self._jack_args, + out=None, err='jackerr.txt', timeout=30) == EXIT_SUCCESS: + out = self.GetId() + '_run_out.txt' + retc = RunCommand('art', self._art_args, out, 'arterr.txt') + if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT: + retc = EXIT_NOTRUN + else: + retc = EXIT_NOTCOMPILED + # Cleanup and return. + RunCommand('rm', '-rf classes.dex jackerr.txt arterr.txt android-data*', + out=None, err=None) + return retc + +# TODO: very rough first version without proper cache, +# reuse staszkiewicz' module for properly setting up dalvikvm on target. +class TestRunnerArtOnTarget(TestRunner): + """Concrete test runner of Art on target (interpreter or optimizing).""" + + def __init__(self, device, interpreter): + """Constructor for the Art on target tester. + + Args: + device: string, target device serial number (or None) + interpreter: boolean, selects between interpreter or optimizing + """ + self._dalvik_args = 'shell dalvikvm -cp /data/local/tmp/classes.dex Test' + if interpreter: + self._description = 'Art interpreter on target' + self._id = 'TInt' + self._dalvik_args = '-Xint ' + self._dalvik_args + else: + self._description = 'Art optimizing on target' + self._id = 'TOpt' + self._adb = 'adb' + if device != None: + self._adb = self._adb + ' -s ' + device + self._jack_args = '-cp ' + GetJackClassPath() + ' --output-dex . Test.java' + + def CompileAndRunTest(self): + if RunCommand('jack', self._jack_args, + out=None, err='jackerr.txt', timeout=30) == EXIT_SUCCESS: + if RunCommand(self._adb, 'push classes.dex /data/local/tmp/', + 'adb.txt', err=None) != EXIT_SUCCESS: + raise FatalError('Cannot push to target device') + out = self.GetId() + '_run_out.txt' + retc = RunCommand(self._adb, self._dalvik_args, out, err=None) + if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT: + retc = EXIT_NOTRUN + else: + retc = EXIT_NOTCOMPILED + # Cleanup and return. + RunCommand('rm', '-f classes.dex jackerr.txt adb.txt', + out=None, err=None) + RunCommand(self._adb, 'shell rm -f /data/local/tmp/classes.dex', + out=None, err=None) + return retc + +# +# Tester classes. 
+# + +class FatalError(Exception): + """Fatal error in the tester.""" + pass + +class JavaFuzzTester(object): + """Tester that runs JavaFuzz many times and report divergences.""" + + def __init__(self, num_tests, device, mode1, mode2): + """Constructor for the tester. + + Args: + num_tests: int, number of tests to run + device: string, target device serial number (or None) + mode1: string, execution mode for first runner + mode2: string, execution mode for second runner + """ + self._num_tests = num_tests + self._device = device + self._runner1 = GetExecutionModeRunner(device, mode1) + self._runner2 = GetExecutionModeRunner(device, mode2) + self._save_dir = None + self._tmp_dir = None + # Statistics. + self._test = 0 + self._num_success = 0 + self._num_not_compiled = 0 + self._num_not_run = 0 + self._num_timed_out = 0 + self._num_divergences = 0 + + def __enter__(self): + """On entry, enters new temp directory after saving current directory. + + Raises: + FatalError: error when temp directory cannot be constructed + """ + self._save_dir = os.getcwd() + self._tmp_dir = mkdtemp(dir="/tmp/") + if self._tmp_dir == None: + raise FatalError('Cannot obtain temp directory') + os.chdir(self._tmp_dir) + return self + + def __exit__(self, etype, evalue, etraceback): + """On exit, re-enters previously saved current directory and cleans up.""" + os.chdir(self._save_dir) + if self._num_divergences == 0: + RunCommand('rm', '-rf ' + self._tmp_dir, out=None, err=None) + + def Run(self): + """Runs JavaFuzz many times and report divergences.""" + print + print '**\n**** JavaFuzz Testing\n**' + print + print '#Tests :', self._num_tests + print 'Device :', self._device + print 'Directory :', self._tmp_dir + print 'Exec-mode1:', self._runner1.GetDescription() + print 'Exec-mode2:', self._runner2.GetDescription() + print + self.ShowStats() + for self._test in range(1, self._num_tests + 1): + self.RunJavaFuzzTest() + self.ShowStats() + if self._num_divergences == 0: + print '\n\nsuccess (no divergences)\n' + else: + print '\n\nfailure (divergences)\n' + + def ShowStats(self): + """Shows current statistics (on same line) while tester is running.""" + print '\rTests:', self._test, \ + 'Success:', self._num_success, \ + 'Not-compiled:', self._num_not_compiled, \ + 'Not-run:', self._num_not_run, \ + 'Timed-out:', self._num_timed_out, \ + 'Divergences:', self._num_divergences, + sys.stdout.flush() + + def RunJavaFuzzTest(self): + """Runs a single JavaFuzz test, comparing two execution modes.""" + self.ConstructTest() + retc1 = self._runner1.CompileAndRunTest() + retc2 = self._runner2.CompileAndRunTest() + self.CheckForDivergence(retc1, retc2) + self.CleanupTest() + + def ConstructTest(self): + """Use JavaFuzz to generate next Test.java test. + + Raises: + FatalError: error when javafuzz fails + """ + if RunCommand('javafuzz', args=None, + out='Test.java', err=None) != EXIT_SUCCESS: + raise FatalError('Unexpected error while running JavaFuzz') + + def CheckForDivergence(self, retc1, retc2): + """Checks for divergences and updates statistics. + + Args: + retc1: int, normalized return code of first runner + retc2: int, normalized return code of second runner + """ + if retc1 == retc2: + # Non-divergent in return code. + if retc1 == EXIT_SUCCESS: + # Both compilations and runs were successful, inspect generated output. 
+ args = self._runner1.GetId() + '_run_out.txt ' \ + + self._runner2.GetId() + '_run_out.txt' + if RunCommand('diff', args, out=None, err=None) != EXIT_SUCCESS: + self.ReportDivergence('divergence in output') + else: + self._num_success += 1 + elif retc1 == EXIT_TIMEOUT: + self._num_timed_out += 1 + elif retc1 == EXIT_NOTCOMPILED: + self._num_not_compiled += 1 + else: + self._num_not_run += 1 + else: + # Divergent in return code. + self.ReportDivergence('divergence in return code: ' + + GetReturnCode(retc1) + ' vs. ' + + GetReturnCode(retc2)) + + def ReportDivergence(self, reason): + """Reports and saves a divergence.""" + self._num_divergences += 1 + print '\n', self._test, reason + # Save. + ddir = 'divergence' + str(self._test) + RunCommand('mkdir', ddir, out=None, err=None) + RunCommand('mv', 'Test.java *.txt ' + ddir, out=None, err=None) + + def CleanupTest(self): + """Cleans up after a single test run.""" + RunCommand('rm', '-f Test.java *.txt', out=None, err=None) + + +def main(): + # Handle arguments. + parser = argparse.ArgumentParser() + parser.add_argument('--num_tests', default=10000, + type=int, help='number of tests to run') + parser.add_argument('--device', help='target device serial number') + parser.add_argument('--mode1', default='ri', + help='execution mode 1 (default: ri)') + parser.add_argument('--mode2', default='hopt', + help='execution mode 2 (default: hopt)') + args = parser.parse_args() + if args.mode1 == args.mode2: + raise FatalError("Identical execution modes given") + # Run the JavaFuzz tester. + with JavaFuzzTester(args.num_tests, args.device, + args.mode1, args.mode2) as fuzzer: + fuzzer.Run() + +if __name__ == "__main__": + main() diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt index cbb6e1d6b9..6472c8d804 100644 --- a/tools/libcore_failures.txt +++ b/tools/libcore_failures.txt @@ -220,13 +220,6 @@ names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ] }, { - description: "Made for extending, shouldn't be run", - result: EXEC_FAILED, - names: ["jsr166.CollectionTest#testEmptyMeansEmpty", - "jsr166.Collection8Test#testForEach", - "jsr166.Collection8Test#testForEachConcurrentStressTest"] -}, -{ description: "Flaky test", result: EXEC_FAILED, bug: 30107038, |
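For reference, the new tester above is normally invoked from the command line as documented in the README hunk earlier in this change (for example, run_java_fuzz_test.py --num_tests=100 --mode1=ri --mode2=hopt). Its main() ultimately just builds a JavaFuzzTester for two execution modes and runs it, so a minimal programmatic sketch looks as follows (the sys.path setup is an assumption for illustration; the script itself is written to be run directly rather than imported):

    # Sketch: drive the fuzz tester from Python instead of its argparse front end.
    # Assumes execution from an ART source tree so tools/javafuzz is importable.
    import sys
    sys.path.insert(0, 'tools/javafuzz')  # assumed location of run_java_fuzz_test.py
    from run_java_fuzz_test import JavaFuzzTester

    # Compare the reference implementation ('ri') against ART optimizing on host
    # ('hopt') over 100 generated programs; None means no specific target device.
    with JavaFuzzTester(100, None, 'ri', 'hopt') as fuzzer:
        fuzzer.Run()

Any divergence is reported on the console, and the offending Test.java plus the per-mode run outputs are preserved in a divergence<N> directory under the temporary test directory, as implemented in ReportDivergence() above.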