161 files changed, 7843 insertions, 1563 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index c4374f7101..0a465c4c22 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -83,16 +83,16 @@ ART_TEST_HOST_GTEST_VerifierDepsMulti_DEX := $(dir $(ART_TEST_HOST_GTEST_Main_DE ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX := $(dir $(ART_TEST_TARGET_GTEST_Main_DEX))$(subst Main,VerifierDepsMulti,$(basename $(notdir $(ART_TEST_TARGET_GTEST_Main_DEX))))$(suffix $(ART_TEST_TARGET_GTEST_Main_DEX)) $(ART_TEST_HOST_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali - $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^) + $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^) $(ART_TEST_TARGET_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali - $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^) + $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^) $(ART_TEST_HOST_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali - $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^) + $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^) $(ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali - $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^) + $(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^) # Dex file dependencies for each gtest. ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested @@ -171,6 +171,12 @@ ART_GTEST_dex2oat_test_TARGET_DEPS := \ # TODO: document why this is needed. ART_GTEST_proxy_test_HOST_DEPS := $(HOST_CORE_IMAGE_DEFAULT_64) $(HOST_CORE_IMAGE_DEFAULT_32) +# The dexdiag test requires the dexdiag utility. +ART_GTEST_dexdiag_test_HOST_DEPS := \ + $(HOST_OUT_EXECUTABLES)/dexdiag +ART_GTEST_dexdiag_test_TARGET_DEPS := \ + dexdiag + # The dexdump test requires an image and the dexdump utility. # TODO: rename into dexdump when migration completes ART_GTEST_dexdump_test_HOST_DEPS := \ @@ -244,6 +250,7 @@ ART_TEST_MODULES := \ art_compiler_tests \ art_compiler_host_tests \ art_dex2oat_tests \ + art_dexdiag_tests \ art_dexdump_tests \ art_dexlayout_tests \ art_dexlist_tests \ diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index fbfa087cfd..a8ab7c6091 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -2030,16 +2030,18 @@ void CompilerDriver::Verify(jobject jclass_loader, } } - // Note: verification should not be pulling in classes anymore when compiling the boot image, - // as all should have been resolved before. As such, doing this in parallel should still - // be deterministic. + // Verification updates VerifierDeps and needs to run single-threaded to be deterministic. + bool force_determinism = GetCompilerOptions().IsForceDeterminism(); + ThreadPool* verify_thread_pool = + force_determinism ? single_thread_pool_.get() : parallel_thread_pool_.get(); + size_t verify_thread_count = force_determinism ? 
1U : parallel_thread_count_; for (const DexFile* dex_file : dex_files) { CHECK(dex_file != nullptr); VerifyDexFile(jclass_loader, *dex_file, dex_files, - parallel_thread_pool_.get(), - parallel_thread_count_, + verify_thread_pool, + verify_thread_count, timings); } diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 874e35716c..fbab9dfbaf 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -179,6 +179,40 @@ class CompilerDriver { uint16_t class_def_index, bool requires) REQUIRES(!requires_constructor_barrier_lock_); + + // Do the <init> methods for this class require a constructor barrier (prior to the return)? + // The answer is "yes", if and only if this class has any instance final fields. + // (This must not be called for any non-<init> methods; the answer would be "no"). + // + // --- + // + // JLS 17.5.1 "Semantics of final fields" mandates that all final fields are frozen at the end + // of the invoked constructor. The constructor barrier is a conservative implementation means of + // enforcing the freezes happen-before the object being constructed is observable by another + // thread. + // + // Note: This question only makes sense for instance constructors; + // static constructors (despite possibly having finals) never need + // a barrier. + // + // JLS 12.4.2 "Detailed Initialization Procedure" approximately describes + // class initialization as: + // + // lock(class.lock) + // class.state = initializing + // unlock(class.lock) + // + // invoke <clinit> + // + // lock(class.lock) + // class.state = initialized + // unlock(class.lock) <-- acts as a release + // + // The last operation in the above example acts as an atomic release + // for any stores in <clinit>, which ends up being stricter + // than what a constructor barrier needs. + // + // See also QuasiAtomic::ThreadFenceForConstructor(). bool RequiresConstructorBarrier(Thread* self, const DexFile* dex_file, uint16_t class_def_index) diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc index f55d5a6fb8..e9d579d2b3 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -249,7 +249,7 @@ uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset, // All remaining method call patches will be handled by this thunk. DCHECK(!unprocessed_method_call_patches_.empty()); DCHECK_LE(thunk_offset - unprocessed_method_call_patches_.front().GetPatchOffset(), - MaxPositiveDisplacement(ThunkType::kMethodCall)); + MaxPositiveDisplacement(GetMethodCallKey())); unprocessed_method_call_patches_.clear(); } } @@ -271,8 +271,8 @@ uint32_t ArmBaseRelativePatcher::CalculateMethodCallDisplacement(uint32_t patch_ DCHECK(method_call_thunk_ != nullptr); // Unsigned arithmetic with its well-defined overflow behavior is just fine here. uint32_t displacement = target_offset - patch_offset; - uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall); - uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall); + uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey()); + uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey()); // NOTE: With unsigned arithmetic we do mean to use && rather than || below. 
if (displacement > max_positive_displacement && displacement < -max_negative_displacement) { // Unwritten thunks have higher offsets, check if it's within range. @@ -299,29 +299,42 @@ uint32_t ArmBaseRelativePatcher::GetThunkTargetOffset(const ThunkKey& key, uint3 if (data.HasWrittenOffset()) { uint32_t offset = data.LastWrittenOffset(); DCHECK_LT(offset, patch_offset); - if (patch_offset - offset <= MaxNegativeDisplacement(key.GetType())) { + if (patch_offset - offset <= MaxNegativeDisplacement(key)) { return offset; } } DCHECK(data.HasPendingOffset()); uint32_t offset = data.GetPendingOffset(); DCHECK_GT(offset, patch_offset); - DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key.GetType())); + DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key)); return offset; } +ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetMethodCallKey() { + return ThunkKey(ThunkType::kMethodCall, ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces) +} + +ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetBakerThunkKey( + const LinkerPatch& patch) { + DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch); + ThunkParams params; + params.baker_params.custom_value1 = patch.GetBakerCustomValue1(); + params.baker_params.custom_value2 = patch.GetBakerCustomValue2(); + ThunkKey key(ThunkType::kBakerReadBarrier, params); + return key; +} + void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method, uint32_t code_offset) { for (const LinkerPatch& patch : compiled_method->GetPatches()) { uint32_t patch_offset = code_offset + patch.LiteralOffset(); - ThunkType key_type = static_cast<ThunkType>(-1); + ThunkKey key(static_cast<ThunkType>(-1), ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces) ThunkData* old_data = nullptr; if (patch.GetType() == LinkerPatch::Type::kCallRelative) { - key_type = ThunkType::kMethodCall; + key = GetMethodCallKey(); unprocessed_method_call_patches_.emplace_back(patch_offset, patch.TargetMethod()); if (method_call_thunk_ == nullptr) { - ThunkKey key(key_type, ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces) - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type); + uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); auto it = thunks_.Put(key, ThunkData(CompileThunk(key), max_next_offset)); method_call_thunk_ = &it->second; AddUnreservedThunk(method_call_thunk_); @@ -329,11 +342,10 @@ void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_metho old_data = method_call_thunk_; } } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) { - ThunkKey key = GetBakerReadBarrierKey(patch); - key_type = key.GetType(); + key = GetBakerThunkKey(patch); auto lb = thunks_.lower_bound(key); if (lb == thunks_.end() || thunks_.key_comp()(key, lb->first)) { - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type); + uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); auto it = thunks_.PutBefore(lb, key, ThunkData(CompileThunk(key), max_next_offset)); AddUnreservedThunk(&it->second); } else { @@ -342,16 +354,16 @@ void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_metho } if (old_data != nullptr) { // Shared path where an old thunk may need an update. 
- DCHECK(key_type != static_cast<ThunkType>(-1)); + DCHECK(key.GetType() != static_cast<ThunkType>(-1)); DCHECK(!old_data->HasReservedOffset() || old_data->LastReservedOffset() < patch_offset); if (old_data->NeedsNextThunk()) { // Patches for a method are ordered by literal offset, so if we still need to place // this thunk for a previous patch, that thunk shall be in range for this patch. - DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key_type)); + DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key)); } else { if (!old_data->HasReservedOffset() || - patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key_type)) { - old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key_type)); + patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key)) { + old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key)); AddUnreservedThunk(old_data); } } @@ -385,8 +397,8 @@ void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset, DCHECK(!unreserved_thunks_.empty()); DCHECK(!unprocessed_method_call_patches_.empty()); DCHECK(method_call_thunk_ != nullptr); - uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall); - uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall); + uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey()); + uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey()); // Process as many patches as possible, stop only on unresolved targets or calls too far back. while (!unprocessed_method_call_patches_.empty()) { MethodReference target_method = unprocessed_method_call_patches_.front().GetTargetMethod(); @@ -439,8 +451,8 @@ void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset, } inline uint32_t ArmBaseRelativePatcher::CalculateMaxNextOffset(uint32_t patch_offset, - ThunkType type) { - return RoundDown(patch_offset + MaxPositiveDisplacement(type), + const ThunkKey& key) { + return RoundDown(patch_offset + MaxPositiveDisplacement(key), GetInstructionSetAlignment(instruction_set_)); } diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h index 47f840fd65..fd204c05a6 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.h +++ b/compiler/linker/arm/relative_patcher_arm_base.h @@ -42,29 +42,12 @@ class ArmBaseRelativePatcher : public RelativePatcher { enum class ThunkType { kMethodCall, // Method call thunk. - kBakerReadBarrierField, // Baker read barrier, load field or array element at known offset. - kBakerReadBarrierArray, // Baker read barrier, array load with index in register. - kBakerReadBarrierRoot, // Baker read barrier, GC root load. + kBakerReadBarrier, // Baker read barrier. }; - struct BakerReadBarrierFieldParams { - uint32_t holder_reg; // Holder object for reading lock word. - uint32_t base_reg; // Base register, different from holder for large offset. - // If base differs from holder, it should be a pre-defined - // register to limit the number of thunks we need to emit. - // The offset is retrieved using introspection. - }; - - struct BakerReadBarrierArrayParams { - uint32_t base_reg; // Reference to the start of the data. - uint32_t dummy; // Dummy field. - // The index register is retrieved using introspection - // to limit the number of thunks we need to emit. 
- }; - - struct BakerReadBarrierRootParams { - uint32_t root_reg; // The register holding the GC root. - uint32_t dummy; // Dummy field. + struct BakerReadBarrierParams { + uint32_t custom_value1; + uint32_t custom_value2; }; struct RawThunkParams { @@ -74,12 +57,8 @@ class ArmBaseRelativePatcher : public RelativePatcher { union ThunkParams { RawThunkParams raw_params; - BakerReadBarrierFieldParams field_params; - BakerReadBarrierArrayParams array_params; - BakerReadBarrierRootParams root_params; - static_assert(sizeof(raw_params) == sizeof(field_params), "field_params size check"); - static_assert(sizeof(raw_params) == sizeof(array_params), "array_params size check"); - static_assert(sizeof(raw_params) == sizeof(root_params), "root_params size check"); + BakerReadBarrierParams baker_params; + static_assert(sizeof(raw_params) == sizeof(baker_params), "baker_params size check"); }; class ThunkKey { @@ -90,19 +69,9 @@ class ArmBaseRelativePatcher : public RelativePatcher { return type_; } - BakerReadBarrierFieldParams GetFieldParams() const { - DCHECK(type_ == ThunkType::kBakerReadBarrierField); - return params_.field_params; - } - - BakerReadBarrierArrayParams GetArrayParams() const { - DCHECK(type_ == ThunkType::kBakerReadBarrierArray); - return params_.array_params; - } - - BakerReadBarrierRootParams GetRootParams() const { - DCHECK(type_ == ThunkType::kBakerReadBarrierRoot); - return params_.root_params; + BakerReadBarrierParams GetBakerReadBarrierParams() const { + DCHECK(type_ == ThunkType::kBakerReadBarrier); + return params_.baker_params; } RawThunkParams GetRawParams() const { @@ -127,6 +96,9 @@ class ArmBaseRelativePatcher : public RelativePatcher { } }; + static ThunkKey GetMethodCallKey(); + static ThunkKey GetBakerThunkKey(const LinkerPatch& patch); + uint32_t ReserveSpaceInternal(uint32_t offset, const CompiledMethod* compiled_method, MethodReference method_ref, @@ -136,10 +108,9 @@ class ArmBaseRelativePatcher : public RelativePatcher { uint32_t CalculateMethodCallDisplacement(uint32_t patch_offset, uint32_t target_offset); - virtual ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) = 0; virtual std::vector<uint8_t> CompileThunk(const ThunkKey& key) = 0; - virtual uint32_t MaxPositiveDisplacement(ThunkType type) = 0; - virtual uint32_t MaxNegativeDisplacement(ThunkType type) = 0; + virtual uint32_t MaxPositiveDisplacement(const ThunkKey& key) = 0; + virtual uint32_t MaxNegativeDisplacement(const ThunkKey& key) = 0; private: class ThunkData; @@ -149,7 +120,7 @@ class ArmBaseRelativePatcher : public RelativePatcher { void ResolveMethodCalls(uint32_t quick_code_offset, MethodReference method_ref); - uint32_t CalculateMaxNextOffset(uint32_t patch_offset, ThunkType type); + uint32_t CalculateMaxNextOffset(uint32_t patch_offset, const ThunkKey& key); RelativePatcherTargetProvider* const provider_; const InstructionSet instruction_set_; diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index 1a5d79ce70..a98aedfc69 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -16,9 +16,16 @@ #include "linker/arm/relative_patcher_thumb2.h" +#include "arch/arm/asm_support_arm.h" #include "art_method.h" +#include "base/bit_utils.h" #include "compiled_method.h" -#include "utils/arm/assembler_thumb2.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" +#include "lock_word.h" +#include "mirror/object.h" +#include "mirror/array-inl.h" +#include "read_barrier.h" 
+#include "utils/arm/assembler_arm_vixl.h" namespace art { namespace linker { @@ -32,6 +39,12 @@ static constexpr int32_t kPcDisplacement = 4; constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement; +// Maximum positive and negative displacement for a conditional branch measured from the patch +// location. (Signed 21 bit displacement with the last bit 0 has range [-2^20, 2^20-2] measured +// from the Thumb2 PC pointing right after the B.cond, i.e. 4 bytes later than the patch location.) +constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement; +constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20) - kPcDisplacement; + Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider) : ArmBaseRelativePatcher(provider, kThumb2) { } @@ -84,29 +97,259 @@ void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, SetInsn32(code, literal_offset, insn); } -void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; +void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) { + DCHECK_ALIGNED(patch_offset, 2u); + uint32_t literal_offset = patch.LiteralOffset(); + DCHECK_ALIGNED(literal_offset, 2u); + DCHECK_LT(literal_offset, code->size()); + uint32_t insn = GetInsn32(code, literal_offset); + DCHECK_EQ(insn, 0xf0408000); // BNE +0 (unpatched) + ThunkKey key = GetBakerThunkKey(patch); + if (kIsDebugBuild) { + const uint32_t encoded_data = key.GetBakerReadBarrierParams().custom_value1; + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + // Check that the next instruction matches the expected LDR. + switch (kind) { + case BakerReadBarrierKind::kField: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(code, literal_offset + 4u); + // LDR (immediate), encoding T3, with correct base_reg. + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); + } else { + DCHECK_GE(code->size() - literal_offset, 6u); + uint32_t next_insn = GetInsn16(code, literal_offset + 4u); + // LDR (immediate), encoding T1, with correct base_reg. + CheckValidReg(next_insn & 0x7u); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3)); + } + break; + } + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(code, literal_offset + 4u); + // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]). + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. 
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16)); + CheckValidReg(next_insn & 0xf); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn32(code, literal_offset - 4u); + // LDR (immediate), encoding T3, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); + } else { + DCHECK_GE(literal_offset, 2u); + uint32_t prev_insn = GetInsn16(code, literal_offset - 2u); + // LDR (immediate), encoding T1, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg); + } + break; + } + default: + LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType()); + UNREACHABLE(); + } + } + uint32_t target_offset = GetThunkTargetOffset(key, patch_offset); + DCHECK_ALIGNED(target_offset, 4u); + uint32_t disp = target_offset - (patch_offset + kPcDisplacement); + DCHECK((disp >> 20) == 0u || (disp >> 20) == 0xfffu); // 21-bit signed. + insn |= ((disp << (26 - 20)) & 0x04000000u) | // Shift bit 20 to 26, "S". + ((disp >> (19 - 11)) & 0x00000800u) | // Shift bit 19 to 11, "J2". + ((disp >> (18 - 13)) & 0x00002000u) | // Shift bit 18 to 13, "J1". + ((disp << (16 - 12)) & 0x003f0000u) | // Shift bits 12-17 to 16-21, "imm6". + ((disp >> (1 - 0)) & 0x000007ffu); // Shift bits 1-11 to 0-10, "imm11". + SetInsn32(code, literal_offset, insn); } -ArmBaseRelativePatcher::ThunkKey Thumb2RelativePatcher::GetBakerReadBarrierKey( - const LinkerPatch& patch ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; - UNREACHABLE(); +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, + vixl::aarch32::Register base_reg, + vixl::aarch32::MemOperand& lock_word, + vixl::aarch32::Label* slow_path, + int32_t raw_ldr_offset) { + using namespace vixl::aarch32; // NOLINT(build/namespaces) + // Load the lock word containing the rb_state. + __ Ldr(ip, lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); + __ B(ne, slow_path, /* is_far_target */ false); + __ Add(lr, lr, raw_ldr_offset); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip, LSR, 32)); + __ Bx(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path.
+} + +void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, + uint32_t encoded_data) { + using namespace vixl::aarch32; // NOLINT(build/namespaces) + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: { + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister) + // that performs further checks on the reference and marks it if needed. + Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + // If base_reg differs from holder_reg, the offset was too large and we must have + // emitted an explicit null check before the load. Otherwise, we need to null-check + // the holder as we do not necessarily do that check before going to the thunk. + vixl::aarch32::Label throw_npe; + if (holder_reg.Is(base_reg)) { + __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false); + } + vixl::aarch32::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide) + ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + + raw_ldr_offset; + Register ep_reg(kBakerCcEntrypointRegister); + if (width == BakerReadBarrierWidth::kWide) { + MemOperand ldr_half_address(lr, ldr_offset + 2); + __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". + __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. + __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. + } else { + MemOperand ldr_address(lr, ldr_offset); + __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1. + __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint + ep_reg, // for narrow LDR. + Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)); + __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4. + __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference. + } + // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. + __ Bx(ep_reg); // Jump to the entrypoint. + if (holder_reg.Is(base_reg)) { + // Add null check slow path. The stack map is at the address pointed to by LR. 
+ __ Bind(&throw_npe); + int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value(); + __ Ldr(ip, MemOperand(/* Thread* */ vixl::aarch32::r9, offset)); + __ Bx(ip); + } + break; + } + case BakerReadBarrierKind::kArray: { + Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidth::kWide == BakerReadBarrierWidthField::Decode(encoded_data)); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl::aarch32::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffsetImmediate(), 0); + const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + + raw_ldr_offset; + MemOperand ldr_address(lr, ldr_offset + 2); + __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", + // i.e. Rm+32 because the scale in imm2 is 2. + Register ep_reg(kBakerCcEntrypointRegister); // Insert ip to the entrypoint address to create + __ Bfi(ep_reg, ip, 3, 6); // a switch case target based on the index register. + __ Mov(ip, base_reg); // Move the base register to ip0. + __ Bx(ep_reg); // Jump to the entrypoint's array switch case. + break; + } + case BakerReadBarrierKind::kGcRoot: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. + Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl::aarch32::Label return_label, not_marked, forwarding_address; + __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false); + MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip, lock_word); + __ Tst(ip, LockWord::kMarkBitStateMaskShifted); + __ B(eq, &not_marked); + __ Bind(&return_label); + __ Bx(lr); + __ Bind(&not_marked); + static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3, + "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in " + " the highest bits and the 'forwarding address' state to have all bits set"); + __ Cmp(ip, Operand(0xc0000000)); + __ B(hs, &forwarding_address); + // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister + // to art_quick_read_barrier_mark_introspection_gc_roots. + Register ep_reg(kBakerCcEntrypointRegister); + int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide) + ?
BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET; + __ Add(ep_reg, ep_reg, Operand(entrypoint_offset)); + __ Mov(ip, root_reg); + __ Bx(ep_reg); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift); + __ Bx(lr); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } } std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) { - DCHECK(key.GetType() == ThunkType::kMethodCall); - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. ArenaPool pool; ArenaAllocator arena(&pool); - arm::Thumb2Assembler assembler(&arena); - assembler.LoadFromOffset( - arm::kLoadWord, arm::PC, arm::R0, - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); - assembler.bkpt(0); + arm::ArmVIXLAssembler assembler(&arena); + + switch (key.GetType()) { + case ThunkType::kMethodCall: + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + assembler.LoadFromOffset( + arm::kLoadWord, + vixl::aarch32::pc, + vixl::aarch32::r0, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + __ Bkpt(0); + break; + case ThunkType::kBakerReadBarrier: + CompileBakerReadBarrierThunk(assembler, key.GetBakerReadBarrierParams().custom_value1); + break; + } + assembler.FinalizeCode(); std::vector<uint8_t> thunk_code(assembler.CodeSize()); MemoryRegion code(thunk_code.data(), thunk_code.size()); @@ -114,19 +357,29 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) { return thunk_code; } -uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(ThunkType type) { - DCHECK(type == ThunkType::kMethodCall); - return kMaxMethodCallPositiveDisplacement; +#undef __ + +uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) { + switch (key.GetType()) { + case ThunkType::kMethodCall: + return kMaxMethodCallPositiveDisplacement; + case ThunkType::kBakerReadBarrier: + return kMaxBcondPositiveDisplacement; + } } -uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(ThunkType type) { - DCHECK(type == ThunkType::kMethodCall); - return kMaxMethodCallNegativeDisplacement; +uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { + switch (key.GetType()) { + case ThunkType::kMethodCall: + return kMaxMethodCallNegativeDisplacement; + case ThunkType::kBakerReadBarrier: + return kMaxBcondNegativeDisplacement; + } } void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { DCHECK_LE(offset + 4u, code->size()); - DCHECK_EQ(offset & 1u, 0u); + DCHECK_ALIGNED(offset, 2u); uint8_t* addr = &(*code)[offset]; addr[0] = (value >> 16) & 0xff; addr[1] = (value >> 24) & 0xff; @@ -136,7 +389,7 @@ void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offse uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) { DCHECK_LE(offset + 4u, code.size()); - DCHECK_EQ(offset & 1u, 0u); + DCHECK_ALIGNED(offset, 2u); const uint8_t* addr = &code[offset]; return (static_cast<uint32_t>(addr[0]) << 16) + @@ -151,5 +404,18 @@ uint32_t Thumb2RelativePatcher::GetInsn32(Vector* code, uint32_t offset) { return GetInsn32(ArrayRef<const uint8_t>(*code), offset); } +uint32_t 
Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset) { + DCHECK_LE(offset + 2u, code.size()); + DCHECK_ALIGNED(offset, 2u); + const uint8_t* addr = &code[offset]; + return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8); +} + +template <typename Vector> +uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) { + static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); + return GetInsn16(ArrayRef<const uint8_t>(*code), offset); +} + } // namespace linker } // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h index ab37802d0f..7e787d2916 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -17,13 +17,57 @@ #ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ #define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ +#include "arch/arm/registers_arm.h" +#include "base/array_ref.h" +#include "base/bit_field.h" +#include "base/bit_utils.h" #include "linker/arm/relative_patcher_arm_base.h" namespace art { + +namespace arm { +class ArmVIXLAssembler; +} // namespace arm + namespace linker { class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { public: + static constexpr uint32_t kBakerCcEntrypointRegister = 4u; + + static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + DCHECK(!narrow || base_reg < 8u) << base_reg; + BakerReadBarrierWidth width = + narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg) | + BakerReadBarrierWidthField::Encode(width); + } + + static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); + } + + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) { + CheckValidReg(root_reg); + DCHECK(!narrow || root_reg < 8u) << root_reg; + BakerReadBarrierWidth width = + narrow ? 
BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(width); + } + explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); void PatchCall(std::vector<uint8_t>* code, @@ -39,18 +83,58 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { uint32_t patch_offset) OVERRIDE; protected: - ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE; std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE; - uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE; + uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; + uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; private: + static constexpr uint32_t kInvalidEncodedReg = /* pc is invalid */ 15u; + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. + kLast + }; + + enum class BakerReadBarrierWidth : uint8_t { + kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled). + kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled). + kLast + }; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBitsForRegister = 4u; + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; + static constexpr size_t kBitsForBakerReadBarrierWidth = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast)); + using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth, + kBitsForBakerReadBarrierKind + 2 * kBitsForRegister, + kBitsForBakerReadBarrierWidth>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg; + } + + void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data); + void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); template <typename Vector> static uint32_t GetInsn32(Vector* code, uint32_t offset); + static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset); + + template <typename Vector> + static uint32_t GetInsn16(Vector* code, uint32_t offset); + friend class Thumb2RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher); diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index f08270d934..af5fa40dc1 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -14,8 +14,12 @@ * limitations under the License. 
*/ +#include "base/casts.h" #include "linker/relative_patcher_test.h" #include "linker/arm/relative_patcher_thumb2.h" +#include "lock_word.h" +#include "mirror/array-inl.h" +#include "mirror/object.h" #include "oat_quick_method_header.h" namespace art { @@ -34,13 +38,102 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; static const uint32_t kPcInsnOffset; + // The PC in Thumb mode is 4 bytes after the instruction location. + static constexpr uint32_t kPcAdjustment = 4u; + // Branches within range [-256, 256) can be created from these by adding the low 8 bits. - static constexpr uint32_t kBlPlus0 = 0xf000f800; - static constexpr uint32_t kBlMinus256 = 0xf7ffff00; + static constexpr uint32_t kBlPlus0 = 0xf000f800u; + static constexpr uint32_t kBlMinus256 = 0xf7ffff00u; // Special BL values. - static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff; - static constexpr uint32_t kBlMinusMax = 0xf400d000; + static constexpr uint32_t kBlPlusMax = 0xf3ffd7ffu; + static constexpr uint32_t kBlMinusMax = 0xf400d000u; + + // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset. + static constexpr uint32_t kBneWPlus0 = 0xf0408000u; + + // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn. + static constexpr uint32_t kLdrInsn = 0x6800u; + + // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn. + static constexpr uint32_t kLdrWInsn = 0xf8d00000u; + + // LDR immediate, negative offset, encoding T4. Bits 0-7 are the offset to subtract. + static constexpr uint32_t kLdrNegativeOffset = 0xf8500c00u; + + // LDR register, lsl #2. Bits 4-5 are the imm2, i.e. the lsl shift. + static constexpr uint32_t kLdrRegLsl2 = 0xf8500020u; + + // NOP instructions. 
+ static constexpr uint32_t kNopInsn = 0xbf00u; + static constexpr uint32_t kNopWInsn = 0xf3af8000u; + + void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { + CHECK_LE(pos, code->size()); + if (IsUint<16>(insn)) { + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn), + static_cast<uint8_t>(insn >> 8), + }; + static_assert(sizeof(insn_code) == 2u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } else { + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn >> 16), + static_cast<uint8_t>(insn >> 24), + static_cast<uint8_t>(insn), + static_cast<uint8_t>(insn >> 8), + }; + static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } + } + + void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) { + InsertInsn(code, code->size(), insn); + } + + std::vector<uint8_t> GenNops(size_t num_nops) { + std::vector<uint8_t> result; + result.reserve(num_nops * 2u); + for (size_t i = 0; i != num_nops; ++i) { + PushBackInsn(&result, kNopInsn); + } + return result; + } + + std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) { + std::vector<uint8_t> raw_code; + size_t number_of_16_bit_insns = + std::count_if(insns.begin(), insns.end(), [](uint32_t x) { return IsUint<16>(x); }); + raw_code.reserve(insns.size() * 4u - number_of_16_bit_insns * 2u); + for (uint32_t insn : insns) { + PushBackInsn(&raw_code, insn); + } + return raw_code; + } + + uint32_t BneWWithOffset(uint32_t bne_offset, uint32_t target_offset) { + if (!IsAligned<2u>(bne_offset)) { + LOG(ERROR) << "Unaligned bne_offset: " << bne_offset; + return 0xffffffffu; // Fails code diff later. + } + if (!IsAligned<2u>(target_offset)) { + LOG(ERROR) << "Unaligned target_offset: " << target_offset; + return 0xffffffffu; // Fails code diff later. + } + uint32_t diff = target_offset - bne_offset - kPcAdjustment; + DCHECK_ALIGNED(diff, 2u); + if ((diff >> 20) != 0 && (diff >> 20) != 0xfffu) { + LOG(ERROR) << "Target out of range: " << diff; + return 0xffffffffu; // Fails code diff later. 
+ } + return kBneWPlus0 | ((diff >> 1) & 0x7ffu) // imm11 + | (((diff >> 12) & 0x3fu) << 16) // imm6 + | (((diff >> 18) & 1) << 13) // J1 + | (((diff >> 19) & 1) << 11) // J2 + | (((diff >> 20) & 1) << 26); // S + } bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, const ArrayRef<const LinkerPatch>& method1_patches, @@ -95,9 +188,7 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { } std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key( - ArmBaseRelativePatcher::ThunkType::kMethodCall, - ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces) + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); return static_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); } @@ -125,19 +216,57 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { std::vector<uint8_t> result; result.reserve(num_nops * 2u + 4u); for (size_t i = 0; i != num_nops; ++i) { - result.push_back(0x00); - result.push_back(0xbf); + PushBackInsn(&result, kNopInsn); } - result.push_back(static_cast<uint8_t>(bl >> 16)); - result.push_back(static_cast<uint8_t>(bl >> 24)); - result.push_back(static_cast<uint8_t>(bl)); - result.push_back(static_cast<uint8_t>(bl >> 8)); + PushBackInsn(&result, bl); return result; } void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset); void TestStringReference(uint32_t string_offset); void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); + + std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { + const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow)); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow)); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + uint32_t GetOutputInsn32(uint32_t offset) { + CHECK_LE(offset, output_.size()); + CHECK_GE(output_.size() - offset, 4u); + return (static_cast<uint32_t>(output_[offset]) << 16) | + (static_cast<uint32_t>(output_[offset + 1]) << 24) | + (static_cast<uint32_t>(output_[offset + 2]) << 0) | + (static_cast<uint32_t>(output_[offset + 3]) << 8); + } + + uint16_t GetOutputInsn16(uint32_t offset) { + CHECK_LE(offset, output_.size()); + CHECK_GE(output_.size() - offset, 2u); + return (static_cast<uint32_t>(output_[offset]) << 0) | + (static_cast<uint32_t>(output_[offset + 1]) << 8); + } + + void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg); + void TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg); }; const uint8_t 
Thumb2RelativePatcherTest::kCallRawCode[] = { @@ -164,7 +293,7 @@ const uint32_t Thumb2RelativePatcherTest::kPcInsnOffset = 8u; void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset) { dex_cache_arrays_begin_ = dex_cache_arrays_begin; - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::DexCacheArrayPatch(0u, nullptr, kPcInsnOffset, element_offset), LinkerPatch::DexCacheArrayPatch(4u, nullptr, kPcInsnOffset, element_offset), }; @@ -175,7 +304,7 @@ void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_ void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) { constexpr uint32_t kStringIndex = 1u; string_index_to_offset_map_.Put(kStringIndex, string_offset); - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex), LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex), }; @@ -214,7 +343,7 @@ void Thumb2RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const Linker } TEST_F(Thumb2RelativePatcherTest, CallSelf) { - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); @@ -227,11 +356,11 @@ TEST_F(Thumb2RelativePatcherTest, CallSelf) { } TEST_F(Thumb2RelativePatcherTest, CallOther) { - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - LinkerPatch method2_patches[] = { + const LinkerPatch method2_patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), }; AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); @@ -254,7 +383,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOther) { } TEST_F(Thumb2RelativePatcherTest, CallTrampoline) { - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); @@ -274,7 +403,7 @@ TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) { constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index), }; @@ -303,7 +432,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) { constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs. ArrayRef<const uint8_t> method1_code(method1_raw_code); ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), }; @@ -325,7 +454,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) { constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs. 
ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), }; @@ -347,7 +476,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs. ArrayRef<const uint8_t> method1_code(method1_raw_code); ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), }; @@ -382,7 +511,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), }; @@ -445,5 +574,710 @@ TEST_F(Thumb2RelativePatcherTest, StringReference4) { ASSERT_LT(GetMethodOffset(1u), 0xfcu); } +void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + DCHECK_ALIGNED(offset, 4u); + DCHECK_LT(offset, 4 * KB); + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + for (uint32_t holder_reg : valid_regs) { + uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + base_reg, holder_reg, /* narrow */ false); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + } + Link(); + + // All thunks are at the end. 
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + for (uint32_t holder_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); + const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; + ASSERT_TRUE( + CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = + CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + size_t gray_check_offset = thunk_offset; + if (holder_reg == base_reg) { + // Verify that the null-check uses the correct register, i.e. holder_reg. + if (holder_reg < 8) { + ASSERT_GE(output_.size() - gray_check_offset, 2u); + ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + gray_check_offset +=2u; + } else { + ASSERT_GE(output_.size() - gray_check_offset, 6u); + ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); + ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ + gray_check_offset += 6u; + } + } + // Verify that the lock word for gray bit check is loaded from the holder address. + ASSERT_GE(output_.size() - gray_check_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + const uint32_t load_lock_word = + kLdrWInsn | + (holder_reg << 16) | + (/* IP */ 12 << 12) | + mirror::Object::MonitorOffset().Uint32Value(); + ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). + EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } + } +} + +void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. 
+ }; + DCHECK_ALIGNED(offset, 4u); + DCHECK_LT(offset, 32u); + constexpr size_t kMethodCodeSize = 6u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + if (base_reg >= 8u) { + continue; + } + for (uint32_t holder_reg : valid_regs) { + uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + base_reg, holder_reg, /* narrow */ true); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + if (base_reg >= 8u) { + continue; + } + for (uint32_t holder_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; + const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; + ASSERT_TRUE( + CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = + CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + size_t gray_check_offset = thunk_offset; + if (holder_reg == base_reg) { + // Verify that the null-check uses the correct register, i.e. holder_reg. + if (holder_reg < 8) { + ASSERT_GE(output_.size() - gray_check_offset, 2u); + ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + gray_check_offset +=2u; + } else { + ASSERT_GE(output_.size() - gray_check_offset, 6u); + ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); + ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ + gray_check_offset += 6u; + } + } + // Verify that the lock word for gray bit check is loaded from the holder address. + ASSERT_GE(output_.size() - gray_check_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + const uint32_t load_lock_word = + kLdrWInsn | + (holder_reg << 16) | + (/* IP */ 12 << 12) | + mirror::Object::MonitorOffset().Uint32Value(); + ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). 
+ EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } + } +} + +#define TEST_BAKER_FIELD_WIDE(offset, ref_reg) \ + TEST_F(Thumb2RelativePatcherTest, \ + BakerOffsetWide##offset##_##ref_reg) { \ + TestBakerFieldWide(offset, ref_reg); \ + } + +TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3) +TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7) +TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11) + +#define TEST_BAKER_FIELD_NARROW(offset, ref_reg) \ + TEST_F(Thumb2RelativePatcherTest, \ + BakerOffsetNarrow##offset##_##ref_reg) { \ + TestBakerFieldNarrow(offset, ref_reg); \ + } + +TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3) +TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7) + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { + // One thunk in the middle with maximum distance branches to it from both sides. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. + constexpr uint32_t kLiteralOffset1 = 6u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + // Enforce thunk reservation with a tiny method. + AddCompiledMethod(MethodRef(3u), kNopCode); + + constexpr uint32_t kLiteralOffset2 = 4; + static_assert(IsAligned<kArmAlignment>(kLiteralOffset2 + kPcAdjustment), + "PC for BNE must be aligned."); + + // Allow reaching the thunk from the very beginning of a method almost 1MiB away. Backward branch + // reaches the full 1MiB but we need to take PC adjustment into account. Things to subtract: + // - thunk size and method 3 pre-header, rounded up (padding in between if needed) + // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) + // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
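To make the subtraction list above concrete, here is a small worked instance; the 40-byte thunk, 32-byte OatQuickMethodHeader, 2-byte kNopCode and 8-byte kArmAlignment are assumed sizes for illustration only, and the code that follows performs the same computation with the real symbolic values.

    // Worked example only; all sizes here are assumptions, not values from this patch.
    constexpr unsigned kOneMiB = 1024u * 1024u;
    constexpr unsigned RoundUp8(unsigned x) { return (x + 7u) & ~7u; }  // kArmAlignment assumed 8
    constexpr unsigned kExampleFiller2Size =
        kOneMiB - (4u + 4u)        // kLiteralOffset2 + kPcAdjustment
        - RoundUp8(40u + 32u)      // thunk + method 3 pre-header = 72
        - RoundUp8(2u + 32u)       // method 3 code + method 4 pre-header = 40
        - 32u;                     // method 4 header
    static_assert(kExampleFiller2Size == 1048424u, "1 MiB budget minus 152 bytes");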
+ size_t thunk_size = + CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); + size_t filler2_size = + 1 * MB - (kLiteralOffset2 + kPcAdjustment) + - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) + - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); + ArrayRef<const uint8_t> filler2_code(raw_filler2_code); + AddCompiledMethod(MethodRef(4u), filler2_code); + + const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code2(raw_code2); + const LinkerPatch patches2[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), + }; + AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); + + Link(); + + uint32_t first_method_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(5u); + EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); + + const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; + const uint32_t bne_max_backward = kBneWPlus0 | 0x04000000; + const std::vector<uint8_t> expected_code1 = + RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); + const std::vector<uint8_t> expected_code2 = RawCode({kNopWInsn, bne_max_backward, kLdrWInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) { + // Based on the first part of BakerOffsetThunkInTheMiddle but the BNE is one instruction + // earlier, so the thunk is emitted before the filler. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. + constexpr uint32_t kLiteralOffset1 = 4u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement + 2 */ (1u << 20); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + Link(); + + const uint32_t bne = BneWWithOffset(kLiteralOffset1, RoundUp(raw_code1.size(), kArmAlignment)); + const std::vector<uint8_t> expected_code1 = RawCode({kNopWInsn, bne, kLdrWInsn, kNopInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast) { + // Based on the BakerOffsetThunkInTheMiddle but the BNE in the last method is preceded + // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. 
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. + constexpr uint32_t kLiteralOffset1 = 6u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + // Enforce thunk reservation with a tiny method. + AddCompiledMethod(MethodRef(3u), kNopCode); + + constexpr uint32_t kReachableFromOffset2 = 4; + constexpr uint32_t kLiteralOffset2 = kReachableFromOffset2 + 2; + static_assert(IsAligned<kArmAlignment>(kReachableFromOffset2 + kPcAdjustment), + "PC for BNE must be aligned."); + + // If not for the extra NOP, this would allow reaching the thunk from the BNE + // of a method 1MiB away. Backward branch reaches the full 1MiB but we need to take + // PC adjustment into account. Things to subtract: + // - thunk size and method 3 pre-header, rounded up (padding in between if needed) + // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) + // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). + size_t thunk_size = + CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); + size_t filler2_size = + 1 * MB - (kReachableFromOffset2 + kPcAdjustment) + - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) + - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); + ArrayRef<const uint8_t> filler2_code(raw_filler2_code); + AddCompiledMethod(MethodRef(4u), filler2_code); + + // Extra 16-bit NOP compared to BakerOffsetThunkInTheMiddle. 
+ const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code2(raw_code2); + const LinkerPatch patches2[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), + }; + AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); + + Link(); + + uint32_t first_method_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(5u); + EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); + + const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; + const uint32_t bne_last = + BneWWithOffset(kLiteralOffset2, RoundUp(raw_code2.size(), kArmAlignment)); + const std::vector<uint8_t> expected_code1 = + RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); + const std::vector<uint8_t> expected_code2 = + RawCode({kNopWInsn, kNopInsn, bne_last, kLdrWInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerArray) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + auto ldr = [](uint32_t base_reg) { + uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; + uint32_t ref_reg = (base_reg == 2) ? 3u : 2u; + return kLdrRegLsl2 | index_reg | (base_reg << 16) | (ref_reg << 12); + }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the lock word for gray bit check is loaded from the correct address + // before the base_reg which points to the array data. 
+ ASSERT_GE(output_.size() - thunk_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; + ASSERT_LT(offset, 0); + ASSERT_GT(offset, -256); + const uint32_t load_lock_word = + kLdrNegativeOffset | + (-offset & 0xffu) | + (base_reg << 16) | + (/* IP */ 12 << 12); + EXPECT_EQ(load_lock_word, GetOutputInsn32(thunk_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). + EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(thunk_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(thunk_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency. + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(thunk_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } +} + +TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 4u; + uint32_t method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12); + const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. 
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+  method_idx = 0u;
+  for (uint32_t root_reg : valid_regs) {
+    ++method_idx;
+    uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+    uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
+    const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
+    ASSERT_EQ(kMethodCodeSize, expected_code.size());
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+    std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false);
+    ASSERT_GT(output_.size(), thunk_offset);
+    ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+    ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                           expected_thunk.size());
+    if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+      DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+      ASSERT_TRUE(false);
+    }
+
+    // Verify that the fast-path null-check uses the correct register, i.e. root_reg.
+    if (root_reg < 8) {
+      ASSERT_GE(output_.size() - thunk_offset, 2u);
+      ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+    } else {
+      ASSERT_GE(output_.size() - thunk_offset, 6u);
+      ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+      ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u);  // BEQ
+    }
+    // Do not check the rest of the implementation.
+
+    // The next thunk follows on the next aligned offset.
+    thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+  }
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
+  uint32_t valid_regs[] = {
+      0, 1, 2, 3, 5, 6, 7,  // R4 is reserved for entrypoint address.
+      // Not applicable to high registers.
+  };
+  constexpr size_t kMethodCodeSize = 6u;
+  constexpr size_t kLiteralOffset = 2u;
+  uint32_t method_idx = 0u;
+  for (uint32_t root_reg : valid_regs) {
+    ++method_idx;
+    uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
+    const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
+    ASSERT_EQ(kMethodCodeSize, raw_code.size());
+    ArrayRef<const uint8_t> code(raw_code);
+    const LinkerPatch patches[] = {
+        LinkerPatch::BakerReadBarrierBranchPatch(
+            kLiteralOffset,
+            Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)),
+    };
+    AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+  }
+  Link();
+
+  // All thunks are at the end.
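Both GC root tests (the wide one above and the narrow loop that follows) assert the fast-path null check by matching `0xb100 | root_reg` under the mask `0xfd07u`. As a sketch of why that works; the 16-bit CBZ field layout is recalled from the Thumb encoding and is an assumption of this note, not something the patch defines:

    // CBZ/CBNZ (T1): |15..12=1011|11=op|10=0|9=i|8=1|7..3=imm5|2..0=Rn|.
    // The mask 0xfd07 keeps everything except the offset bits (i and imm5), so the
    // assertion accepts a CBZ on root_reg with any branch target.
    constexpr unsigned kCbzR3SomeTarget = 0xb100u | 3u | (0x0au << 3);  // CBZ r3, <ahead>; imm5 = 10
    static_assert((kCbzR3SomeTarget & 0xfd07u) == (0xb100u | 3u),
                  "offset bits are ignored; opcode and Rn are checked");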
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg; + const std::vector<uint8_t> expected_code = RawCode({ldr, bne}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. + ASSERT_GE(output_.size() - thunk_offset, 2u); + ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } +} + +TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) { + // Test 1MiB of patches to the same thunk to stress-test different large offsets. + // (The low bits are not that important but the location of the high bits is easy to get wrong.) + std::vector<uint8_t> code; + code.reserve(1 * MB); + const size_t num_patches = 1 * MB / 8u; + std::vector<LinkerPatch> patches; + patches.reserve(num_patches); + const uint32_t ldr = + kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false); + for (size_t i = 0; i != num_patches; ++i) { + PushBackInsn(&code, ldr); + PushBackInsn(&code, kBneWPlus0); + patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); + } + ASSERT_EQ(1 * MB, code.size()); + ASSERT_EQ(num_patches, patches.size()); + AddCompiledMethod(MethodRef(1u), + ArrayRef<const uint8_t>(code), + ArrayRef<const LinkerPatch>(patches)); + Link(); + + // The thunk is right after the method code. + DCHECK_ALIGNED(1 * MB, kArmAlignment); + std::vector<uint8_t> expected_code; + for (size_t i = 0; i != num_patches; ++i) { + PushBackInsn(&expected_code, ldr); + PushBackInsn(&expected_code, BneWWithOffset(8u * i + 4u, 1 * MB)); + patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); + } + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) { + // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());` + // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily + // hold when we're reserving thunks of different sizes. This test exposes the situation + // by using Baker thunks and a method call thunk. + + // Add a method call patch that can reach to method 1 offset + 16MiB. 
+ uint32_t method_idx = 0u; + constexpr size_t kMethodCallLiteralOffset = 2u; + constexpr uint32_t kMissingMethodIdx = 2u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0}); + const LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u), + }; + ArrayRef<const uint8_t> code1(raw_code1); + ++method_idx; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches)); + + // Skip kMissingMethodIdx. + ++method_idx; + ASSERT_EQ(kMissingMethodIdx, method_idx); + // Add a method with the right size that the method code for the next one starts 1MiB + // after code for method 1. + size_t filler_size = + 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> filler_code = GenNops(filler_size / 2u); + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); + // Add 14 methods with 1MiB code+header, making the code for the next method start 1MiB + // before the currently scheduled MaxNextOffset() for the method call thunk. + for (uint32_t i = 0; i != 14; ++i) { + filler_size = 1 * MB - sizeof(OatQuickMethodHeader); + filler_code = GenNops(filler_size / 2u); + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); + } + + // Add 2 Baker GC root patches to the last method, one that would allow the thunk at + // 1MiB + kArmAlignment, i.e. kArmAlignment after the method call thunk, and the + // second that needs it kArmAlignment after that. Given the size of the GC root thunk + // is more than the space required by the method call thunk plus kArmAlignment, + // this pushes the first GC root thunk's pending MaxNextOffset() before the method call + // thunk's pending MaxNextOffset() which needs to be adjusted. + ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment, + CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size()); + static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8"); + constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment; + constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment; + // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | (root_reg << 12)`. + const uint32_t ldr1 = kLdrWInsn | (/* root_reg */ 1 << 12); + const uint32_t ldr2 = kLdrWInsn | (/* root_reg */ 2 << 12); + const std::vector<uint8_t> last_method_raw_code = RawCode({ + kNopInsn, // Padding before first GC root read barrier. + ldr1, kBneWPlus0, // First GC root LDR with read barrier. + ldr2, kBneWPlus0, // Second GC root LDR with read barrier. + }); + uint32_t encoded_data1 = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false); + uint32_t encoded_data2 = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false); + const LinkerPatch last_method_patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1), + LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), + ArrayRef<const uint8_t>(last_method_raw_code), + ArrayRef<const LinkerPatch>(last_method_patches)); + + // The main purpose of the test is to check that Link() does not cause a crash. 
+ Link(); + + ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u)); +} + } // namespace linker } // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 5c6fb504cf..2b06e3f649 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -305,37 +305,42 @@ void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* cod DCHECK_LT(literal_offset, code->size()); uint32_t insn = GetInsn(code, literal_offset); DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000); // CBNZ Xt, +0 (unpatched) - ThunkKey key = GetBakerReadBarrierKey(patch); + ThunkKey key = GetBakerThunkKey(patch); if (kIsDebugBuild) { + const uint32_t encoded_data = key.GetBakerReadBarrierParams().custom_value1; + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); // Check that the next instruction matches the expected LDR. - switch (key.GetType()) { - case ThunkType::kBakerReadBarrierField: { + switch (kind) { + case BakerReadBarrierKind::kField: { DCHECK_GE(code->size() - literal_offset, 8u); uint32_t next_insn = GetInsn(code, literal_offset + 4u); // LDR (immediate) with correct base_reg. CheckValidReg(next_insn & 0x1fu); // Check destination register. - CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetFieldParams().base_reg << 5)); + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5)); break; } - case ThunkType::kBakerReadBarrierArray: { + case BakerReadBarrierKind::kArray: { DCHECK_GE(code->size() - literal_offset, 8u); uint32_t next_insn = GetInsn(code, literal_offset + 4u); // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL), // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2]. CheckValidReg(next_insn & 0x1fu); // Check destination register. - CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (key.GetArrayParams().base_reg << 5)); + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5)); CheckValidReg((next_insn >> 16) & 0x1f); // Check index register break; } - case ThunkType::kBakerReadBarrierRoot: { + case BakerReadBarrierKind::kGcRoot: { DCHECK_GE(literal_offset, 4u); uint32_t prev_insn = GetInsn(code, literal_offset - 4u); // LDR (immediate) with correct root_reg. 
- CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | key.GetRootParams().root_reg); + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); break; } default: - LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType()); + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); UNREACHABLE(); } } @@ -347,49 +352,6 @@ void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* cod SetInsn(code, literal_offset, insn); } -ArmBaseRelativePatcher::ThunkKey Arm64RelativePatcher::GetBakerReadBarrierKey( - const LinkerPatch& patch) { - DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch); - uint32_t value = patch.GetBakerCustomValue1(); - BakerReadBarrierKind type = BakerReadBarrierKindField::Decode(value); - ThunkParams params; - switch (type) { - case BakerReadBarrierKind::kField: - params.field_params.base_reg = BakerReadBarrierFirstRegField::Decode(value); - CheckValidReg(params.field_params.base_reg); - params.field_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value); - CheckValidReg(params.field_params.holder_reg); - break; - case BakerReadBarrierKind::kArray: - params.array_params.base_reg = BakerReadBarrierFirstRegField::Decode(value); - CheckValidReg(params.array_params.base_reg); - params.array_params.dummy = 0u; - DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg); - break; - case BakerReadBarrierKind::kGcRoot: - params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value); - CheckValidReg(params.root_params.root_reg); - params.root_params.dummy = 0u; - DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg); - break; - default: - LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(type); - UNREACHABLE(); - } - constexpr uint8_t kTypeTranslationOffset = 1u; - static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset == - static_cast<uint32_t>(ThunkType::kBakerReadBarrierField), - "Thunk type translation check."); - static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kArray) + kTypeTranslationOffset == - static_cast<uint32_t>(ThunkType::kBakerReadBarrierArray), - "Thunk type translation check."); - static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset == - static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot), - "Thunk type translation check."); - return ThunkKey(static_cast<ThunkType>(static_cast<uint32_t>(type) + kTypeTranslationOffset), - params); -} - #define __ assembler.GetVIXLAssembler()-> static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, @@ -419,28 +381,22 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, // Note: The fake dependency is unnecessary for the slow path. } -std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { +void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, + uint32_t encoded_data) { using namespace vixl::aarch64; // NOLINT(build/namespaces) - ArenaPool pool; - ArenaAllocator arena(&pool); - arm64::Arm64Assembler assembler(&arena); - - switch (key.GetType()) { - case ThunkType::kMethodCall: { - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. 
- Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArm64PointerSize).Int32Value()); - assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); - break; - } - case ThunkType::kBakerReadBarrierField: { + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: { // Check if the holder is gray and, if not, add fake dependency to the base register // and return to the LDR instruction to load the reference. Otherwise, use introspection // to load the reference and call the entrypoint (in IP1) that performs further checks // on the reference and marks it if needed. - auto holder_reg = Register::GetXRegFromCode(key.GetFieldParams().holder_reg); - auto base_reg = Register::GetXRegFromCode(key.GetFieldParams().base_reg); + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + auto holder_reg = + Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip0, ip1); // If base_reg differs from holder_reg, the offset was too large and we must have @@ -469,8 +425,11 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { } break; } - case ThunkType::kBakerReadBarrierArray: { - auto base_reg = Register::GetXRegFromCode(key.GetArrayParams().base_reg); + case BakerReadBarrierKind::kArray: { + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip0, ip1); vixl::aarch64::Label slow_path; @@ -489,12 +448,15 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { __ Br(ip1); // Jump to the entrypoint's array switch case. break; } - case ThunkType::kBakerReadBarrierRoot: { + case BakerReadBarrierKind::kGcRoot: { // Check if the reference needs to be marked and if so (i.e. not null, not marked yet // and it does not have a forwarding address), call the correct introspection entrypoint; // otherwise return the reference (or the extracted forwarding address). // There is no gray bit check for GC roots. - auto root_reg = Register::GetWRegFromCode(key.GetRootParams().root_reg); + auto root_reg = + Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip0, ip1); vixl::aarch64::Label return_label, not_marked, forwarding_address; @@ -517,6 +479,30 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { __ Br(lr); break; } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } +} + +std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { + ArenaPool pool; + ArenaAllocator arena(&pool); + arm64::Arm64Assembler assembler(&arena); + + switch (key.GetType()) { + case ThunkType::kMethodCall: { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. 
+ Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64PointerSize).Int32Value()); + assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); + break; + } + case ThunkType::kBakerReadBarrier: { + CompileBakerReadBarrierThunk(assembler, key.GetBakerReadBarrierParams().custom_value1); + break; + } } // Ensure we emit the literal pool. @@ -529,24 +515,20 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { #undef __ -uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(ThunkType type) { - switch (type) { +uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) { + switch (key.GetType()) { case ThunkType::kMethodCall: return kMaxMethodCallPositiveDisplacement; - case ThunkType::kBakerReadBarrierField: - case ThunkType::kBakerReadBarrierArray: - case ThunkType::kBakerReadBarrierRoot: + case ThunkType::kBakerReadBarrier: return kMaxBcondPositiveDisplacement; } } -uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(ThunkType type) { - switch (type) { +uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { + switch (key.GetType()) { case ThunkType::kMethodCall: return kMaxMethodCallNegativeDisplacement; - case ThunkType::kBakerReadBarrierField: - case ThunkType::kBakerReadBarrierArray: - case ThunkType::kBakerReadBarrierRoot: + case ThunkType::kBakerReadBarrier: return kMaxBcondNegativeDisplacement; } } diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h index 71ab70eda9..02a5b1ef8f 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.h +++ b/compiler/linker/arm64/relative_patcher_arm64.h @@ -23,17 +23,15 @@ #include "linker/arm/relative_patcher_arm_base.h" namespace art { + +namespace arm64 { +class Arm64Assembler; +} // namespace arm64 + namespace linker { class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { public: - enum class BakerReadBarrierKind : uint8_t { - kField, // Field get or array get with constant offset (i.e. constant index). - kArray, // Array get with index in register. - kGcRoot, // GC root load. - kLast - }; - static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { CheckValidReg(base_reg); CheckValidReg(holder_reg); @@ -77,14 +75,20 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { uint32_t patch_offset) OVERRIDE; protected: - ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE; std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE; - uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE; + uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; + uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; private: static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u; + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. 
+ kLast + }; + static constexpr size_t kBitsForBakerReadBarrierKind = MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); static constexpr size_t kBitsForRegister = 5u; @@ -96,9 +100,11 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 30u && reg != 16u && reg != 17u); + DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg; } + void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data); + static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp); static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset, diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc index 57ea886586..b6549eefb3 100644 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -167,9 +167,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { } std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key( - ArmBaseRelativePatcher::ThunkType::kMethodCall, - ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces) + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); } @@ -473,25 +471,22 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) { const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( 0u, Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg)); - auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get()); - ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); - return patcher->CompileThunk(key); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); } std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( 0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); - auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get()); - ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); - return patcher->CompileThunk(key); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); } std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( 0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); - auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get()); - ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); - return patcher->CompileThunk(key); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); } uint32_t GetOutputInsn(uint32_t offset) { diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 476906a768..ed630cda91 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ 
b/compiler/optimizing/bounds_check_elimination.cc @@ -1550,7 +1550,7 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = GetPreHeader(loop, check); HInstruction* cond = new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant()); - InsertDeoptInLoop(loop, block, cond); + InsertDeoptInLoop(loop, block, cond, /* is_null_check */ true); ReplaceInstruction(check, array); return true; } @@ -1616,11 +1616,16 @@ class BCEVisitor : public HGraphVisitor { } /** Inserts a deoptimization test in a loop preheader. */ - void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) { + void InsertDeoptInLoop(HLoopInformation* loop, + HBasicBlock* block, + HInstruction* condition, + bool is_null_check = false) { HInstruction* suspend = loop->GetSuspendCheck(); block->InsertInstructionBefore(condition, block->GetLastInstruction()); + DeoptimizationKind kind = + is_null_check ? DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE; HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( - GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc()); + GetGraph()->GetArena(), condition, kind, suspend->GetDexPc()); block->InsertInstructionBefore(deoptimize, block->GetLastInstruction()); if (suspend->HasEnvironment()) { deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( @@ -1633,7 +1638,7 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = bounds_check->GetBlock(); block->InsertInstructionBefore(condition, bounds_check); HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( - GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc()); + GetGraph()->GetArena(), condition, DeoptimizationKind::kBlockBCE, bounds_check->GetDexPc()); block->InsertInstructionBefore(deoptimize, bounds_check); deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); } @@ -1749,6 +1754,7 @@ class BCEVisitor : public HGraphVisitor { phi = NewPhi(new_preheader, instruction, type); } user->ReplaceInput(phi, index); // Removes the use node from the list. + induction_range_.Replace(user, instruction, phi); // update induction } } // Scan all environment uses of an instruction and replace each later use with a phi node. diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index 048073e37a..c806dbfef6 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -203,7 +203,7 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, // Need a new deoptimize instruction that copies the environment // of the suspend instruction for the loop. 
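The BCE hunks above and the CHA hunk below switch from HDeoptimize::Kind to a shared DeoptimizationKind that records why a deoptimization was requested. Only four values are visible in this part of the diff, so the following is a minimal sketch rather than the real definition, which lives elsewhere in the patch and may contain further kinds:

    // Sketch based solely on the kinds visible in this diff; not the actual definition.
    enum class DeoptimizationKind {
      kCHA,            // CHA guard invalidated (cha_guard_optimization.cc, below).
      kLoopBoundsBCE,  // Bounds check hoisted into a loop preheader by BCE.
      kLoopNullBCE,    // Null check hoisted into a loop preheader by BCE.
      kBlockBCE,       // Bounds check deoptimization emitted inside a block.
    };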
HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( - GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc()); + GetGraph()->GetArena(), compare, DeoptimizationKind::kCHA, suspend->GetDexPc()); pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( suspend->GetEnvironment(), loop_info->GetHeader()); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index d7cc577580..cf2a391e8f 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -16,6 +16,7 @@ #include "code_generator_arm.h" +#include "arch/arm/asm_support_arm.h" #include "arch/arm/instruction_set_features_arm.h" #include "art_method.h" #include "code_generator_utils.h" @@ -25,6 +26,7 @@ #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_arm.h" +#include "linker/arm/relative_patcher_thumb2.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "thread.h" @@ -60,10 +62,45 @@ static constexpr DRegister DTMP = D31; static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; +// Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle +// offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions. +// For the Baker read barrier implementation using link-generated thunks we need to split +// the offset explicitly. +constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB; + +// Flags controlling the use of link-time generated thunks for Baker read barriers. +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; + +// The reserved entrypoint register for link-time generated thunks. +const Register kBakerCcEntrypointRegister = R4; + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value() +static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instruction) { + DCHECK_EQ(static_cast<uint32_t>(kBakerCcEntrypointRegister), + linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister); + DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u); + DCHECK_EQ(kBakerCcEntrypointRegister, + instruction->GetLocations()->GetTemp( + instruction->GetLocations()->GetTempCount() - 1u).AsRegister<Register>()); +} + +static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) { + ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler())); + __ BindTrackedLabel(bne_label); + Label placeholder_label; + __ b(&placeholder_label, NE); // Placeholder, patched at link-time. 
+ __ Bind(&placeholder_label); +} + +static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) { + return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u; +} + static constexpr int kRegListThreshold = 4; // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, @@ -585,8 +622,13 @@ class DeoptimizationSlowPathARM : public SlowPathCodeARM { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + __ LoadImmediate(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } @@ -819,7 +861,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowP // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -954,6 +996,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // /* int32_t */ monitor = obj->monitor_ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset); @@ -1962,6 +2016,7 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -2672,7 +2727,10 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
+ InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -3067,6 +3125,15 @@ void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant // Will be generated at use site. } +void LocationsBuilderARM::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARM::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -5272,7 +5339,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // If link-time thunks for the Baker read barrier are enabled, for AOT + // loads we need a temporary only if the offset is too big. + if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } else { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -5738,11 +5816,35 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. - // Also need for String compression feature. - if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) - || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation() && + instruction->GetIndex()->IsConstant()) { + // Array loads with constant index are treated as field loads. + // If link-time thunks for the Baker read barrier are enabled, for AOT + // constant index loads we need a temporary only if the offset is too big. + uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); + uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); + offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); + if (offset >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. 
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation() && + !instruction->GetIndex()->IsConstant()) { + // We need a non-scratch temporary for the array data pointer. + locations->AddTemp(Location::RequiresRegister()); + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } else { + locations->AddTemp(Location::RequiresRegister()); + } + } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + // Also need a temporary for String compression feature. locations->AddTemp(Location::RequiresRegister()); } } @@ -5854,8 +5956,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. + data_offset += helpers::Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type); + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + locations->GetTemp(0), + /* needs_null_check */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false); + } } else { Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { @@ -6692,6 +6806,13 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { // For non-Baker read barrier we have a temp-clobbering call. } } + if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + (load_kind == HLoadClass::LoadKind::kReferrersClass && + !Runtime::Current()->UseJitCompilation())) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } + } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not @@ -6871,6 +6992,9 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) { // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() // that the the kPrimNot result register is the same as the first argument register. locations->SetCustomSlowPathCallerSaves(caller_saves); + if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -7041,6 +7165,9 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { // Note that TypeCheckSlowPathARM uses this register too. 
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + codegen_->MaybeAddBakerCcEntrypointTempForFields(locations); + } } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { @@ -7914,48 +8041,96 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - // - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. - // - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Location temp = Location::RegisterLocation(LR); - SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( - instruction, root, /* entrypoint */ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk + // checks the reference and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: + + CheckLastTempIsBakerCcEntrypointRegister(instruction); + bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow); + Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(IP, 12); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); + + Label return_address; + __ AdrCode(LR, &return_address); + __ CmpConstant(kBakerCcEntrypointRegister, 0); + // Currently the offset is always within range. If that changes, + // we shall have to split the load the same way as for fields. 
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset); + DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit()); + ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow); + int old_position = GetAssembler()->GetBuffer()->GetPosition(); + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + EmitPlaceholderBne(codegen_, bne_label); + __ Bind(&return_address); + DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(), + narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); + } else { + // Note that we do not actually check the value of + // `GetIsGcMarking()` to decide whether to mark the loaded GC + // root or not. Instead, we load into `temp` the read barrier + // mark entry point corresponding to register `root`. If `temp` + // is null, it means that `GetIsGcMarking()` is false, and vice + // versa. + // + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // } + + // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. + Location temp = Location::RegisterLocation(LR); + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( + instruction, root, /* entrypoint */ temp); + codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. 
- __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { // GC root loaded through a slow path for read barriers other // than Baker's. @@ -7973,6 +8148,16 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct } } +void CodeGeneratorARM::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields) { + if (!Runtime::Current()->UseJitCompilation()) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } + } +} + void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, @@ -7982,6 +8167,76 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = *(obj+offset); + // gray_return_address: + + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + Register ref_reg = ref.AsRegister<Register>(); + bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); + Register base = obj; + if (offset >= kReferenceLoadMinFarOffset) { + base = temp.AsRegister<Register>(); + DCHECK_NE(base, kBakerCcEntrypointRegister); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u)); + offset &= (kReferenceLoadMinFarOffset - 1u); + // Use narrow LDR only for small offsets. 
Generating narrow encoding LDR for the large + // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely + // increase the overall code size when taking the generated thunks into account. + DCHECK(!narrow); + } + CheckLastTempIsBakerCcEntrypointRegister(instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow); + Label* bne_label = NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(IP, 12); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); + + Label return_address; + __ AdrCode(LR, &return_address); + __ CmpConstant(kBakerCcEntrypointRegister, 0); + EmitPlaceholderBne(this, bne_label); + DCHECK_LT(offset, kReferenceLoadMinFarOffset); + DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit()); + ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow); + int old_position = GetAssembler()->GetBuffer()->GetPosition(); + __ LoadFromOffset(kLoadWord, ref_reg, base, offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); + DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(), + narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -8002,9 +8257,67 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. 
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ Register index_reg = index.AsRegister<Register>();
+ Register ref_reg = ref.AsRegister<Register>();
+ Register data_reg = temp.AsRegister<Register>();
+ DCHECK_NE(data_reg, kBakerCcEntrypointRegister);
+
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg);
+ Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+ __ AddConstant(data_reg, obj, data_offset);
+
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ EmitPlaceholderBne(this, bne_label);
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
+ __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ return;
+ }
+
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- ScaleFactor scale_factor = TIMES_4;
GenerateReferenceLoadWithBakerReadBarrier(
instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
}
@@ -8016,9 +8329,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check,
- bool always_update_field,
- Register* temp2) {
+ bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -8029,6 +8340,73 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// not.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp2` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp2` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // }
+
+ Register temp_reg = temp.AsRegister<Register>();
+
+ // Slow path marking the object `ref` when the GC is marking. The
+ // entrypoint will already be loaded in `temp2`.
+ Location temp2 = Location::RegisterLocation(LR); + SlowPathCodeARM* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + temp_reg, + /* entrypoint */ temp2); + AddSlowPath(slow_path); + + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp2.AsRegister<Register>(), TR, entry_point_offset); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(temp2.AsRegister<Register>(), slow_path->GetEntryLabel()); + // Fast path: the GC is not marking: just load the reference. + GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + Register temp2) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; // instead, we load into `temp3` the read barrier mark entry point // corresponding to register `ref`. If `temp3` is null, it means // that `GetIsGcMarking()` is false, and vice versa. @@ -8041,52 +8419,30 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // HeapReference<mirror::Object> ref = *src; // Original reference load. // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { + // old_ref = ref; // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. // } Register temp_reg = temp.AsRegister<Register>(); - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp3`. + // Slow path updating the object reference at address `obj + + // field_offset` when the GC is marking. The entrypoint will already + // be loaded in `temp3`. Location temp3 = Location::RegisterLocation(LR); - SlowPathCodeARM* slow_path; - if (always_update_field) { - DCHECK(temp2 != nullptr); - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only - // supports address of the form `obj + field_offset`, where `obj` - // is a register and `field_offset` is a register pair (of which - // only the lower half is used). Thus `offset` and `scale_factor` - // above are expected to be null in this code path. 
- DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - temp_reg, - *temp2, - /* entrypoint */ temp3); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - temp_reg, - /* entrypoint */ temp3); - } + SlowPathCodeARM* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM( + instruction, + ref, + obj, + /* offset */ 0u, + /* index */ field_offset, + /* scale_factor */ ScaleFactor::TIMES_1, + needs_null_check, + temp_reg, + temp2, + /* entrypoint */ temp3); AddSlowPath(slow_path); // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -8098,8 +8454,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel()); - // Fast path: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + // Fast path: the GC is not marking: nothing to do (the field is + // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); } @@ -8370,6 +8726,11 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativePatch( return &patches->back(); } +Label* CodeGeneratorARM::NewBakerReadBarrierPatch(uint32_t custom_data) { + baker_read_barrier_patches_.emplace_back(custom_data); + return &baker_read_barrier_patches_.back().label; +} + Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file, dex::StringIndex string_index) { return boot_image_string_patches_.GetOrCreate( @@ -8436,7 +8797,8 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + boot_image_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size(); + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); @@ -8470,6 +8832,10 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_type.dex_file, target_type.type_index.index_)); } + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.Position(), + info.custom_data)); + } DCHECK_EQ(size, linker_patches->size()); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 86f2f21df7..b94ee20d9d 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -488,6 +488,11 @@ class CodeGeneratorARM : public CodeGenerator { PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, 
uint32_t element_offset);
+
+ // Add a new baker read barrier patch and return the label to be bound
+ // before the BNE instruction.
+ Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index);
Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
@@ -503,6 +508,10 @@ class CodeGeneratorARM : public CodeGenerator {
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ // Maybe add the reserved entrypoint register as a temporary for field load. This temp
+ // is added only for AOT compilation if link-time generated thunks for fields are enabled.
+ void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -526,11 +535,6 @@ class CodeGeneratorARM : public CodeGenerator {
// Load the object reference located at the address
// `obj + offset + (index << scale_factor)`, held by object `obj`, into
// `ref`, and mark it if needed.
- //
- // If `always_update_field` is true, the value of the reference is
- // atomically updated in the holder (`obj`). This operation
- // requires an extra temporary register, which must be provided as a
- // non-null pointer (`temp2`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
@@ -538,9 +542,27 @@ class CodeGeneratorARM : public CodeGenerator {
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check,
- bool always_update_field = false,
- Register* temp2 = nullptr);
+ bool needs_null_check);
+
+ // Generate code checking whether the reference field at the
+ // address `obj + field_offset`, held by object `obj`, needs to be
+ // marked, and if so, marking it and updating the field within `obj`
+ // with the marked value.
+ //
+ // This routine is used for the implementation of the
+ // UnsafeCASObject intrinsic with Baker read barriers.
+ //
+ // This method has a structure similar to
+ // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+ // `ref` is only used as a temporary here, and thus its value should not
+ // be used afterwards.
+ void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Location temp,
+ bool needs_null_check,
+ Register temp2);
// Generate a heap reference load (with no read barrier).
void GenerateRawReferenceLoad(HInstruction* instruction,
@@ -616,6 +638,13 @@ class CodeGeneratorARM : public CodeGenerator {
Literal*,
TypeReferenceValueComparator>;
+ struct BakerReadBarrierPatchInfo {
+ explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+ Label label;
+ uint32_t custom_data;
+ };
+
Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
@@ -648,6 +677,8 @@ class CodeGeneratorARM : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // Baker read barrier patch info.
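// Each entry pairs the label bound at the placeholder BNE site with the custom_data
// word encoding the required thunk variant (field, array or GC root), the registers
// involved and the narrow/wide LDR form; the EmitLinkerPatches() change earlier in
// this patch turns every entry into a
// LinkerPatch::BakerReadBarrierBranchPatch(label.Position(), custom_data).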
+ ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; // Patches for string literals in JIT compiled code. StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 0bc4bd7524..d59f8b435c 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -587,8 +587,13 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + __ Mov(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } @@ -851,7 +856,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlo // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -1002,6 +1007,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // /* int32_t */ monitor = obj->monitor_ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); @@ -3693,7 +3710,10 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
+ InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -5479,6 +5499,15 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } +void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARM64::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -6073,7 +6102,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> reference = *(obj+offset); + // HeapReference<mirror::Object> reference = *(obj+offset); // gray_return_address: DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); @@ -6168,7 +6197,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> reference = data[index]; + // HeapReference<mirror::Object> reference = data[index]; // gray_return_address: DCHECK(index.IsValid()); @@ -6230,8 +6259,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* size_t scale_factor, Register temp, bool needs_null_check, - bool use_load_acquire, - bool always_update_field) { + bool use_load_acquire) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // If we are emitting an array load, we should not be using a @@ -6268,41 +6296,18 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // entrypoint will already be loaded in `temp2`. Register temp2 = lr; Location temp2_loc = LocationFrom(temp2); - SlowPathCodeARM64* slow_path; - if (always_update_field) { - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 - // only supports address of the form `obj + field_offset`, where - // `obj` is a register and `field_offset` is a register. Thus - // `offset` and `scale_factor` above are expected to be null in - // this code path. 
- DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, 0u); /* "times 1" */ - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - use_load_acquire, - temp, - /* entrypoint */ temp2_loc); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - use_load_acquire, - temp, - /* entrypoint */ temp2_loc); - } + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + use_load_acquire, + temp, + /* entrypoint */ temp2_loc); AddSlowPath(slow_path); // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -6314,12 +6319,83 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ Cbnz(temp2, slow_path->GetEntryLabel()); - // Fast path: just load the reference. + // Fast path: the GC is not marking: just load the reference. GenerateRawReferenceLoad( instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); __ Bind(slow_path->GetExitLabel()); } +void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + // If we are emitting an array load, we should not be using a + // Load Acquire instruction. In other words: + // `instruction->IsArrayGet()` => `!use_load_acquire`. + DCHECK(!instruction->IsArrayGet() || !use_load_acquire); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp2` the read barrier mark entry point + // corresponding to register `ref`. If `temp2` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // } + + // Slow path updating the object reference at address `obj + field_offset` + // when the GC is marking. The entrypoint will already be loaded in `temp2`. 
+ Register temp2 = lr;
+ Location temp2_loc = LocationFrom(temp2);
+ SlowPathCodeARM64* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
+ instruction,
+ ref,
+ obj,
+ /* offset */ 0u,
+ /* index */ field_offset,
+ /* scale_factor */ 0u /* "times 1" */,
+ needs_null_check,
+ use_load_acquire,
+ temp,
+ /* entrypoint */ temp2_loc);
+ AddSlowPath(slow_path);
+
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Ldr(temp2, MemOperand(tr, entry_point_offset));
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ Cbnz(temp2, slow_path->GetEntryLabel());
+ // Fast path: the GC is not marking: nothing to do (the field is
+ // up-to-date, and we don't need to load the reference).
+ __ Bind(slow_path->GetExitLabel());
+}
+
void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
Location ref,
Register obj,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 3ded3e4412..f16f625b6c 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -635,9 +635,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Load the object reference located at the address
// `obj + offset + (index << scale_factor)`, held by object `obj`, into
// `ref`, and mark it if needed.
- //
- // If `always_update_field` is true, the value of the reference is
- // atomically updated in the holder (`obj`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl::aarch64::Register obj,
@@ -646,8 +643,27 @@ class CodeGeneratorARM64 : public CodeGenerator {
size_t scale_factor,
vixl::aarch64::Register temp,
bool needs_null_check,
- bool use_load_acquire,
- bool always_update_field = false);
+ bool use_load_acquire);
+
+ // Generate code checking whether the reference field at the
+ // address `obj + field_offset`, held by object `obj`, needs to be
+ // marked, and if so, marking it and updating the field within `obj`
+ // with the marked value.
+ //
+ // This routine is used for the implementation of the
+ // UnsafeCASObject intrinsic with Baker read barriers.
+ //
+ // This method has a structure similar to
+ // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+ // `ref` is only used as a temporary here, and thus its value should not
+ // be used afterwards.
+ void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::aarch64::Register obj,
+ Location field_offset,
+ vixl::aarch64::Register temp,
+ bool needs_null_check,
+ bool use_load_acquire);
// Generate a heap reference load (with no read barrier).
void GenerateRawReferenceLoad(HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index b6678b03ef..9f03a39bd5 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -16,6 +16,7 @@ #include "code_generator_arm_vixl.h" +#include "arch/arm/asm_support_arm.h" #include "arch/arm/instruction_set_features_arm.h" #include "art_method.h" #include "code_generator_utils.h" @@ -24,6 +25,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "intrinsics_arm_vixl.h" +#include "linker/arm/relative_patcher_thumb2.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "thread.h" @@ -77,6 +79,20 @@ static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte; static constexpr int kCurrentMethodStackOffset = 0; static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; +// Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle +// offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions. +// For the Baker read barrier implementation using link-generated thunks we need to split +// the offset explicitly. +constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB; + +// Flags controlling the use of link-time generated thunks for Baker read barriers. +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; + +// The reserved entrypoint register for link-time generated thunks. +const vixl32::Register kBakerCcEntrypointRegister = r4; + #ifdef __ #error "ARM Codegen VIXL macro-assembler macro already defined." #endif @@ -88,6 +104,60 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; // Marker that code is yet to be, and must, be implemented. #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented " +static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps, + HInstruction* instruction) { + DCHECK(temps->IsAvailable(ip)); + temps->Exclude(ip); + DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister)); + DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(), + linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister); + DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u); + DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp( + instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister)); +} + +static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) { + ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes); + __ bind(patch_label); + vixl32::Label placeholder_label; + __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time. 
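// The placeholder label is bound immediately after the branch, so until link-time
// patching this is a valid wide B.NE to the very next instruction (it falls through
// whether taken or not); the linker later retargets it to the matching thunk via the
// position recorded at `patch_label`, which was bound at the branch itself.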
+ __ bind(&placeholder_label); +} + +static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) { + return rt.IsLow() && rn.IsLow() && offset < 32u; +} + +class EmitAdrCode { + public: + EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label) + : assembler_(assembler), rd_(rd), label_(label) { + ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes); + adr_location_ = assembler->GetCursorOffset(); + assembler->adr(EncodingSize(Wide), rd, label); + } + + ~EmitAdrCode() { + DCHECK(label_->IsBound()); + // The ADR emitted by the assembler does not set the Thumb mode bit we need. + // TODO: Maybe extend VIXL to allow ADR for return address? + uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_); + // Expecting ADR encoding T3 with `(offset & 1) == 0`. + DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u); // Check bits 24-31, except 26. + DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu); // Check bits 16-23. + DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode()); // Check bits 8-11 and 15. + DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u); // Check bit 0, i.e. the `offset & 1`. + // Add the Thumb mode bit. + raw_adr[2] |= 0x01u; + } + + private: + ArmVIXLMacroAssembler* const assembler_; + vixl32::Register rd_; + vixl32::Label* const label_; + int32_t adr_location_; +}; + // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, // for each live D registers they treat two corresponding S registers as live ones. // @@ -608,8 +678,14 @@ class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + __ Mov(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); + arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; } @@ -845,7 +921,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkS // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -987,6 +1063,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. 
+ // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); // /* int32_t */ monitor = obj->monitor_ @@ -2012,6 +2100,7 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -2704,7 +2793,10 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) { void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + InvokeRuntimeCallingConventionARMVIXL calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -3103,6 +3195,15 @@ void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant( // Will be generated at use site. } +void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARMVIXL::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -5280,7 +5381,18 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // If link-time thunks for the Baker read barrier are enabled, for AOT + // loads we need a temporary only if the offset is too big. + if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -5747,11 +5859,35 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. 
- // Also need for String compression feature. - if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) - || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation() && + instruction->GetIndex()->IsConstant()) { + // Array loads with constant index are treated as field loads. + // If link-time thunks for the Baker read barrier are enabled, for AOT + // constant index loads we need a temporary only if the offset is too big. + uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); + uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); + offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); + if (offset >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation() && + !instruction->GetIndex()->IsConstant()) { + // We need a non-scratch temporary for the array data pointer. + locations->AddTemp(Location::RequiresRegister()); + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else { + locations->AddTemp(Location::RequiresRegister()); + } + } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + // Also need a temporary for String compression feature. locations->AddTemp(Location::RequiresRegister()); } } @@ -5862,8 +5998,20 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. + data_offset += Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type); + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + locations->GetTemp(0), + /* needs_null_check */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false); + } } else { vixl32::Register out = OutputRegister(instruction); if (index.IsConstant()) { @@ -6753,6 +6901,13 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { // For non-Baker read barrier we have a temp-clobbering call. 
} } + if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + (load_kind == HLoadClass::LoadKind::kReferrersClass && + !Runtime::Current()->UseJitCompilation())) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } + } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not @@ -6929,6 +7084,9 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() // that the the kPrimNot result register is the same as the first argument register. locations->SetCustomSlowPathCallerSaves(caller_saves); + if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -7091,6 +7249,9 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { // Note that TypeCheckSlowPathARM uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + codegen_->MaybeAddBakerCcEntrypointTempForFields(locations); + } } void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { @@ -7989,48 +8150,98 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - // - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. - // - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Location temp = LocationFrom(lr); - SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( - instruction, root, /* entrypoint */ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk + // checks the reference and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
+ // if (temp != nullptr) { + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); + UseScratchRegisterScope temps(GetVIXLAssembler()); + ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); + bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); + uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData( + root_reg.GetCode(), narrow); + vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); + + vixl::EmissionCheckScope guard(GetVIXLAssembler(), + 4 * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(kBakerCcEntrypointRegister, Operand(0)); + // Currently the offset is always within range. If that changes, + // we shall have to split the load the same way as for fields. + DCHECK_LT(offset, kReferenceLoadMinFarOffset); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset)); + EmitPlaceholderBne(codegen_, bne_label); + __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); + } else { + // Note that we do not actually check the value of + // `GetIsGcMarking()` to decide whether to mark the loaded GC + // root or not. Instead, we load into `temp` the read barrier + // mark entry point corresponding to register `root`. If `temp` + // is null, it means that `GetIsGcMarking()` is false, and vice + // versa. + // + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // } + + // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. 
+ Location temp = LocationFrom(lr); + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( + instruction, root, /* entrypoint */ temp); + codegen_->AddSlowPath(slow_path); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { // GC root loaded through a slow path for read barriers other // than Baker's. @@ -8048,6 +8259,16 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( } } +void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields) { + if (!Runtime::Current()->UseJitCompilation()) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } + } +} + void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl32::Register obj, @@ -8057,6 +8278,85 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. 
That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = *(obj+offset); + // gray_return_address: + + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); + vixl32::Register base = obj; + if (offset >= kReferenceLoadMinFarOffset) { + base = RegisterFrom(temp); + DCHECK(!base.Is(kBakerCcEntrypointRegister)); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); + offset &= (kReferenceLoadMinFarOffset - 1u); + // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large + // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely + // increase the overall code size when taking the generated thunks into account. + DCHECK(!narrow); + } + UseScratchRegisterScope temps(GetVIXLAssembler()); + ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); + uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + base.GetCode(), obj.GetCode(), narrow); + vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); + + vixl::EmissionCheckScope guard( + GetVIXLAssembler(), + (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(kBakerCcEntrypointRegister, Operand(0)); + EmitPlaceholderBne(this, bne_label); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // Note: We need a specific width for the unpoisoning NEG. + if (kPoisonHeapReferences) { + if (narrow) { + // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). + __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); + } else { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } + } + __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? 
BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -8077,9 +8377,73 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = data[index]; + // gray_return_address: + + DCHECK(index.IsValid()); + vixl32::Register index_reg = RegisterFrom(index, Primitive::kPrimInt); + vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + vixl32::Register data_reg = RegisterFrom(temp, Primitive::kPrimInt); // Raw pointer. + DCHECK(!data_reg.Is(kBakerCcEntrypointRegister)); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode()); + vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); + __ Add(data_reg, obj, Operand(data_offset)); + + vixl::EmissionCheckScope guard( + GetVIXLAssembler(), + (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(kBakerCcEntrypointRegister, Operand(0)); + EmitPlaceholderBne(this, bne_label); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + // Note: We need a Wide NEG for the unpoisoning. 
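Each of the thunk-based fast paths above records a linker patch whose custom_data tells the thunk generator which registers the LDR uses and, for field and GC-root loads, whether the narrow encoding was emitted (EncodeBakerReadBarrierFieldData, EncodeBakerReadBarrierGcRootData, EncodeBakerReadBarrierArrayData). The real bit layout belongs to linker::Thumb2RelativePatcher; the sketch below uses an invented layout purely to show the kind of round-trip packing involved.

#include <cassert>
#include <cstdint>

// Hypothetical packing, for illustration only; the actual layout is defined by
// linker::Thumb2RelativePatcher::EncodeBakerReadBarrier*Data().
enum class BakerKind : uint32_t { kField = 0, kArray = 1, kGcRoot = 2 };

constexpr uint32_t Encode(BakerKind kind, uint32_t base_reg, uint32_t holder_reg, bool narrow) {
  // 2 bits of kind, two 5-bit register fields (generous for ARM's 16 registers),
  // one bit for the narrow-LDR flag.
  return static_cast<uint32_t>(kind) | (base_reg << 2) | (holder_reg << 7) |
         (static_cast<uint32_t>(narrow) << 12);
}

constexpr uint32_t BaseReg(uint32_t data) { return (data >> 2) & 0x1fu; }
constexpr uint32_t HolderReg(uint32_t data) { return (data >> 7) & 0x1fu; }
constexpr bool IsNarrow(uint32_t data) { return ((data >> 12) & 1u) != 0u; }

int main() {
  // A field load with base and holder both in r0, using the narrow LDR encoding.
  constexpr uint32_t data = Encode(BakerKind::kField, /*base_reg=*/0, /*holder_reg=*/0, /*narrow=*/true);
  static_assert(BaseReg(data) == 0 && HolderReg(data) == 0 && IsNarrow(data), "round trip");
  assert(static_cast<BakerKind>(data & 3u) == BakerKind::kField);
}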
+ if (kPoisonHeapReferences) { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } + __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier( instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check); } @@ -8091,9 +8455,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field, - vixl32::Register* temp2) { + bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -8104,6 +8466,73 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // not. // // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp2` the read barrier mark entry point + // corresponding to register `ref`. If `temp2` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // } + // } else { + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // } + + vixl32::Register temp_reg = RegisterFrom(temp); + + // Slow path marking the object `ref` when the GC is marking. The + // entrypoint will already be loaded in `temp2`. + Location temp2 = LocationFrom(lr); + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + temp_reg, + /* entrypoint */ temp2); + AddSlowPath(slow_path); + + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(RegisterFrom(temp2), slow_path->GetEntryLabel()); + // Fast path: the GC is not marking: just load the reference. 
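The non-thunk paths above all rely on the same trick: the per-register pReadBarrierMarkRegNN entrypoint slot in the Thread structure is null whenever the GC is not marking, so a plain load of that slot plus a null check replaces a separate GetIsGcMarking() load (and, as the comments note, no load-acquire is needed because the slot only changes while threads are suspended or at a checkpoint). Below is a minimal, self-contained C++ sketch of that control flow, using invented types instead of ART's Thread and mirror classes; the lock-word gray check performed by the real field and array slow paths is omitted.

#include <atomic>
#include <cstdio>

// Hypothetical stand-ins for ART's types; only the shape of the check matters.
struct Obj { Obj* field; };
using MarkFn = Obj* (*)(Obj*);

// Stand-in for the per-thread entrypoint slot pReadBarrierMarkRegNN: null while
// the GC is not marking, otherwise it points at the mark routine.
std::atomic<MarkFn> mark_entrypoint{nullptr};

Obj* Mark(Obj* ref) { std::puts("slow path: mark"); return ref; }

Obj* LoadReferenceWithBakerReadBarrier(Obj* holder) {
  // One plain load of the entrypoint; it doubles as the "is the GC marking?" flag.
  MarkFn entrypoint = mark_entrypoint.load(std::memory_order_relaxed);
  Obj* ref = holder->field;          // Original reference load (fast path).
  if (entrypoint != nullptr) {       // <=> Thread::Current()->GetIsGcMarking()
    ref = entrypoint(ref);           // Slow path: mark/forward the reference.
  }
  return ref;
}

int main() {
  Obj target{nullptr};
  Obj holder{&target};
  LoadReferenceWithBakerReadBarrier(&holder);   // Fast path only.
  mark_entrypoint = Mark;
  LoadReferenceWithBakerReadBarrier(&holder);   // Takes the marking slow path.
}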
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl32::Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + vixl32::Register temp2) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; // instead, we load into `temp3` the read barrier mark entry point // corresponding to register `ref`. If `temp3` is null, it means // that `GetIsGcMarking()` is false, and vice versa. @@ -8113,55 +8542,32 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // // Slow path. // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. + // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { + // old_ref = ref; // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. // } vixl32::Register temp_reg = RegisterFrom(temp); - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp3`. + // Slow path updating the object reference at address `obj + field_offset` + // when the GC is marking. The entrypoint will already be loaded in `temp3`. Location temp3 = LocationFrom(lr); - SlowPathCodeARMVIXL* slow_path; - if (always_update_field) { - DCHECK(temp2 != nullptr); - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL - // only supports address of the form `obj + field_offset`, where - // `obj` is a register and `field_offset` is a register pair (of - // which only the lower half is used). Thus `offset` and - // `scale_factor` above are expected to be null in this code path. 
- DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - temp_reg, - *temp2, - /* entrypoint */ temp3); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - temp_reg, - /* entrypoint */ temp3); - } + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( + instruction, + ref, + obj, + /* offset */ 0u, + /* index */ field_offset, + /* scale_factor */ ScaleFactor::TIMES_1, + needs_null_check, + temp_reg, + temp2, + /* entrypoint */ temp3); AddSlowPath(slow_path); // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -8173,8 +8579,8 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel()); - // Fast path: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + // Fast path: the GC is not marking: nothing to do (the field is + // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); } @@ -8488,6 +8894,11 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa return &patches->back(); } +vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) { + baker_read_barrier_patches_.emplace_back(custom_data); + return &baker_read_barrier_patches_.back().label; +} + VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageStringLiteral( const DexFile& dex_file, dex::StringIndex string_index) { @@ -8512,10 +8923,6 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_); } -VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateDexCacheAddressLiteral(uint32_t address) { - return DeduplicateUint32Literal(address, &uint32_literals_); -} - VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral( const DexFile& dex_file, dex::StringIndex string_index, @@ -8569,7 +8976,8 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + boot_image_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size(); + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); @@ -8603,6 +9011,10 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa target_type.dex_file, target_type.type_index.index_)); } + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(), + info.custom_data)); + } DCHECK_EQ(size, 
linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 1e9669dc38..657d3c134f 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -572,12 +572,16 @@ class CodeGeneratorARMVIXL : public CodeGenerator { PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); +
+ // Add a new baker read barrier patch and return the label to be bound
+ // before the BNE instruction.
+ vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, dex::StringIndex string_index); VIXLUInt32Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index); VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
- VIXLUInt32Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle); @@ -589,6 +593,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ // Maybe add the reserved entrypoint register as a temporary for field load. This temp
+ // is added only for AOT compilation if link-time generated thunks for fields are enabled.
+ void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
// Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl32::Register obj, @@ -612,11 +620,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Load the object reference located at the address // `obj + offset + (index << scale_factor)`, held by object `obj`, into // `ref`, and mark it if needed.
- //
- // If `always_update_field` is true, the value of the reference is
- // atomically updated in the holder (`obj`). This operation
- // requires an extra temporary register, which must be provided as a
- // non-null pointer (`temp2`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch32::Register obj, @@ -624,9 +627,27 @@ class CodeGeneratorARMVIXL : public CodeGenerator { Location index, ScaleFactor scale_factor, Location temp,
- bool needs_null_check,
- bool always_update_field = false,
- vixl::aarch32::Register* temp2 = nullptr);
+ bool needs_null_check);
+
+ // Generate code checking whether the reference field at the
+ // address `obj + field_offset`, held by object `obj`, needs to be
+ // marked, and if so, marking it and updating the field within `obj`
+ // with the marked value.
+ //
+ // This routine is used for the implementation of the
+ // UnsafeCASObject intrinsic with Baker read barriers.
+ //
+ // This method has a structure similar to
+ // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+ // `ref` is used only as a temporary here, and thus its value should not
+ // be used afterwards.
+ void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::aarch32::Register obj,
+ Location field_offset,
+ Location temp,
+ bool needs_null_check,
+ vixl::aarch32::Register temp2);
+
// Generate a heap reference load (with no read barrier).
void GenerateRawReferenceLoad(HInstruction* instruction, @@ -713,6 +734,13 @@ class CodeGeneratorARMVIXL : public CodeGenerator { VIXLUInt32Literal*, TypeReferenceValueComparator>; + struct BakerReadBarrierPatchInfo { + explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { } + + vixl::aarch32::Label label; + uint32_t custom_data; + }; + VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); VIXLUInt32Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); @@ -750,6 +778,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // Baker read barrier patch info. + ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; // Patches for string literals in JIT compiled code. StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index aa030b279c..e9870acff4 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -219,15 +219,33 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - + const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier); + InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); + const bool is_load_class_bss_entry = + (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; + // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the address of the entry. + Register entry_address = kNoRegister; + if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0)); + // In the unlucky case that `temp` is A0, we preserve the address in `out` across the + // kSaveEverything call. + entry_address = temp_is_a0 ? out.AsRegister<Register>() : temp; + DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); + if (temp_is_a0) { + __ Move(entry_address, temp); + } + } + dex::TypeIndex type_index = cls_->GetTypeIndex(); __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType; mips_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); @@ -237,25 +255,27 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); } + // For HLoadClass/kBssEntry, store the resolved class to the BSS entry. + if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) { + // The class entry address was preserved in `entry_address` thanks to kSaveEverything. + __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0); + } + // Move the class to the desired location. 
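With the runtime entrypoints treated as kSaveEverything, the MIPS R2 class-load slow path above (and the string-load one that follows) can keep the .bss entry address live in a register across the call and store the resolved value straight into the entry, instead of re-materializing the PC-relative address afterwards; if the chosen temp happens to be the argument register A0, the address is first moved into `out`, which is free until the result is written. A rough C++ analogy of the idea, with invented names and no ART types:

#include <cstdint>
#include <cstdio>

// Invented stand-ins: `bss_entry` plays the role of the .bss slot and `Resolve`
// the role of the runtime call that, being a kSaveEverything entrypoint,
// preserves every register the caller keeps live.
static uintptr_t bss_entry = 0;

uintptr_t Resolve(uint32_t index) { return 0x1000u + index; }

uintptr_t LoadViaBssEntry(uint32_t index) {
  uintptr_t* entry_address = &bss_entry;   // Computed once on the fast path.
  uintptr_t value = *entry_address;
  if (value == 0) {
    // Slow path: the address stays live across the call, so after resolving
    // we can store through it directly instead of recomputing the
    // PC-relative high/low address pair.
    value = Resolve(index);
    *entry_address = value;
  }
  return value;
}

int main() {
  std::printf("%lx\n", static_cast<unsigned long>(LoadViaBssEntry(42)));  // Slow path fills the entry.
  std::printf("%lx\n", static_cast<unsigned long>(LoadViaBssEntry(42)));  // Fast path reads the entry.
}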
- Location out = locations->Out(); if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); Primitive::Type type = instruction_->GetType(); - mips_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); + mips_codegen->MoveLocation(out, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + type); } - RestoreLiveRegisters(codegen, locations); - // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry. - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); - if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { - DCHECK(out.IsValid()); - // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to - // kSaveEverything and use a temporary for the .bss entry address in the fast path, - // so that we can avoid another calculation here. - bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + + // For HLoadClass/kBssEntry, store the resolved class to the BSS entry. + if (is_load_class_bss_entry && !r2_baker_or_no_read_barriers) { + // For non-Baker read barriers (or on R6), we need to re-calculate the address of + // the class entry. Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); - DCHECK_NE(out.AsRegister<Register>(), AT); CodeGeneratorMIPS::PcRelativePatchInfo* info = mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index); bool reordering = __ SetReorder(false); @@ -286,40 +306,62 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { explicit LoadStringSlowPathMIPS(HLoadString* instruction) : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + DCHECK(instruction_->IsLoadString()); + DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + HLoadString* load = instruction_->AsLoadString(); + const dex::StringIndex string_index = load->GetStringIndex(); + Register out = locations->Out().AsRegister<Register>(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - + const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier); + InvokeRuntimeCallingConvention calling_convention; __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; - HLoadString* load = instruction_->AsLoadString(); - const dex::StringIndex string_index = load->GetStringIndex(); + // For HLoadString/kBssEntry/kSaveEverything, make sure we preserve the address of the entry. + Register entry_address = kNoRegister; + if (r2_baker_or_no_read_barriers) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0)); + // In the unlucky case that `temp` is A0, we preserve the address in `out` across the + // kSaveEverything call. + entry_address = temp_is_a0 ? 
out : temp; + DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); + if (temp_is_a0) { + __ Move(entry_address, temp); + } + } + __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_); mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + + // Store the resolved string to the BSS entry. + if (r2_baker_or_no_read_barriers) { + // The string entry address was preserved in `entry_address` thanks to kSaveEverything. + __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0); + } + Primitive::Type type = instruction_->GetType(); mips_codegen->MoveLocation(locations->Out(), - calling_convention.GetReturnLocation(type), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), type); - RestoreLiveRegisters(codegen, locations); - // Store the resolved String to the BSS entry. - // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the - // .bss entry address in the fast path, so that we can avoid another calculation here. - bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); - Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - DCHECK_NE(out, AT); - CodeGeneratorMIPS::PcRelativePatchInfo* info = - mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); - bool reordering = __ SetReorder(false); - mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base); - __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); - + // Store the resolved string to the BSS entry. + if (!r2_baker_or_no_read_barriers) { + // For non-Baker read barriers (or on R6), we need to re-calculate the address of + // the string entry. + Register base = isR6 ? 
ZERO : locations->InAt(0).AsRegister<Register>(); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + bool reordering = __ SetReorder(false); + mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base); + __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678); + __ SetReorder(reordering); + } __ B(GetExitLabel()); } @@ -451,8 +493,13 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); mips_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } @@ -1719,15 +1766,14 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code, DCHECK_EQ(code[literal_offset + 1], 0x12); DCHECK_EQ((code[literal_offset + 2] & 0xE0), 0x00); DCHECK_EQ(code[literal_offset + 3], 0x3C); - // lw reg, reg, addr32_low + // instr reg, reg, addr32_low DCHECK_EQ(code[literal_offset + 4], 0x78); DCHECK_EQ(code[literal_offset + 5], 0x56); - DCHECK_EQ((code[literal_offset + 7] & 0xFC), 0x8C); - addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "lw reg, reg, addr32_low". + addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "instr reg, reg, addr32_low". // lui reg, addr32_high code[literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16); code[literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24); - // lw reg, reg, addr32_low + // instr reg, reg, addr32_low code[literal_offset + 4] = static_cast<uint8_t>(addr32 >> 0); code[literal_offset + 5] = static_cast<uint8_t>(addr32 >> 8); } @@ -2436,6 +2482,9 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(type)) { @@ -3438,8 +3487,6 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { Primitive::Type type = instruction->InputAt(0)->GetType(); LocationSummary* locations = instruction->GetLocations(); - Register dst = locations->Out().AsRegister<Register>(); - MipsLabel true_label; switch (type) { default: @@ -3448,27 +3495,14 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { return; case Primitive::kPrimLong: - // TODO: don't use branches. 
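The hunks below drop this branch-based sequence: HandleCondition for kPrimLong now calls a new GenerateLongCompare that materializes the boolean directly from 32-bit compares of the high and low words. The portable C++ sketch below mirrors the structure of its kCondLT register case (set-less-than on the high words, with an unsigned compare of the low words breaking ties) and checks it against a direct 64-bit compare; it illustrates the logic only, not the MIPS instruction sequence.

#include <cassert>
#include <cstdint>

// Branchless signed "lhs < rhs" built from 32-bit comparisons only, mirroring
// the kCondLT sequence in GenerateLongCompare (Slt/Sltu producing 0 or 1).
bool LessThanViaHalves(int64_t lhs, int64_t rhs) {
  // Split into high/low words (two's-complement narrowing, as on the target).
  int32_t lhs_high = static_cast<int32_t>(static_cast<uint64_t>(lhs) >> 32);
  int32_t rhs_high = static_cast<int32_t>(static_cast<uint64_t>(rhs) >> 32);
  uint32_t lhs_low = static_cast<uint32_t>(lhs);
  uint32_t rhs_low = static_cast<uint32_t>(rhs);
  int high_gt = rhs_high < lhs_high;   // Slt(TMP, rhs_high, lhs_high)
  int low_ltu = lhs_low < rhs_low;     // Sltu(AT, lhs_low, rhs_low)
  int tie_lt = high_gt < low_ltu;      // Slt(TMP, TMP, AT): high word not greater, low words less
  int high_lt = lhs_high < rhs_high;   // Slt(AT, lhs_high, rhs_high)
  return (high_lt | tie_lt) != 0;      // Or(dst, AT, TMP)
}

int main() {
  const int64_t samples[] = {INT64_MIN, -4294967296LL, -1, 0, 1, 4294967296LL, INT64_MAX};
  for (int64_t a : samples) {
    for (int64_t b : samples) {
      assert(LessThanViaHalves(a, b) == (a < b));
    }
  }
  return 0;
}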
- GenerateLongCompareAndBranch(instruction->GetCondition(), locations, &true_label); - break; + GenerateLongCompare(instruction->GetCondition(), locations); + return; case Primitive::kPrimFloat: case Primitive::kPrimDouble: GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations); return; } - - // Convert the branches into the result. - MipsLabel done; - - // False case: result = 0. - __ LoadConst32(dst, 0); - __ B(&done); - - // True case: result = 1. - __ Bind(&true_label); - __ LoadConst32(dst, 1); - __ Bind(&done); } void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) { @@ -4238,6 +4272,221 @@ void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond, } } +void InstructionCodeGeneratorMIPS::GenerateLongCompare(IfCondition cond, + LocationSummary* locations) { + Register dst = locations->Out().AsRegister<Register>(); + Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); + Location rhs_location = locations->InAt(1); + Register rhs_high = ZERO; + Register rhs_low = ZERO; + int64_t imm = 0; + uint32_t imm_high = 0; + uint32_t imm_low = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + imm = rhs_location.GetConstant()->AsLongConstant()->GetValue(); + imm_high = High32Bits(imm); + imm_low = Low32Bits(imm); + } else { + rhs_high = rhs_location.AsRegisterPairHigh<Register>(); + rhs_low = rhs_location.AsRegisterPairLow<Register>(); + } + if (use_imm && imm == 0) { + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Or(dst, lhs_high, lhs_low); + __ Sltiu(dst, dst, 1); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Or(dst, lhs_high, lhs_low); + __ Sltu(dst, ZERO, dst); + break; + case kCondLT: + __ Slt(dst, lhs_high, ZERO); + break; + case kCondGE: + __ Slt(dst, lhs_high, ZERO); + __ Xori(dst, dst, 1); + break; + case kCondLE: + __ Or(TMP, lhs_high, lhs_low); + __ Sra(AT, lhs_high, 31); + __ Sltu(dst, AT, TMP); + __ Xori(dst, dst, 1); + break; + case kCondGT: + __ Or(TMP, lhs_high, lhs_low); + __ Sra(AT, lhs_high, 31); + __ Sltu(dst, AT, TMP); + break; + case kCondB: // always false + __ Andi(dst, dst, 0); + break; + case kCondAE: // always true + __ Ori(dst, ZERO, 1); + break; + } + } else if (use_imm) { + // TODO: more efficient comparison with constants without loading them into TMP/AT. 
+ switch (cond) { + case kCondEQ: + __ LoadConst32(TMP, imm_high); + __ Xor(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Xor(AT, AT, lhs_low); + __ Or(dst, TMP, AT); + __ Sltiu(dst, dst, 1); + break; + case kCondNE: + __ LoadConst32(TMP, imm_high); + __ Xor(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Xor(AT, AT, lhs_low); + __ Or(dst, TMP, AT); + __ Sltu(dst, ZERO, dst); + break; + case kCondLT: + case kCondGE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, lhs_low, TMP); + } + __ LoadConst32(TMP, imm_high); + __ Slt(AT, lhs_high, TMP); + __ Slt(TMP, TMP, lhs_high); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, lhs_low, dst); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondGE) { + __ Xori(dst, dst, 1); + } + break; + case kCondGT: + case kCondLE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, TMP, lhs_low); + } + __ LoadConst32(TMP, imm_high); + __ Slt(AT, TMP, lhs_high); + __ Slt(TMP, lhs_high, TMP); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, dst, lhs_low); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondLE) { + __ Xori(dst, dst, 1); + } + break; + case kCondB: + case kCondAE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, lhs_low, TMP); + } + __ LoadConst32(TMP, imm_high); + __ Sltu(AT, lhs_high, TMP); + __ Sltu(TMP, TMP, lhs_high); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, lhs_low, dst); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondAE) { + __ Xori(dst, dst, 1); + } + break; + case kCondA: + case kCondBE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, TMP, lhs_low); + } + __ LoadConst32(TMP, imm_high); + __ Sltu(AT, TMP, lhs_high); + __ Sltu(TMP, lhs_high, TMP); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, dst, lhs_low); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondBE) { + __ Xori(dst, dst, 1); + } + break; + } + } else { + switch (cond) { + case kCondEQ: + __ Xor(TMP, lhs_high, rhs_high); + __ Xor(AT, lhs_low, rhs_low); + __ Or(dst, TMP, AT); + __ Sltiu(dst, dst, 1); + break; + case kCondNE: + __ Xor(TMP, lhs_high, rhs_high); + __ Xor(AT, lhs_low, rhs_low); + __ Or(dst, TMP, AT); + __ Sltu(dst, ZERO, dst); + break; + case kCondLT: + case kCondGE: + __ Slt(TMP, rhs_high, lhs_high); + __ Sltu(AT, lhs_low, rhs_low); + __ Slt(TMP, TMP, AT); + __ Slt(AT, lhs_high, rhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondGE) { + __ Xori(dst, dst, 1); + } + break; + case kCondGT: + case kCondLE: + __ Slt(TMP, lhs_high, rhs_high); + __ Sltu(AT, rhs_low, lhs_low); + __ Slt(TMP, TMP, AT); + __ Slt(AT, rhs_high, lhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondLE) { + __ Xori(dst, dst, 1); + } + break; + case kCondB: + case kCondAE: + __ Sltu(TMP, rhs_high, lhs_high); + __ Sltu(AT, lhs_low, rhs_low); + __ Slt(TMP, TMP, AT); + __ Sltu(AT, lhs_high, rhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondAE) { + __ Xori(dst, dst, 1); + } + break; + case kCondA: + case kCondBE: + __ Sltu(TMP, lhs_high, rhs_high); + __ Sltu(AT, rhs_low, lhs_low); + __ Slt(TMP, TMP, AT); + __ Sltu(AT, rhs_high, lhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondBE) { + __ Xori(dst, dst, 1); + } + break; + } + } +} + void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label) { @@ -5155,7 +5404,10 @@ void 
InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) { void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -5767,6 +6019,9 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall)); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); if (generate_volatile) { InvokeRuntimeCallingConvention calling_convention; @@ -6445,6 +6700,7 @@ void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -6452,6 +6708,7 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -6461,6 +6718,9 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // The output does overlap inputs. @@ -7048,26 +7308,27 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - calling_convention.GetReturnLocation(Primitive::kPrimNot)); + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); return; } DCHECK(!cls->NeedsAccessCheck()); - + const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } switch (load_kind) { // We need an extra register for PC-relative literals on R2. case HLoadClass::LoadKind::kBootImageLinkTimeAddress: case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kBssEntry: - if (codegen_->GetInstructionSetFeatures().IsR6()) { + if (isR6) { break; } FALLTHROUGH_INTENDED; @@ -7078,6 +7339,22 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { break; } locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadClass::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the type resolution or initialization and marking to save everything we need. + // Request a temp to hold the BSS entry location for the slow path on R2 + // (no benefit for R6). + if (!isR6) { + locations->AddTemp(Location::RequiresRegister()); + } + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barriers we have a temp-clobbering call. + } + } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not @@ -7160,10 +7437,22 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF case HLoadClass::LoadKind::kBssEntry: { CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); - bool reordering = __ SetReorder(false); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); - __ SetReorder(reordering); + constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; + if (isR6 || non_baker_read_barrier) { + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); + GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); + __ SetReorder(reordering); + } else { + // On R2 save the BSS entry address in a temporary register instead of + // recalculating it in the slow path. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg); + __ Addiu(temp, temp, /* placeholder */ 0x5678); + __ SetReorder(reordering); + GenerateGcRootFieldLoad(cls, out_loc, temp, /* offset */ 0, read_barrier_option); + } generate_null_check = true; break; } @@ -7227,13 +7516,14 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); + const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. 
case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: case HLoadString::LoadKind::kBssEntry: - if (codegen_->GetInstructionSetFeatures().IsR6()) { + if (isR6) { break; } FALLTHROUGH_INTENDED; @@ -7246,9 +7536,25 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { } if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { InvokeRuntimeCallingConvention calling_convention; - locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); + locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } else { locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and marking to save everything we need. + // Request a temp to hold the BSS entry location for the slow path on R2 + // (no benefit for R6). + if (!isR6) { + locations->AddTemp(Location::RequiresRegister()); + } + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barriers we have a temp-clobbering call. + } + } } } @@ -7305,14 +7611,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); - bool reordering = __ SetReorder(false); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - GenerateGcRootFieldLoad(load, - out_loc, - out, - /* placeholder */ 0x5678, - kCompilerReadBarrierOption); - __ SetReorder(reordering); + constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; + if (isR6 || non_baker_read_barrier) { + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); + GenerateGcRootFieldLoad(load, + out_loc, + out, + /* placeholder */ 0x5678, + kCompilerReadBarrierOption); + __ SetReorder(reordering); + } else { + // On R2 save the BSS entry address in a temporary register instead of + // recalculating it in the slow path. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg); + __ Addiu(temp, temp, /* placeholder */ 0x5678); + __ SetReorder(reordering); + GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption); + } SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); codegen_->AddSlowPath(slow_path); __ Beqz(out, slow_path->GetEntryLabel()); @@ -7342,6 +7660,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ // TODO: Re-add the compiler code to do string dex cache lookup again. 
DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -7766,6 +8085,15 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { } } +void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 03939e3530..5ad1f12f8a 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -295,6 +295,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void GenerateIntCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label); + void GenerateLongCompare(IfCondition cond, LocationSummary* locations); void GenerateLongCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 19250c64e3..f04e3841f5 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -141,7 +141,8 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : SlowPathCodeMIPS64(instruction) {} + explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) + : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); @@ -192,7 +193,9 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); Primitive::Type type = instruction_->GetType(); - mips64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); + mips64_codegen->MoveLocation(out, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + type); } RestoreLiveRegisters(codegen, locations); @@ -200,10 +203,6 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { DCHECK(out.IsValid()); - // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to - // kSaveEverything and use a temporary for the .bss entry address in the fast path, - // so that we can avoid another calculation here. 
- DCHECK_NE(out.AsRegister<GpuRegister>(), AT); CodeGeneratorMIPS64::PcRelativePatchInfo* info = mips64_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index); mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT); @@ -250,16 +249,13 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); Primitive::Type type = instruction_->GetType(); mips64_codegen->MoveLocation(locations->Out(), - calling_convention.GetReturnLocation(type), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), type); RestoreLiveRegisters(codegen, locations); // Store the resolved String to the BSS entry. - // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the - // .bss entry address in the fast path, so that we can avoid another calculation here. GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - DCHECK_NE(out, AT); CodeGeneratorMIPS64::PcRelativePatchInfo* info = mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT); @@ -397,8 +393,13 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); mips64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } @@ -1986,6 +1987,9 @@ void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(type)) { @@ -3906,7 +3910,10 @@ void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -3982,6 +3989,9 @@ void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, object_field_get_with_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); @@ -4544,6 +4554,7 @@ void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instructi void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -4551,6 +4562,7 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -4560,6 +4572,9 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // The output does overlap inputs. @@ -5077,10 +5092,8 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - calling_convention.GetReturnLocation(Primitive::kPrimNot)); + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); return; } DCHECK(!cls->NeedsAccessCheck()); @@ -5090,10 +5103,24 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } if (load_kind == HLoadClass::LoadKind::kReferrersClass) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadClass::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the type resolution or initialization and marking to save everything we need. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. 
+ } + } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not @@ -5224,9 +5251,20 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { InvokeRuntimeCallingConvention calling_convention; - locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); + locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } else { locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and marking to save everything we need. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } } @@ -5294,6 +5332,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA // TODO: Re-add the compiler code to do string dex cache lookup again. DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -5653,6 +5692,15 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { } } +void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS64::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 08a752f1d2..cf2d5cbee3 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -384,8 +384,14 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + x86_codegen->Load32BitValue( + calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } @@ -1688,7 +1694,10 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, 
LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::Any()); } @@ -2057,6 +2066,15 @@ void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant // Will be generated at use site. } +void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ff6e099d12..f2ed52b5a5 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -397,8 +397,14 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + x86_64_codegen->Load32BitValue( + CpuRegister(calling_convention.GetRegisterAt(0)), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } @@ -1710,7 +1716,10 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::Any()); } @@ -2165,6 +2174,15 @@ void InstructionCodeGeneratorX86_64::VisitDoubleConstant( // Will be generated at use site. 
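For orientation while reading these slow-path changes: the per-instruction HDeoptimize::Kind is replaced throughout this patch by a shared DeoptimizationKind enum (from the new deoptimization_kind.h), whose value is now materialized into the first runtime-calling-convention register before invoking kQuickDeoptimize. A rough reconstruction, limited to the kinds actually referenced in this patch; the real header may define more values and a different order:

enum class DeoptimizationKind {
  kAotInlineCache,
  kJitInlineCache,
  kJitSameTarget,
  kCHA,
  kBlockBCE,
  kLast = kBlockBCE,  // used below for MinimumBitsToStore(kLast)
};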
} +void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 12340b416d..aea901dec7 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -338,14 +338,21 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { // Ensure the inputs of `instruction` are defined in a block of the graph. for (HInstruction* input : instruction->GetInputs()) { - const HInstructionList& list = input->IsPhi() - ? input->GetBlock()->GetPhis() - : input->GetBlock()->GetInstructions(); - if (!list.Contains(input)) { - AddError(StringPrintf("Input %d of instruction %d is not defined " - "in a basic block of the control-flow graph.", + if (input->GetBlock() == nullptr) { + AddError(StringPrintf("Input %d of instruction %d is not in any " + "basic block of the control-flow graph.", input->GetId(), instruction->GetId())); + } else { + const HInstructionList& list = input->IsPhi() + ? input->GetBlock()->GetPhis() + : input->GetBlock()->GetInstructions(); + if (!list.Contains(input)) { + AddError(StringPrintf("Input %d of instruction %d is not defined " + "in a basic block of the control-flow graph.", + input->GetId(), + instruction->GetId())); + } } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 92d0f3c032..8674e727bb 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -783,7 +783,7 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction, HInstruction* compare = new (graph_->GetArena()) HNotEqual( deopt_flag, graph_->GetIntConstant(0, dex_pc)); HInstruction* deopt = new (graph_->GetArena()) HDeoptimize( - graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc); + graph_->GetArena(), compare, DeoptimizationKind::kCHA, dex_pc); if (cursor != nullptr) { bb_cursor->InsertInstructionAfter(deopt_flag, cursor); @@ -817,7 +817,17 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, } const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); - bool is_referrer = (klass.Get() == outermost_graph_->GetArtMethod()->GetDeclaringClass()); + bool is_referrer; + ArtMethod* outermost_art_method = outermost_graph_->GetArtMethod(); + if (outermost_art_method == nullptr) { + DCHECK(Runtime::Current()->IsAotCompiler()); + // We are in AOT mode and we don't have an ART method to determine + // if the inlined method belongs to the referrer. Assume it doesn't. + is_referrer = false; + } else { + is_referrer = klass.Get() == outermost_art_method->GetDeclaringClass(); + } + // Note that we will just compare the classes, so we don't need Java semantics access checks. // Note that the type index and the dex file are relative to the method this type guard is // inlined into. @@ -850,7 +860,9 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, graph_->GetArena(), compare, receiver, - HDeoptimize::Kind::kInline, + Runtime::Current()->IsAotCompiler() + ? 
DeoptimizationKind::kAotInlineCache + : DeoptimizationKind::kJitInlineCache, invoke_instruction->GetDexPc()); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); @@ -1137,7 +1149,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( graph_->GetArena(), compare, receiver, - HDeoptimize::Kind::kInline, + DeoptimizationKind::kJitSameTarget, invoke_instruction->GetDexPc()); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); @@ -1470,8 +1482,13 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, } } if (needs_constructor_barrier) { - HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc); - invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction); + // See CompilerDriver::RequiresConstructorBarrier for more details. + DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence"; + + HConstructorFence* constructor_fence = + new (graph_->GetArena()) HConstructorFence(obj, kNoDexPc, graph_->GetArena()); + invoke_instruction->GetBlock()->InsertInstructionBefore(constructor_fence, + invoke_instruction); } *return_replacement = nullptr; break; diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 978c6a2d71..8b79da8c73 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -451,10 +451,13 @@ void HInstructionBuilder::InitializeParameters() { referrer_method_id.class_idx_, parameter_index++, Primitive::kPrimNot, - true); + /* is_this */ true); AppendInstruction(parameter); UpdateLocal(locals_index++, parameter); number_of_parameters--; + current_this_parameter_ = parameter; + } else { + DCHECK(current_this_parameter_ == nullptr); } const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id); @@ -465,7 +468,7 @@ void HInstructionBuilder::InitializeParameters() { arg_types->GetTypeItem(shorty_pos - 1).type_idx_, parameter_index++, Primitive::GetType(shorty[shorty_pos]), - false); + /* is_this */ false); ++shorty_pos; AppendInstruction(parameter); // Store the parameter value in the local that the dex code will use @@ -588,6 +591,8 @@ void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } +// Does the method being compiled need any constructor barriers being inserted? +// (Always 'false' for methods that aren't <init>.) static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) { // Can be null in unit tests only. if (UNLIKELY(cu == nullptr)) { @@ -596,6 +601,11 @@ static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDri Thread* self = Thread::Current(); return cu->IsConstructor() + && !cu->IsStatic() + // RequiresConstructorBarrier must only be queried for <init> methods; + // it's effectively "false" for every other method. + // + // See CompilerDriver::RequiresConstructBarrier for more explanation. && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex()); } @@ -639,13 +649,24 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc) { if (type == Primitive::kPrimVoid) { + // Only <init> (which is a return-void) could possibly have a constructor fence. 
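Putting the conditions checked above together, a condensed sketch (illustration only, not the real helper; the last operand stands for the driver query shown above) of when the builder will emit a constructor fence before the return of the method being compiled:

// Only instance <init> methods for which the compiler driver reports that a
// barrier is required get an HConstructorFence before their return-void.
bool NeedsConstructorFence(bool is_constructor,
                           bool is_static,
                           bool driver_requires_barrier) {
  return is_constructor && !is_static && driver_requires_barrier;
}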
// This may insert additional redundant constructor fences from the super constructors. // TODO: remove redundant constructor fences (b/36656456). if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) { - AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc)); + // Compiling instance constructor. + if (kIsDebugBuild) { + std::string method_name = graph_->GetMethodName(); + CHECK_EQ(std::string("<init>"), method_name); + } + + HInstruction* fence_target = current_this_parameter_; + DCHECK(fence_target != nullptr); + + AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_)); } AppendInstruction(new (arena_) HReturnVoid(dex_pc)); } else { + DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)); HInstruction* value = LoadLocal(instruction.VRegA(), type); AppendInstruction(new (arena_) HReturn(value, dex_pc)); } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 7fdc1883ca..2fb5c7b94d 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -62,6 +62,7 @@ class HInstructionBuilder : public ValueObject { current_block_(nullptr), current_locals_(nullptr), latest_result_(nullptr), + current_this_parameter_(nullptr), compiler_driver_(driver), code_generator_(code_generator), dex_compilation_unit_(dex_compilation_unit), @@ -325,6 +326,11 @@ class HInstructionBuilder : public ValueObject { HBasicBlock* current_block_; ArenaVector<HInstruction*>* current_locals_; HInstruction* latest_result_; + // Current "this" parameter. + // Valid only after InitializeParameters() finishes. + // * Null for static methods. + // * Non-null for instance methods. + HParameterValue* current_this_parameter_; CompilerDriver* const compiler_driver_; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2dcc12e294..2cedde900e 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -257,7 +257,8 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { if (shift_amount->IsConstant()) { int64_t cst = Int64FromConstant(shift_amount->AsConstant()); - if ((cst & implicit_mask) == 0) { + int64_t masked_cst = cst & implicit_mask; + if (masked_cst == 0) { // Replace code looking like // SHL dst, value, 0 // with @@ -266,6 +267,17 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); return; + } else if (masked_cst != cst) { + // Replace code looking like + // SHL dst, value, cst + // where cst exceeds maximum distance with the equivalent + // SHL dst, value, cst & implicit_mask + // (as defined by shift semantics). This ensures other + // optimizations do not need to special case for such situations. 
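As a concrete illustration of the masking rule above (a standalone sketch, not ART code): dex/Java shifts only use the low 5 bits of the distance for 32-bit values and the low 6 bits for 64-bit values, so an out-of-range constant distance can be canonicalized by masking without changing behavior.

#include <cstdint>

int32_t ShlInt(int32_t value, int32_t distance) {
  return value << (distance & 0x1f);   // kMaxIntShiftDistance mask (5 bits)
}

int64_t ShlLong(int64_t value, int32_t distance) {
  return value << (distance & 0x3f);   // kMaxLongShiftDistance mask (6 bits)
}

// ShlInt(1, 33) == ShlInt(1, 1) == 2, which is why the simplifier may rewrite
// "SHL dst, value, 33" into "SHL dst, value, 1".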
+ DCHECK_EQ(shift_amount->GetType(), Primitive::kPrimInt); + instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index */ 1); + RecordSimplification(); + return; } } diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 750f9cc213..69cf9a126f 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1010,17 +1010,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* code if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, - ScaleFactor::TIMES_1, + /* field_offset */ offset_loc, tmp_ptr_loc, /* needs_null_check */ false, - /* always_update_field */ true, - &tmp); + tmp); } } @@ -1648,6 +1645,8 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { // is clobbered by ReadBarrierMarkRegX entry points). Get an extra // temporary register from the register allocator. locations->AddTemp(Location::RequiresRegister()); + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen_); + arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations); } } diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 4d360158a2..65a82229e9 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1154,17 +1154,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* co Register temp = WRegisterFrom(locations->GetTemp(0)); // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, - /* scale_factor */ 0u, + /* field_offset */ offset_loc, temp, /* needs_null_check */ false, - /* use_load_acquire */ false, - /* always_update_field */ true); + /* use_load_acquire */ false); } } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index fd8a37ae05..356d5bcb0c 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -1347,17 +1347,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, - ScaleFactor::TIMES_1, + /* field_offset */ offset_loc, tmp_ptr_loc, /* needs_null_check */ false, - /* always_update_field */ true, - &tmp); + tmp); } } @@ -2026,6 +2023,8 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // is clobbered by ReadBarrierMarkRegX entry points). 
Get an extra // temporary register from the register allocator. locations->AddTemp(Location::RequiresRegister()); + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_); + arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations); } } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index bfe04f5ae0..abf5b122c8 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -1525,6 +1525,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall), kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index c5e116046f..9dce59b2af 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1168,6 +1168,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall), kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 48699b33ae..8d8cc93b9b 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -566,14 +566,22 @@ class LSEVisitor : public HGraphVisitor { store->GetBlock()->RemoveInstruction(store); } - // Eliminate allocations that are not used. + // Eliminate singleton-classified instructions: + // * - Constructor fences (they never escape this thread). + // * - Allocations (if they are unused). for (HInstruction* new_instance : singleton_new_instances_) { + HConstructorFence::RemoveConstructorFences(new_instance); + if (!new_instance->HasNonEnvironmentUses()) { new_instance->RemoveEnvironmentUsers(); new_instance->GetBlock()->RemoveInstruction(new_instance); } } for (HInstruction* new_array : singleton_new_arrays_) { + // TODO: Delete constructor fences for new-array + // In the future HNewArray instructions will have HConstructorFence's for them. + // HConstructorFence::RemoveConstructorFences(new_array); + if (!new_array->HasNonEnvironmentUses()) { new_array->RemoveEnvironmentUsers(); new_array->GetBlock()->RemoveInstruction(new_array); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index bbc55dd16f..881802d714 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -71,7 +71,7 @@ static bool IsSignExtensionAndGet(HInstruction* instruction, // extension when represented in the *width* of the given narrower data type // (the fact that char normally zero extends does not matter here). 
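A standalone illustration (not the ART helpers themselves) of the range checks the two helpers above perform for the byte case; the other narrow types follow the same pattern with their own limits.

#include <cstdint>
#include <limits>

// A 64-bit constant can be viewed as a sign extension of an int8 value
// iff it already lies in the int8 range.
bool FitsAsSignExtendedInt8(int64_t value) {
  return value >= std::numeric_limits<int8_t>::min() &&
         value <= std::numeric_limits<int8_t>::max();
}

// Likewise, it can be viewed as a zero extension of a uint8 value
// iff it lies in the uint8 range.
bool FitsAsZeroExtendedUint8(int64_t value) {
  return value >= std::numeric_limits<uint8_t>::min() &&
         value <= std::numeric_limits<uint8_t>::max();
}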
int64_t value = 0; - if (IsInt64AndGet(instruction, &value)) { + if (IsInt64AndGet(instruction, /*out*/ &value)) { switch (type) { case Primitive::kPrimByte: if (std::numeric_limits<int8_t>::min() <= value && @@ -119,7 +119,7 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, // extension when represented in the *width* of the given narrower data type // (the fact that byte/short normally sign extend does not matter here). int64_t value = 0; - if (IsInt64AndGet(instruction, &value)) { + if (IsInt64AndGet(instruction, /*out*/ &value)) { switch (type) { case Primitive::kPrimByte: if (std::numeric_limits<uint8_t>::min() <= value && @@ -833,19 +833,14 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, // TODO: accept symbolic, albeit loop invariant shift factors. HInstruction* opa = instruction->InputAt(0); HInstruction* opb = instruction->InputAt(1); - int64_t value = 0; - if (VectorizeUse(node, opa, generate_code, type, restrictions) && IsInt64AndGet(opb, &value)) { - // Make sure shift distance only looks at lower bits, as defined for sequential shifts. - int64_t mask = (instruction->GetType() == Primitive::kPrimLong) - ? kMaxLongShiftDistance - : kMaxIntShiftDistance; - int64_t distance = value & mask; + int64_t distance = 0; + if (VectorizeUse(node, opa, generate_code, type, restrictions) && + IsInt64AndGet(opb, /*out*/ &distance)) { // Restrict shift distance to packed data type width. int64_t max_distance = Primitive::ComponentSize(type) * 8; if (0 <= distance && distance < max_distance) { if (generate_code) { - HInstruction* s = graph_->GetIntConstant(distance); - GenerateVecOp(instruction, vector_map_->Get(opa), s, type); + GenerateVecOp(instruction, vector_map_->Get(opa), opb, type); } return true; } @@ -1177,14 +1172,14 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, int64_t value = 0; if ((instruction->IsShr() || instruction->IsUShr()) && - IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) { + IsInt64AndGet(instruction->InputAt(1), /*out*/ &value) && value == 1) { // // TODO: make following code less sensitive to associativity and commutativity differences. // HInstruction* x = instruction->InputAt(0); // Test for an optional rounding part (x + 1) >> 1. 
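For context, the scalar idiom being matched here (illustrative C++ only; the recognizer itself works on HIR) is the halving add, optionally rounded:

#include <cstdint>

// Truncated average of two unsigned bytes, computed in a wider type:
// the "(a + b) >> 1" shape.
uint8_t HalvingAdd(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((static_cast<int32_t>(a) + b) >> 1);
}

// Rounded average: the "(a + b + 1) >> 1" shape that the optional "+ 1"
// test above is looking for.
uint8_t RoundingHalvingAdd(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((static_cast<int32_t>(a) + b + 1) >> 1);
}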
bool is_rounded = false; - if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) { + if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), /*out*/ &value) && value == 1) { x = x->InputAt(0); is_rounded = true; } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index ca953a1a7e..a8bfe610de 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -528,6 +528,15 @@ HCurrentMethod* HGraph::GetCurrentMethod() { return cached_current_method_; } +const char* HGraph::GetMethodName() const { + const DexFile::MethodId& method_id = dex_file_.GetMethodId(method_idx_); + return dex_file_.GetMethodName(method_id); +} + +std::string HGraph::PrettyMethod(bool with_signature) const { + return dex_file_.PrettyMethod(method_idx_, with_signature); +} + HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value, uint32_t dex_pc) { switch (type) { case Primitive::Type::kPrimBoolean: @@ -1150,6 +1159,81 @@ void HVariableInputSizeInstruction::RemoveInputAt(size_t index) { } } +void HVariableInputSizeInstruction::RemoveAllInputs() { + RemoveAsUserOfAllInputs(); + DCHECK(!HasNonEnvironmentUses()); + + inputs_.clear(); + DCHECK_EQ(0u, InputCount()); +} + +void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { + DCHECK(instruction->GetBlock() != nullptr); + // Removing constructor fences only makes sense for instructions with an object return type. + DCHECK_EQ(Primitive::kPrimNot, instruction->GetType()); + + // Efficient implementation that simultaneously (in one pass): + // * Scans the uses list for all constructor fences. + // * Deletes that constructor fence from the uses list of `instruction`. + // * Deletes `instruction` from the constructor fence's inputs. + // * Deletes the constructor fence if it now has 0 inputs. + + const HUseList<HInstruction*>& uses = instruction->GetUses(); + // Warning: Although this is "const", we might mutate the list when calling RemoveInputAt. + for (auto it = uses.begin(), end = uses.end(); it != end; ) { + const HUseListNode<HInstruction*>& use_node = *it; + HInstruction* const use_instruction = use_node.GetUser(); + + // Advance the iterator immediately once we fetch the use_node. + // Warning: If the input is removed, the current iterator becomes invalid. + ++it; + + if (use_instruction->IsConstructorFence()) { + HConstructorFence* ctor_fence = use_instruction->AsConstructorFence(); + size_t input_index = use_node.GetIndex(); + + // Process the candidate instruction for removal + // from the graph. + + // Constructor fence instructions are never + // used by other instructions. + // + // If we wanted to make this more generic, it + // could be a runtime if statement. + DCHECK(!ctor_fence->HasUses()); + + // A constructor fence's return type is "kPrimVoid" + // and therefore it can't have any environment uses. + DCHECK(!ctor_fence->HasEnvironmentUses()); + + // Remove the inputs first, otherwise removing the instruction + // will try to remove its uses while we are already removing uses + // and this operation will fail. + DCHECK_EQ(instruction, ctor_fence->InputAt(input_index)); + + // Removing the input will also remove the `use_node`. + // (Do not look at `use_node` after this, it will be a dangling reference). + ctor_fence->RemoveInputAt(input_index); + + // Once all inputs are removed, the fence is considered dead and + // is removed. 
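The removal loop above relies on the classic erase-while-iterating discipline; a generic standalone sketch of the same pattern (std::list used purely for illustration):

#include <list>

void RemoveMatchingUses(std::list<int>& uses, int victim) {
  for (auto it = uses.begin(); it != uses.end(); ) {
    auto current = it;
    ++it;                     // advance before mutating, as in the loop above
    if (*current == victim) {
      uses.erase(current);    // only `current` is invalidated; `it` stays valid
    }
  }
}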
+ if (ctor_fence->InputCount() == 0u) { + ctor_fence->GetBlock()->RemoveInstruction(ctor_fence); + } + } + } + + if (kIsDebugBuild) { + // Post-condition checks: + // * None of the uses of `instruction` are a constructor fence. + // * The `instruction` itself did not get removed from a block. + for (const HUseListNode<HInstruction*>& use_node : instruction->GetUses()) { + CHECK(!use_node.GetUser()->IsConstructorFence()); + } + CHECK(instruction->GetBlock() != nullptr); + } +} + #define DEFINE_ACCEPT(name, super) \ void H##name::Accept(HGraphVisitor* visitor) { \ visitor->Visit##name(this); \ @@ -1338,18 +1422,6 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) { } } -std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) { - switch (rhs) { - case HDeoptimize::Kind::kBCE: - return os << "bce"; - case HDeoptimize::Kind::kInline: - return os << "inline"; - default: - LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs); - UNREACHABLE(); - } -} - bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const { return this == instruction->GetPreviousDisregardingMoves(); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 36c7df70ce..b4da20b558 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -30,6 +30,7 @@ #include "base/transform_array_ref.h" #include "dex_file.h" #include "dex_file_types.h" +#include "deoptimization_kind.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "handle.h" #include "handle_scope.h" @@ -46,6 +47,7 @@ namespace art { class GraphChecker; class HBasicBlock; +class HConstructorFence; class HCurrentMethod; class HDoubleConstant; class HEnvironment; @@ -57,6 +59,7 @@ class HIntConstant; class HInvoke; class HLongConstant; class HNullConstant; +class HParameterValue; class HPhi; class HSuspendCheck; class HTryBoundary; @@ -537,6 +540,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return method_idx_; } + // Get the method name (without the signature), e.g. "<init>" + const char* GetMethodName() const; + + // Get the pretty method name (class + name + optionally signature). + std::string PrettyMethod(bool with_signature = true) const; + InvokeType GetInvokeType() const { return invoke_type_; } @@ -1297,6 +1306,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ClearException, Instruction) \ M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ + M(ConstructorFence, Instruction) \ M(CurrentMethod, Instruction) \ M(ShouldDeoptimizeFlag, Instruction) \ M(Deoptimize, Instruction) \ @@ -1476,8 +1486,11 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) template <typename T> class HUseListNode : public ArenaObject<kArenaAllocUseListNode> { public: + // Get the instruction which has this use as one of the inputs. T GetUser() const { return user_; } + // Get the position of the input record that this use corresponds to. size_t GetIndex() const { return index_; } + // Set the position of the input record that this use corresponds to. void SetIndex(size_t index) { index_ = index; } // Hook for the IntrusiveForwardList<>. @@ -2037,7 +2050,8 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { !IsNativeDebugInfo() && !IsParameterValue() && // If we added an explicit barrier then we should keep it. 
- !IsMemoryBarrier(); + !IsMemoryBarrier() && + !IsConstructorFence(); } bool IsDeadAndRemovable() const { @@ -2431,6 +2445,11 @@ class HVariableInputSizeInstruction : public HInstruction { void InsertInputAt(size_t index, HInstruction* input); void RemoveInputAt(size_t index); + // Removes all the inputs. + // Also removes this instructions from each input's use list + // (for non-environment uses only). + void RemoveAllInputs(); + protected: HVariableInputSizeInstruction(SideEffects side_effects, uint32_t dex_pc, @@ -2973,15 +2992,9 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { // Deoptimize to interpreter, upon checking a condition. class HDeoptimize FINAL : public HVariableInputSizeInstruction { public: - enum class Kind { - kBCE, - kInline, - kLast = kInline - }; - // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move // across. - HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc) + HDeoptimize(ArenaAllocator* arena, HInstruction* cond, DeoptimizationKind kind, uint32_t dex_pc) : HVariableInputSizeInstruction( SideEffects::All(), dex_pc, @@ -3001,7 +3014,7 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { HDeoptimize(ArenaAllocator* arena, HInstruction* cond, HInstruction* guard, - Kind kind, + DeoptimizationKind kind, uint32_t dex_pc) : HVariableInputSizeInstruction( SideEffects::CanTriggerGC(), @@ -3025,7 +3038,7 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { bool CanThrow() const OVERRIDE { return true; } - Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); } + DeoptimizationKind GetDeoptimizationKind() const { return GetPackedField<DeoptimizeKindField>(); } Primitive::Type GetType() const OVERRIDE { return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid; @@ -3050,18 +3063,17 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits; static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1; static constexpr size_t kFieldDeoptimizeKindSize = - MinimumBitsToStore(static_cast<size_t>(Kind::kLast)); + MinimumBitsToStore(static_cast<size_t>(DeoptimizationKind::kLast)); static constexpr size_t kNumberOfDeoptimizePackedBits = kFieldDeoptimizeKind + kFieldDeoptimizeKindSize; static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>; + using DeoptimizeKindField = + BitField<DeoptimizationKind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>; DISALLOW_COPY_AND_ASSIGN(HDeoptimize); }; -std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs); - // Represents a should_deoptimize flag. Currently used for CHA-based devirtualization. // The compiled code checks this flag value in a guard before devirtualized call and // if it's true, starts to do deoptimization. 
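A minimal sketch (assumed semantics, not art::BitField itself) of how a packed field such as DeoptimizeKindField above stores the kind in a handful of bits of the instruction's 32-bit packed-fields word:

#include <cstdint>

enum class Kind : uint32_t { kA, kB, kC, kLast = kC };

constexpr uint32_t kKindShift = 1;                        // after earlier packed flags
constexpr uint32_t kKindBits  = 2;                        // enough bits to store kLast
constexpr uint32_t kKindMask  = ((1u << kKindBits) - 1u) << kKindShift;

uint32_t SetKind(uint32_t packed_fields, Kind kind) {
  return (packed_fields & ~kKindMask) |
         ((static_cast<uint32_t>(kind) << kKindShift) & kKindMask);
}

Kind GetKind(uint32_t packed_fields) {
  return static_cast<Kind>((packed_fields & kKindMask) >> kKindShift);
}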
@@ -5069,7 +5081,7 @@ class HParameterValue FINAL : public HExpression<0> { const DexFile& GetDexFile() const { return dex_file_; } dex::TypeIndex GetTypeIndex() const { return type_index_; } uint8_t GetIndex() const { return index_; } - bool IsThis() const ATTRIBUTE_UNUSED { return GetPackedFlag<kFlagIsThis>(); } + bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); } bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); } void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); } @@ -6507,6 +6519,137 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier); }; +// A constructor fence orders all prior stores to fields that could be accessed via a final field of +// the specified object(s), with respect to any subsequent store that might "publish" +// (i.e. make visible) the specified object to another thread. +// +// JLS 17.5.1 "Semantics of final fields" states that a freeze action happens +// for all final fields (that were set) at the end of the invoked constructor. +// +// The constructor fence models the freeze actions for the final fields of an object +// being constructed (semantically at the end of the constructor). Constructor fences +// have a per-object affinity; two separate objects being constructed get two separate +// constructor fences. +// +// (Note: that if calling a super-constructor or forwarding to another constructor, +// the freezes would happen at the end of *that* constructor being invoked). +// +// The memory model guarantees that when the object being constructed is "published" after +// constructor completion (i.e. escapes the current thread via a store), then any final field +// writes must be observable on other threads (once they observe that publication). +// +// Further, anything written before the freeze, and read by dereferencing through the final field, +// must also be visible (so final object field could itself have an object with non-final fields; +// yet the freeze must also extend to them). +// +// Constructor example: +// +// class HasFinal { +// final int field; Optimizing IR for <init>()V: +// HasFinal() { +// field = 123; HInstanceFieldSet(this, HasFinal.field, 123) +// // freeze(this.field); HConstructorFence(this) +// } HReturn +// } +// +// HConstructorFence can serve double duty as a fence for new-instance/new-array allocations of +// already-initialized classes; in that case the allocation must act as a "default-initializer" +// of the object which effectively writes the class pointer "final field". +// +// For example, we can model default-initialiation as roughly the equivalent of the following: +// +// class Object { +// private final Class header; +// } +// +// Java code: Optimizing IR: +// +// T new_instance<T>() { +// Object obj = allocate_memory(T.class.size); obj = HInvoke(art_quick_alloc_object, T) +// obj.header = T.class; // header write is done by above call. +// // freeze(obj.header) HConstructorFence(obj) +// return (T)obj; +// } +// +// See also: +// * CompilerDriver::RequiresConstructorBarrier +// * QuasiAtomic::ThreadFenceForConstructor +// +class HConstructorFence FINAL : public HVariableInputSizeInstruction { + // A fence has variable inputs because the inputs can be removed + // after prepare_for_register_allocation phase. + // (TODO: In the future a fence could freeze multiple objects + // after merging two fences together.) 
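A rough C++ analogue (illustration only; a release fence is a conservative stand-in for the StoreStore barrier, and readers additionally rely on the address dependency through the published reference) of what the constructor fence guarantees at publication time:

#include <atomic>

struct HasFinal {
  int field;                                             // stands in for a Java final field
};

std::atomic<HasFinal*> shared{nullptr};

void ConstructAndPublish() {
  HasFinal* obj = new HasFinal();
  obj->field = 123;                                      // store to the "final" field
  std::atomic_thread_fence(std::memory_order_release);   // the constructor fence
  shared.store(obj, std::memory_order_relaxed);          // publication store
}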
+ public: + // `fence_object` is the reference that needs to be protected for correct publication. + // + // It makes sense in the following situations: + // * <init> constructors, it's the "this" parameter (i.e. HParameterValue, s.t. IsThis() == true). + // * new-instance-like instructions, it's the return value (i.e. HNewInstance). + // + // After construction the `fence_object` becomes the 0th input. + // This is not an input in a real sense, but just a convenient place to stash the information + // about the associated object. + HConstructorFence(HInstruction* fence_object, + uint32_t dex_pc, + ArenaAllocator* arena) + // We strongly suspect there is not a more accurate way to describe the fine-grained reordering + // constraints described in the class header. We claim that these SideEffects constraints + // enforce a superset of the real constraints. + // + // The ordering described above is conservatively modeled with SideEffects as follows: + // + // * To prevent reordering of the publication stores: + // ----> "Reads of objects" is the initial SideEffect. + // * For every primitive final field store in the constructor: + // ----> Union that field's type as a read (e.g. "Read of T") into the SideEffect. + // * If there are any stores to reference final fields in the constructor: + // ----> Use a more conservative "AllReads" SideEffect because any stores to any references + // that are reachable from `fence_object` also need to be prevented for reordering + // (and we do not want to do alias analysis to figure out what those stores are). + // + // In the implementation, this initially starts out as an "all reads" side effect; this is an + // even more conservative approach than the one described above, and prevents all of the + // above reordering without analyzing any of the instructions in the constructor. + // + // If in a later phase we discover that there are no writes to reference final fields, + // we can refine the side effect to a smaller set of type reads (see above constraints). + : HVariableInputSizeInstruction(SideEffects::AllReads(), + dex_pc, + arena, + /* number_of_inputs */ 1, + kArenaAllocConstructorFenceInputs) { + DCHECK(fence_object != nullptr); + SetRawInputAt(0, fence_object); + } + + // The object associated with this constructor fence. + // + // (Note: This will be null after the prepare_for_register_allocation phase, + // as all constructor fence inputs are removed there). + HInstruction* GetFenceObject() const { + return InputAt(0); + } + + // Find all the HConstructorFence uses (`fence_use`) for `this` and: + // - Delete `fence_use` from `this`'s use list. + // - Delete `this` from `fence_use`'s inputs list. + // - If the `fence_use` is dead, remove it from the graph. + // + // A fence is considered dead once it no longer has any uses + // and all of the inputs are dead. + // + // This must *not* be called during/after prepare_for_register_allocation, + // because that removes all the inputs to the fences but the fence is actually + // still considered live. 
+ static void RemoveConstructorFences(HInstruction* instruction); + + DECLARE_INSTRUCTION(ConstructorFence); + + private: + DISALLOW_COPY_AND_ASSIGN(HConstructorFence); +}; + class HMonitorOperation FINAL : public HTemplateInstruction<1> { public: enum class OperationKind { diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 66bfea9860..c3c141bff7 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -167,6 +167,13 @@ void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { } } +void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* constructor_fence) { + // Delete all the inputs to the constructor fence; + // they aren't used by the InstructionCodeGenerator and this lets us avoid creating a + // LocationSummary in the LocationsBuilder. + constructor_fence->RemoveAllInputs(); +} + void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { if (invoke->IsStaticWithExplicitClinitCheck()) { HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass(); diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 7ffbe44ef6..395d4ba2ee 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -43,6 +43,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitArraySet(HArraySet* instruction) OVERRIDE; void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; + void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index a1016d1d47..029eb4ba61 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -190,7 +190,7 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length); block->AddInstruction(ae); HInstruction* deoptimize = - new(&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u); + new(&allocator_) HDeoptimize(&allocator_, ae, DeoptimizationKind::kBlockBCE, /* dex_pc */ 0u); block->AddInstruction(deoptimize); HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_, /* number_of_vregs */ 5, diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 0ed8a35338..0f24e81be2 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -652,6 +652,9 @@ class ArmAssembler : public Assembler { virtual void blx(Register rm, Condition cond = AL) = 0; virtual void bx(Register rm, Condition cond = AL) = 0; + // ADR instruction loading register for branching to the label. + virtual void AdrCode(Register rt, Label* label) = 0; + // Memory barriers. 
virtual void dmb(DmbOptions flavor) = 0; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 1e71d06b49..d7096b3c87 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -214,14 +214,14 @@ void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) { DCHECK_GE(dest_end, src_end); for (auto i = fixups_.rbegin(), end = fixups_.rend(); i != end; ++i) { Fixup* fixup = &*i; + size_t old_fixup_location = fixup->GetLocation(); if (fixup->GetOriginalSize() == fixup->GetSize()) { // The size of this Fixup didn't change. To avoid moving the data // in small chunks, emit the code to its original position. - fixup->Emit(&buffer_, adjusted_code_size); fixup->Finalize(dest_end - src_end); + fixup->Emit(old_fixup_location, &buffer_, adjusted_code_size); } else { // Move the data between the end of the fixup and src_end to its final location. - size_t old_fixup_location = fixup->GetLocation(); size_t src_begin = old_fixup_location + fixup->GetOriginalSizeInBytes(); size_t data_size = src_end - src_begin; size_t dest_begin = dest_end - data_size; @@ -230,7 +230,7 @@ void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) { dest_end = dest_begin - fixup->GetSizeInBytes(); // Finalize the Fixup and emit the data to the new location. fixup->Finalize(dest_end - src_end); - fixup->Emit(&buffer_, adjusted_code_size); + fixup->Emit(fixup->GetLocation(), &buffer_, adjusted_code_size); } } CHECK_EQ(src_end, dest_end); @@ -1895,6 +1895,9 @@ inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) { case kCbxz48Bit: return 6u; + case kCodeAddr4KiB: + return 4u; + case kLiteral1KiB: return 2u; case kLiteral4KiB: @@ -1973,6 +1976,15 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con diff -= 2; // Extra CMP Rn, #0, 16-bit. break; + case kCodeAddr4KiB: + // The ADR instruction rounds down the PC+4 to a multiple of 4, so if the PC + // isn't a multiple of 2, we need to adjust. + DCHECK_ALIGNED(diff, 2); + diff += location_ & 2; + // Add the Thumb mode bit. + diff += 1; + break; + case kLiteral1KiB: case kLiteral4KiB: case kLongOrFPLiteral1KiB: @@ -1987,8 +1999,8 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con diff = diff + (diff & 2); DCHECK_GE(diff, 0); break; - case kLiteral1MiB: case kLiteral64KiB: + case kLiteral1MiB: case kLongOrFPLiteral64KiB: case kLiteralAddr64KiB: DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC. @@ -2041,6 +2053,10 @@ bool Thumb2Assembler::Fixup::IsCandidateForEmitEarly() const { // We don't support conditional branches beyond +-1MiB. return true; + case kCodeAddr4KiB: + // ADR uses the aligned PC and as such the offset cannot be calculated early. + return false; + case kLiteral1KiB: case kLiteral4KiB: case kLiteral64KiB: @@ -2087,6 +2103,10 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) // We don't support conditional branches beyond +-1MiB. break; + case kCodeAddr4KiB: + // We don't support Code address ADR beyond +4KiB. 
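To make the kCodeAddr4KiB offset rule concrete, a standalone sketch (assuming a 32-bit ADR at a 2-byte-aligned location and a 2-byte-aligned target label) of the value such a fixup ends up encoding:

#include <cstdint>

uint32_t AdrCodeOffset(uint32_t adr_location, uint32_t target_location) {
  // ADR adds its immediate to AlignDown(PC + 4, 4), not to PC + 4 itself.
  uint32_t aligned_pc = (adr_location + 4u) & ~3u;
  uint32_t offset = target_location - aligned_pc;
  // Add the Thumb mode bit so the loaded address can be used for a branch.
  return offset + 1u;
}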
+ break; + case kLiteral1KiB: DCHECK(!IsHighRegister(rn_)); if (IsUint<10>(GetOffset(current_code_size))) { @@ -2159,13 +2179,15 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) return current_code_size - old_code_size; } -void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) const { +void Thumb2Assembler::Fixup::Emit(uint32_t emit_location, + AssemblerBuffer* buffer, + uint32_t code_size) const { switch (GetSize()) { case kBranch16Bit: { DCHECK(type_ == kUnconditional || type_ == kConditional); DCHECK_EQ(type_ == kConditional, cond_ != AL); int16_t encoding = BEncoding16(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kBranch32Bit: { @@ -2180,15 +2202,15 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK_NE(encoding & B12, 0); encoding ^= B14 | B12; } - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kCbxz16Bit: { DCHECK(type_ == kCompareAndBranchXZero); int16_t encoding = CbxzEncoding16(rn_, GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kCbxz32Bit: { @@ -2196,8 +2218,8 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK(cond_ == EQ || cond_ == NE); int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); int16_t b_encoding = BEncoding16(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, cmp_encoding); - buffer->Store<int16_t>(location_ + 2, b_encoding); + buffer->Store<int16_t>(emit_location, cmp_encoding); + buffer->Store<int16_t>(emit_location + 2, b_encoding); break; } case kCbxz48Bit: { @@ -2205,24 +2227,32 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK(cond_ == EQ || cond_ == NE); int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); int32_t b_encoding = BEncoding32(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, cmp_encoding); - buffer->Store<int16_t>(location_ + 2u, b_encoding >> 16); - buffer->Store<int16_t>(location_ + 4u, static_cast<int16_t>(b_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, cmp_encoding); + buffer->Store<int16_t>(emit_location + 2u, b_encoding >> 16); + buffer->Store<int16_t>(emit_location + 4u, static_cast<int16_t>(b_encoding & 0xffff)); + break; + } + + case kCodeAddr4KiB: { + DCHECK(type_ == kLoadCodeAddr); + int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteral1KiB: { DCHECK(type_ == kLoadLiteralNarrow); int16_t encoding = LdrLitEncoding16(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kLiteral4KiB: { DCHECK(type_ == kLoadLiteralNarrow); // GetOffset() uses PC+4 but load literal uses AlignDown(PC+4, 4). Adjust offset accordingly. 
int32_t encoding = LdrLitEncoding32(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteral64KiB: { @@ -2242,11 +2272,11 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t mov_encoding = MovModImmEncoding32(rn_, offset & ~0xfff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, offset & 0xfff); - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLiteralFar: { @@ -2256,36 +2286,36 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, 0); - buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLiteralAddr1KiB: { DCHECK(type_ == kLoadLiteralAddr); int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kLiteralAddr4KiB: { DCHECK(type_ == kLoadLiteralAddr); int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteralAddr64KiB: { DCHECK(type_ == kLoadLiteralAddr); int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size)); int16_t add_pc_encoding = 
AddRdnRmEncoding16(rn_, PC); - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); break; } case kLiteralAddrFar: { @@ -2294,29 +2324,29 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff); int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); - buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); break; } case kLongOrFPLiteral1KiB: { int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size)); // DCHECKs type_. - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLongOrFPLiteral64KiB: { int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size)); int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u); // DCHECKs type_. - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLongOrFPLiteralFar: { @@ -2325,13 +2355,13 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movt_encoding = MovtEncoding32(IP, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0); // DCHECKs type_. 
- buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } } @@ -3331,6 +3361,19 @@ void Thumb2Assembler::bx(Register rm, Condition cond) { } +void Thumb2Assembler::AdrCode(Register rt, Label* label) { + uint32_t pc = buffer_.Size(); + FixupId branch_id = AddFixup(Fixup::LoadCodeAddress(pc, rt)); + CHECK(!label->IsBound()); + // ADR target must be an unbound label. Add it to a singly-linked list maintained within + // the code with the label serving as the head. + Emit16(static_cast<uint16_t>(label->position_)); + label->LinkTo(branch_id); + Emit16(0); + DCHECK_EQ(buffer_.Size() - pc, GetFixup(branch_id)->GetSizeInBytes()); +} + + void Thumb2Assembler::Push(Register rd, Condition cond) { str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond); } @@ -3405,7 +3448,7 @@ void Thumb2Assembler::Bind(Label* label) { break; } } - last_fixup.Emit(&buffer_, buffer_.Size()); + last_fixup.Emit(last_fixup.GetLocation(), &buffer_, buffer_.Size()); fixups_.pop_back(); } } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 1c495aa7a7..2ff9018510 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -268,6 +268,9 @@ class Thumb2Assembler FINAL : public ArmAssembler { void blx(Register rm, Condition cond = AL) OVERRIDE; void bx(Register rm, Condition cond = AL) OVERRIDE; + // ADR instruction loading register for branching to the label, including the Thumb mode bit. + void AdrCode(Register rt, Label* label) OVERRIDE; + virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, @@ -377,6 +380,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { force_32bit_ = true; } + void Allow16Bit() { + force_32bit_ = false; + } + // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This // will generate a fixup. JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE; @@ -422,6 +429,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { kUnconditionalLink, // BL. kUnconditionalLinkX, // BLX. kCompareAndBranchXZero, // cbz/cbnz. + kLoadCodeAddr, // Get address of a code label, used for Baker read barriers. kLoadLiteralNarrow, // Load narrrow integer literal. kLoadLiteralWide, // Load wide integer literal. kLoadLiteralAddr, // Load address of literal (used for jump table). 
@@ -442,6 +450,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { kCbxz32Bit, // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset. kCbxz48Bit, // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset. + // ADR variants. + kCodeAddr4KiB, // ADR rX, <label>; label must be after the ADR but within 4KiB range. + // Multi-instruction expansion is not supported. + // Load integer literal variants. // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes. kLiteral1KiB, @@ -492,6 +504,12 @@ class Thumb2Assembler FINAL : public ArmAssembler { cond, kCompareAndBranchXZero, kCbxz16Bit, location); } + // Code address. + static Fixup LoadCodeAddress(uint32_t location, Register rt) { + return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister, + AL, kLoadCodeAddr, kCodeAddr4KiB, location); + } + // Load narrow literal. static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) { DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB || @@ -550,6 +568,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { switch (GetOriginalSize()) { case kBranch32Bit: case kCbxz48Bit: + case kCodeAddr4KiB: case kLiteralFar: case kLiteralAddrFar: case kLongOrFPLiteralFar: @@ -623,7 +642,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Emit the branch instruction into the assembler buffer. This does the // encoding into the thumb instruction. - void Emit(AssemblerBuffer* buffer, uint32_t code_size) const; + void Emit(uint32_t emit_location, AssemblerBuffer* buffer, uint32_t code_size) const; private: Fixup(Register rn, Register rt2, SRegister sd, DRegister dd, @@ -903,6 +922,26 @@ class Thumb2Assembler FINAL : public ArmAssembler { FixupId last_fixup_id_; }; +class ScopedForce32Bit { + public: + explicit ScopedForce32Bit(Thumb2Assembler* assembler, bool force = true) + : assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) { + if (force) { + assembler->Force32Bit(); + } + } + + ~ScopedForce32Bit() { + if (!old_force_32bit_) { + assembler_->Allow16Bit(); + } + } + + private: + Thumb2Assembler* const assembler_; + const bool old_force_32bit_; +}; + } // namespace arm } // namespace art diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 9fd42d2cb7..660409f6f9 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -1374,6 +1374,26 @@ class Dex2Oat FINAL { oat_filenames_.push_back(oat_location_.c_str()); } + // If we're updating in place a vdex file, be defensive and put an invalid vdex magic in case + // dex2oat gets killed. + // Note: we're only invalidating the magic data in the file, as dex2oat needs the rest of + // the information to remain valid. + if (update_input_vdex_) { + std::unique_ptr<BufferedOutputStream> vdex_out(MakeUnique<BufferedOutputStream>( + MakeUnique<FileOutputStream>(vdex_files_.back().get()))); + if (!vdex_out->WriteFully(&VdexFile::Header::kVdexInvalidMagic, + arraysize(VdexFile::Header::kVdexInvalidMagic))) { + PLOG(ERROR) << "Failed to invalidate vdex header. File: " << vdex_out->GetLocation(); + return false; + } + + if (!vdex_out->Flush()) { + PLOG(ERROR) << "Failed to flush stream after invalidating header of vdex file." + << " File: " << vdex_out->GetLocation(); + return false; + } + } + // Swap file handling // // If the swap fd is not -1, we assume this is the file descriptor of an open but unlinked file @@ -2433,8 +2453,8 @@ class Dex2Oat FINAL { // which uses an unstarted runtime. 
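Note (not part of the patch): the ScopedForce32Bit helper introduced above is an RAII guard around the assembler's forced-32-bit mode; it calls Force32Bit() on construction (when `force` is true) and only re-enables 16-bit encodings on destruction if they were allowed before. A minimal usage sketch with a hypothetical call site:

    void EmitFixedSizeSequence(art::arm::Thumb2Assembler* assembler) {
      // While `force_32bit` is alive, emitted Thumb2 instructions use 32-bit
      // encodings, so the size of the emitted sequence is predictable.
      art::arm::ScopedForce32Bit force_32bit(assembler);
      // ... emit instructions whose layout must not depend on encoding size ...
    }  // ~ScopedForce32Bit() calls Allow16Bit() unless 32-bit mode was already forced.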
raw_options.push_back(std::make_pair("-Xgc:nonconcurrent", nullptr)); - // Also force the free-list implementation for large objects. - raw_options.push_back(std::make_pair("-XX:LargeObjectSpace=freelist", nullptr)); + // The default LOS implementation (map) is not deterministic. So disable it. + raw_options.push_back(std::make_pair("-XX:LargeObjectSpace=disabled", nullptr)); // We also need to turn off the nonmoving space. For that, we need to disable HSpace // compaction (done above) and ensure that neither foreground nor background collectors diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc index d546072f58..6420aa8759 100644 --- a/dex2oat/dex2oat_test.cc +++ b/dex2oat/dex2oat_test.cc @@ -430,6 +430,9 @@ class Dex2oatSwapUseTest : public Dex2oatSwapTest { }; TEST_F(Dex2oatSwapUseTest, CheckSwapUsage) { + // Native memory usage isn't correctly tracked under sanitization. + TEST_DISABLED_FOR_MEMORY_TOOL_ASAN(); + // The `native_alloc_2_ >= native_alloc_1_` assertion below may not // hold true on some x86 systems; disable this test while we // investigate (b/29259363). diff --git a/dexlayout/Android.bp b/dexlayout/Android.bp index a2116cdc93..588a3ae3ca 100644 --- a/dexlayout/Android.bp +++ b/dexlayout/Android.bp @@ -20,7 +20,7 @@ art_cc_defaults { "dexlayout.cc", "dex_ir.cc", "dex_ir_builder.cc", - "dex_verify.cc", + "dex_verify.cc", "dex_visualize.cc", "dex_writer.cc", ], @@ -43,6 +43,7 @@ art_cc_library { art_cc_binary { name: "dexlayout", + defaults: ["art_defaults"], host_supported: true, srcs: ["dexlayout_main.cc"], cflags: ["-Wall"], @@ -61,13 +62,28 @@ art_cc_test { art_cc_binary { name: "dexdiag", - host_supported: false, + defaults: ["art_defaults"], + host_supported: true, srcs: ["dexdiag.cc"], cflags: ["-Wall"], shared_libs: [ "libart", "libart-dexlayout", - "libpagemap", ], + target: { + android: { + shared_libs: [ + "libpagemap", + ] + }, + } } +art_cc_test { + name: "art_dexdiag_tests", + host_supported: true, + defaults: [ + "art_gtest_defaults", + ], + srcs: ["dexdiag_test.cc"], +} diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc index f1c6f67a7c..cf453b9a16 100644 --- a/dexlayout/dex_ir.cc +++ b/dexlayout/dex_ir.cc @@ -281,6 +281,16 @@ void Collections::ReadEncodedValue( item->SetDouble(conv.d); break; } + case DexFile::kDexAnnotationMethodType: { + const uint32_t proto_index = static_cast<uint32_t>(ReadVarWidth(data, length, false)); + item->SetProtoId(GetProtoId(proto_index)); + break; + } + case DexFile::kDexAnnotationMethodHandle: { + const uint32_t method_handle_index = static_cast<uint32_t>(ReadVarWidth(data, length, false)); + item->SetMethodHandle(GetMethodHandle(method_handle_index)); + break; + } case DexFile::kDexAnnotationString: { const uint32_t string_index = static_cast<uint32_t>(ReadVarWidth(data, length, false)); item->SetStringId(GetStringId(string_index)); @@ -766,6 +776,64 @@ ClassData* Collections::CreateClassData( return class_data; } +void Collections::CreateCallSitesAndMethodHandles(const DexFile& dex_file) { + // Iterate through the map list and set the offset of the CallSiteIds and MethodHandleItems. 
+ const DexFile::MapList* map = + reinterpret_cast<const DexFile::MapList*>(dex_file.Begin() + MapListOffset()); + for (uint32_t i = 0; i < map->size_; ++i) { + const DexFile::MapItem* item = map->list_ + i; + switch (item->type_) { + case DexFile::kDexTypeCallSiteIdItem: + SetCallSiteIdsOffset(item->offset_); + break; + case DexFile::kDexTypeMethodHandleItem: + SetMethodHandleItemsOffset(item->offset_); + break; + default: + break; + } + } + // Populate MethodHandleItems first (CallSiteIds may depend on them). + for (uint32_t i = 0; i < dex_file.NumMethodHandles(); i++) { + CreateMethodHandleItem(dex_file, i); + } + // Populate CallSiteIds. + for (uint32_t i = 0; i < dex_file.NumCallSiteIds(); i++) { + CreateCallSiteId(dex_file, i); + } +} + +void Collections::CreateCallSiteId(const DexFile& dex_file, uint32_t i) { + const DexFile::CallSiteIdItem& disk_call_site_id = dex_file.GetCallSiteId(i); + const uint8_t* disk_call_item_ptr = dex_file.Begin() + disk_call_site_id.data_off_; + EncodedArrayItem* call_site_item = + CreateEncodedArrayItem(disk_call_item_ptr, disk_call_site_id.data_off_); + + CallSiteId* call_site_id = new CallSiteId(call_site_item); + call_site_ids_.AddIndexedItem(call_site_id, CallSiteIdsOffset() + i * CallSiteId::ItemSize(), i); +} + +void Collections::CreateMethodHandleItem(const DexFile& dex_file, uint32_t i) { + const DexFile::MethodHandleItem& disk_method_handle = dex_file.GetMethodHandle(i); + uint16_t index = disk_method_handle.field_or_method_idx_; + DexFile::MethodHandleType type = + static_cast<DexFile::MethodHandleType>(disk_method_handle.method_handle_type_); + bool is_invoke = type == DexFile::MethodHandleType::kInvokeStatic || + type == DexFile::MethodHandleType::kInvokeInstance || + type == DexFile::MethodHandleType::kInvokeConstructor; + static_assert(DexFile::MethodHandleType::kLast == DexFile::MethodHandleType::kInvokeConstructor, + "Unexpected method handle types."); + IndexedItem* field_or_method_id; + if (is_invoke) { + field_or_method_id = GetMethodId(index); + } else { + field_or_method_id = GetFieldId(index); + } + MethodHandleItem* method_handle = new MethodHandleItem(type, field_or_method_id); + method_handle_items_.AddIndexedItem( + method_handle, MethodHandleItemsOffset() + i * MethodHandleItem::ItemSize(), i); +} + static uint32_t HeaderOffset(const dex_ir::Collections& collections ATTRIBUTE_UNUSED) { return 0; } @@ -823,6 +891,16 @@ static const FileSectionDescriptor kFileSectionDescriptors[] = { &dex_ir::Collections::ClassDefsSize, &dex_ir::Collections::ClassDefsOffset }, { + "CallSiteId", + DexFile::kDexTypeCallSiteIdItem, + &dex_ir::Collections::CallSiteIdsSize, + &dex_ir::Collections::CallSiteIdsOffset + }, { + "MethodHandle", + DexFile::kDexTypeMethodHandleItem, + &dex_ir::Collections::MethodHandleItemsSize, + &dex_ir::Collections::MethodHandleItemsOffset + }, { "StringData", DexFile::kDexTypeStringDataItem, &dex_ir::Collections::StringDatasSize, diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h index cad039550a..5692eb2b39 100644 --- a/dexlayout/dex_ir.h +++ b/dexlayout/dex_ir.h @@ -35,6 +35,7 @@ class AnnotationItem; class AnnotationsDirectoryItem; class AnnotationSetItem; class AnnotationSetRefList; +class CallSiteId; class ClassData; class ClassDef; class CodeItem; @@ -47,6 +48,7 @@ class FieldItem; class Header; class MapList; class MapItem; +class MethodHandleItem; class MethodId; class MethodItem; class ParameterAnnotation; @@ -65,6 +67,8 @@ static constexpr size_t kProtoIdItemSize = 12; static constexpr size_t 
kFieldIdItemSize = 8; static constexpr size_t kMethodIdItemSize = 8; static constexpr size_t kClassDefItemSize = 32; +static constexpr size_t kCallSiteIdItemSize = 4; +static constexpr size_t kMethodHandleItemSize = 8; // Visitor support class AbstractDispatcher { @@ -79,6 +83,8 @@ class AbstractDispatcher { virtual void Dispatch(const ProtoId* proto_id) = 0; virtual void Dispatch(const FieldId* field_id) = 0; virtual void Dispatch(const MethodId* method_id) = 0; + virtual void Dispatch(const CallSiteId* call_site_id) = 0; + virtual void Dispatch(const MethodHandleItem* method_handle_item) = 0; virtual void Dispatch(ClassData* class_data) = 0; virtual void Dispatch(ClassDef* class_def) = 0; virtual void Dispatch(FieldItem* field_item) = 0; @@ -165,6 +171,9 @@ class Collections { std::vector<std::unique_ptr<FieldId>>& FieldIds() { return field_ids_.Collection(); } std::vector<std::unique_ptr<MethodId>>& MethodIds() { return method_ids_.Collection(); } std::vector<std::unique_ptr<ClassDef>>& ClassDefs() { return class_defs_.Collection(); } + std::vector<std::unique_ptr<CallSiteId>>& CallSiteIds() { return call_site_ids_.Collection(); } + std::vector<std::unique_ptr<MethodHandleItem>>& MethodHandleItems() + { return method_handle_items_.Collection(); } std::map<uint32_t, std::unique_ptr<StringData>>& StringDatas() { return string_datas_.Collection(); } std::map<uint32_t, std::unique_ptr<TypeList>>& TypeLists() { return type_lists_.Collection(); } @@ -189,6 +198,10 @@ class Collections { void CreateFieldId(const DexFile& dex_file, uint32_t i); void CreateMethodId(const DexFile& dex_file, uint32_t i); void CreateClassDef(const DexFile& dex_file, uint32_t i); + void CreateCallSiteId(const DexFile& dex_file, uint32_t i); + void CreateMethodHandleItem(const DexFile& dex_file, uint32_t i); + + void CreateCallSitesAndMethodHandles(const DexFile& dex_file); TypeList* CreateTypeList(const DexFile::TypeList* type_list, uint32_t offset); EncodedArrayItem* CreateEncodedArrayItem(const uint8_t* static_data, uint32_t offset); @@ -207,6 +220,8 @@ class Collections { FieldId* GetFieldId(uint32_t index) { return FieldIds()[index].get(); } MethodId* GetMethodId(uint32_t index) { return MethodIds()[index].get(); } ClassDef* GetClassDef(uint32_t index) { return ClassDefs()[index].get(); } + CallSiteId* GetCallSiteId(uint32_t index) { return CallSiteIds()[index].get(); } + MethodHandleItem* GetMethodHandle(uint32_t index) { return MethodHandleItems()[index].get(); } StringId* GetStringIdOrNullPtr(uint32_t index) { return index == DexFile::kDexNoIndex ? 
nullptr : GetStringId(index); @@ -221,6 +236,8 @@ class Collections { uint32_t FieldIdsOffset() const { return field_ids_.GetOffset(); } uint32_t MethodIdsOffset() const { return method_ids_.GetOffset(); } uint32_t ClassDefsOffset() const { return class_defs_.GetOffset(); } + uint32_t CallSiteIdsOffset() const { return call_site_ids_.GetOffset(); } + uint32_t MethodHandleItemsOffset() const { return method_handle_items_.GetOffset(); } uint32_t StringDatasOffset() const { return string_datas_.GetOffset(); } uint32_t TypeListsOffset() const { return type_lists_.GetOffset(); } uint32_t EncodedArrayItemsOffset() const { return encoded_array_items_.GetOffset(); } @@ -240,6 +257,9 @@ class Collections { void SetFieldIdsOffset(uint32_t new_offset) { field_ids_.SetOffset(new_offset); } void SetMethodIdsOffset(uint32_t new_offset) { method_ids_.SetOffset(new_offset); } void SetClassDefsOffset(uint32_t new_offset) { class_defs_.SetOffset(new_offset); } + void SetCallSiteIdsOffset(uint32_t new_offset) { call_site_ids_.SetOffset(new_offset); } + void SetMethodHandleItemsOffset(uint32_t new_offset) + { method_handle_items_.SetOffset(new_offset); } void SetStringDatasOffset(uint32_t new_offset) { string_datas_.SetOffset(new_offset); } void SetTypeListsOffset(uint32_t new_offset) { type_lists_.SetOffset(new_offset); } void SetEncodedArrayItemsOffset(uint32_t new_offset) @@ -262,6 +282,8 @@ class Collections { uint32_t FieldIdsSize() const { return field_ids_.Size(); } uint32_t MethodIdsSize() const { return method_ids_.Size(); } uint32_t ClassDefsSize() const { return class_defs_.Size(); } + uint32_t CallSiteIdsSize() const { return call_site_ids_.Size(); } + uint32_t MethodHandleItemsSize() const { return method_handle_items_.Size(); } uint32_t StringDatasSize() const { return string_datas_.Size(); } uint32_t TypeListsSize() const { return type_lists_.Size(); } uint32_t EncodedArrayItemsSize() const { return encoded_array_items_.Size(); } @@ -288,6 +310,8 @@ class Collections { CollectionVector<FieldId> field_ids_; CollectionVector<MethodId> method_ids_; CollectionVector<ClassDef> class_defs_; + CollectionVector<CallSiteId> call_site_ids_; + CollectionVector<MethodHandleItem> method_handle_items_; CollectionMap<StringData> string_datas_; CollectionMap<TypeList> type_lists_; @@ -603,8 +627,10 @@ class EncodedValue { void SetDouble(double d) { u_.double_val_ = d; } void SetStringId(StringId* string_id) { u_.string_val_ = string_id; } void SetTypeId(TypeId* type_id) { u_.type_val_ = type_id; } + void SetProtoId(ProtoId* proto_id) { u_.proto_val_ = proto_id; } void SetFieldId(FieldId* field_id) { u_.field_val_ = field_id; } void SetMethodId(MethodId* method_id) { u_.method_val_ = method_id; } + void SetMethodHandle(MethodHandleItem* method_handle) { u_.method_handle_val_ = method_handle; } void SetEncodedArray(EncodedArrayItem* encoded_array) { encoded_array_.reset(encoded_array); } void SetEncodedAnnotation(EncodedAnnotation* encoded_annotation) { encoded_annotation_.reset(encoded_annotation); } @@ -619,8 +645,10 @@ class EncodedValue { double GetDouble() const { return u_.double_val_; } StringId* GetStringId() const { return u_.string_val_; } TypeId* GetTypeId() const { return u_.type_val_; } + ProtoId* GetProtoId() const { return u_.proto_val_; } FieldId* GetFieldId() const { return u_.field_val_; } MethodId* GetMethodId() const { return u_.method_val_; } + MethodHandleItem* GetMethodHandle() const { return u_.method_handle_val_; } EncodedArrayItem* GetEncodedArray() const { return encoded_array_.get(); 
} EncodedAnnotation* GetEncodedAnnotation() const { return encoded_annotation_.get(); } @@ -639,8 +667,10 @@ class EncodedValue { double double_val_; StringId* string_val_; TypeId* type_val_; + ProtoId* proto_val_; FieldId* field_val_; MethodId* method_val_; + MethodHandleItem* method_handle_val_; } u_; std::unique_ptr<EncodedArrayItem> encoded_array_; std::unique_ptr<EncodedAnnotation> encoded_annotation_; @@ -1087,6 +1117,48 @@ class AnnotationsDirectoryItem : public Item { DISALLOW_COPY_AND_ASSIGN(AnnotationsDirectoryItem); }; +class CallSiteId : public IndexedItem { + public: + explicit CallSiteId(EncodedArrayItem* call_site_item) : call_site_item_(call_site_item) { + size_ = kCallSiteIdItemSize; + } + ~CallSiteId() OVERRIDE { } + + static size_t ItemSize() { return kCallSiteIdItemSize; } + + EncodedArrayItem* CallSiteItem() const { return call_site_item_; } + + void Accept(AbstractDispatcher* dispatch) const { dispatch->Dispatch(this); } + + private: + EncodedArrayItem* call_site_item_; + + DISALLOW_COPY_AND_ASSIGN(CallSiteId); +}; + +class MethodHandleItem : public IndexedItem { + public: + MethodHandleItem(DexFile::MethodHandleType method_handle_type, IndexedItem* field_or_method_id) + : method_handle_type_(method_handle_type), + field_or_method_id_(field_or_method_id) { + size_ = kMethodHandleItemSize; + } + ~MethodHandleItem() OVERRIDE { } + + static size_t ItemSize() { return kMethodHandleItemSize; } + + DexFile::MethodHandleType GetMethodHandleType() const { return method_handle_type_; } + IndexedItem* GetFieldOrMethodId() const { return field_or_method_id_; } + + void Accept(AbstractDispatcher* dispatch) const { dispatch->Dispatch(this); } + + private: + DexFile::MethodHandleType method_handle_type_; + IndexedItem* field_or_method_id_; + + DISALLOW_COPY_AND_ASSIGN(MethodHandleItem); +}; + // TODO(sehr): implement MapList. class MapList : public Item { public: diff --git a/dexlayout/dex_ir_builder.cc b/dexlayout/dex_ir_builder.cc index d0c5bf964e..8eb726a64a 100644 --- a/dexlayout/dex_ir_builder.cc +++ b/dexlayout/dex_ir_builder.cc @@ -72,6 +72,8 @@ Header* DexIrBuilder(const DexFile& dex_file) { } // MapItem. collections.SetMapListOffset(disk_header.map_off_); + // CallSiteIds and MethodHandleItems. 
+ collections.CreateCallSitesAndMethodHandles(dex_file); CheckAndSetRemainingOffsets(dex_file, &collections); @@ -115,6 +117,14 @@ static void CheckAndSetRemainingOffsets(const DexFile& dex_file, Collections* co CHECK_EQ(item->size_, collections->ClassDefsSize()); CHECK_EQ(item->offset_, collections->ClassDefsOffset()); break; + case DexFile::kDexTypeCallSiteIdItem: + CHECK_EQ(item->size_, collections->CallSiteIdsSize()); + CHECK_EQ(item->offset_, collections->CallSiteIdsOffset()); + break; + case DexFile::kDexTypeMethodHandleItem: + CHECK_EQ(item->size_, collections->MethodHandleItemsSize()); + CHECK_EQ(item->offset_, collections->MethodHandleItemsOffset()); + break; case DexFile::kDexTypeMapList: CHECK_EQ(item->size_, 1u); CHECK_EQ(item->offset_, disk_header.map_off_); diff --git a/dexlayout/dex_writer.cc b/dexlayout/dex_writer.cc index 7ffa38bfd4..e1b828ca52 100644 --- a/dexlayout/dex_writer.cc +++ b/dexlayout/dex_writer.cc @@ -151,6 +151,12 @@ size_t DexWriter::WriteEncodedValue(dex_ir::EncodedValue* encoded_value, size_t length = EncodeDoubleValue(encoded_value->GetDouble(), buffer); start = 8 - length; break; + case DexFile::kDexAnnotationMethodType: + length = EncodeUIntValue(encoded_value->GetProtoId()->GetIndex(), buffer); + break; + case DexFile::kDexAnnotationMethodHandle: + length = EncodeUIntValue(encoded_value->GetMethodHandle()->GetIndex(), buffer); + break; case DexFile::kDexAnnotationString: length = EncodeUIntValue(encoded_value->GetStringId()->GetIndex(), buffer); break; @@ -485,6 +491,27 @@ void DexWriter::WriteClasses() { } } +void DexWriter::WriteCallSites() { + uint32_t call_site_off[1]; + for (std::unique_ptr<dex_ir::CallSiteId>& call_site_id : + header_->GetCollections().CallSiteIds()) { + call_site_off[0] = call_site_id->CallSiteItem()->GetOffset(); + Write(call_site_off, call_site_id->GetSize(), call_site_id->GetOffset()); + } +} + +void DexWriter::WriteMethodHandles() { + uint16_t method_handle_buff[4]; + for (std::unique_ptr<dex_ir::MethodHandleItem>& method_handle : + header_->GetCollections().MethodHandleItems()) { + method_handle_buff[0] = static_cast<uint16_t>(method_handle->GetMethodHandleType()); + method_handle_buff[1] = 0; // unused. + method_handle_buff[2] = method_handle->GetFieldOrMethodId()->GetIndex(); + method_handle_buff[3] = 0; // unused. + Write(method_handle_buff, method_handle->GetSize(), method_handle->GetOffset()); + } +} + struct MapItemContainer { MapItemContainer(uint32_t type, uint32_t size, uint32_t offset) : type_(type), size_(size), offset_(offset) { } @@ -528,6 +555,14 @@ void DexWriter::WriteMapItem() { queue.push(MapItemContainer(DexFile::kDexTypeClassDefItem, collection.ClassDefsSize(), collection.ClassDefsOffset())); } + if (collection.CallSiteIdsSize() != 0) { + queue.push(MapItemContainer(DexFile::kDexTypeCallSiteIdItem, collection.CallSiteIdsSize(), + collection.CallSiteIdsOffset())); + } + if (collection.MethodHandleItemsSize() != 0) { + queue.push(MapItemContainer(DexFile::kDexTypeMethodHandleItem, + collection.MethodHandleItemsSize(), collection.MethodHandleItemsOffset())); + } // Data section. 
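Note (not part of the patch): WriteCallSites() and WriteMethodHandles() above serialize the two new ID sections with the sizes declared earlier in this patch (kCallSiteIdItemSize = 4, kMethodHandleItemSize = 8). A sketch of the on-disk layouts they produce; the struct and field names below are illustrative, not the actual DexFile declarations:

    struct CallSiteIdItemLayout {
      uint32_t call_site_off;       // file offset of the call site's encoded_array_item
    };                              // 4 bytes == kCallSiteIdItemSize

    struct MethodHandleItemLayout {
      uint16_t method_handle_type;  // DexFile::MethodHandleType
      uint16_t unused1;             // written as 0
      uint16_t field_or_method_id;  // field_ids or method_ids index, depending on the type
      uint16_t unused2;             // written as 0
    };                              // 8 bytes == kMethodHandleItemSize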
queue.push(MapItemContainer(DexFile::kDexTypeMapList, 1, collection.MapListOffset())); @@ -618,10 +653,8 @@ void DexWriter::WriteHeader() { uint32_t class_defs_off = collections.ClassDefsOffset(); buffer[16] = class_defs_size; buffer[17] = class_defs_off; - uint32_t data_off = class_defs_off + class_defs_size * dex_ir::ClassDef::ItemSize(); - uint32_t data_size = file_size - data_off; - buffer[18] = data_size; - buffer[19] = data_off; + buffer[18] = header_->DataSize(); + buffer[19] = header_->DataOffset(); Write(buffer, 20 * sizeof(uint32_t), offset); } @@ -640,6 +673,8 @@ void DexWriter::WriteMemMap() { WriteDebugInfoItems(); WriteCodeItems(); WriteClasses(); + WriteCallSites(); + WriteMethodHandles(); WriteMapItem(); WriteHeader(); } diff --git a/dexlayout/dex_writer.h b/dexlayout/dex_writer.h index fb76e5ccfc..b396adf126 100644 --- a/dexlayout/dex_writer.h +++ b/dexlayout/dex_writer.h @@ -59,6 +59,8 @@ class DexWriter { void WriteDebugInfoItems(); void WriteCodeItems(); void WriteClasses(); + void WriteCallSites(); + void WriteMethodHandles(); void WriteMapItem(); void WriteHeader(); diff --git a/dexlayout/dexdiag.cc b/dexlayout/dexdiag.cc index ea2679a1e3..c577b6e105 100644 --- a/dexlayout/dexdiag.cc +++ b/dexlayout/dexdiag.cc @@ -15,6 +15,7 @@ */ #include <errno.h> +#include <inttypes.h> #include <stdint.h> #include <stdlib.h> #include <string.h> @@ -30,7 +31,9 @@ #include "dex_file.h" #include "dex_ir.h" #include "dex_ir_builder.h" +#ifdef ART_TARGET_ANDROID #include "pagemap/pagemap.h" +#endif #include "runtime.h" #include "vdex_file.h" @@ -38,8 +41,6 @@ namespace art { using android::base::StringPrintf; -static constexpr size_t kLineLength = 32; - static bool g_verbose = false; // The width needed to print a file page offset (32-bit). @@ -164,6 +165,7 @@ static void PrintLetterKey() { std::cout << ". (Mapped page not resident)" << std::endl; } +#ifdef ART_TARGET_ANDROID static char PageTypeChar(uint16_t type) { if (kDexSectionInfoMap.find(type) == kDexSectionInfoMap.end()) { return '-'; @@ -194,6 +196,7 @@ static void ProcessPageMap(uint64_t* pagemap, size_t end, const std::vector<dex_ir::DexFileSection>& sections, PageCount* page_counts) { + static constexpr size_t kLineLength = 32; for (size_t page = start; page < end; ++page) { char type_char = '.'; if (PM_PAGEMAP_PRESENT(pagemap[page])) { @@ -268,7 +271,7 @@ static void ProcessOneDexMapping(uint64_t* pagemap, std::cerr << "Dex file start offset for " << dex_file->GetLocation().c_str() << " is incorrect: map start " - << StringPrintf("%zx > dex start %zx\n", map_start, dex_file_start) + << StringPrintf("%" PRIx64 " > dex start %" PRIx64 "\n", map_start, dex_file_start) << std::endl; return; } @@ -277,7 +280,7 @@ static void ProcessOneDexMapping(uint64_t* pagemap, uint64_t end_page = RoundUp(start_address + dex_file_size, kPageSize) / kPageSize; std::cout << "DEX " << dex_file->GetLocation().c_str() - << StringPrintf(": %zx-%zx", + << StringPrintf(": %" PRIx64 "-%" PRIx64, map_start + start_page * kPageSize, map_start + end_page * kPageSize) << std::endl; @@ -293,21 +296,20 @@ static void ProcessOneDexMapping(uint64_t* pagemap, DisplayDexStatistics(start_page, end_page, section_resident_pages, sections, printer); } -static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { +static bool IsVdexFileMapping(const std::string& mapped_name) { // Confirm that the map is from a vdex file. 
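Note (not part of the patch): the format-string changes above replace "%zx" with "%" PRIx64 because the printed values (map and dex file bounds) are uint64_t, while "%zx" expects a size_t, which is only 32 bits wide on 32-bit targets. A minimal illustration with a hypothetical helper:

    #include <cinttypes>
    #include <cstdio>

    void PrintPageRange(uint64_t start, uint64_t end) {
      // PRIx64 expands to the correct hex conversion specifier for uint64_t on every target.
      std::printf("%" PRIx64 "-%" PRIx64 "\n", start, end);
    }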
static const char* suffixes[] = { ".vdex" }; - std::string vdex_name; - bool found = false; - for (size_t j = 0; j < sizeof(suffixes) / sizeof(suffixes[0]); ++j) { - if (strstr(pm_map_name(map), suffixes[j]) != nullptr) { - vdex_name = pm_map_name(map); - found = true; - break; + for (const char* suffix : suffixes) { + size_t match_loc = mapped_name.find(suffix); + if (match_loc != std::string::npos && mapped_name.length() == match_loc + strlen(suffix)) { + return true; } } - if (!found) { - return true; - } + return false; +} + +static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { + std::string vdex_name = pm_map_name(map); // Extract all the dex files from the vdex file. std::string error_msg; std::unique_ptr<VdexFile> vdex(VdexFile::Open(vdex_name, @@ -331,6 +333,7 @@ static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { << ": error " << error_msg << std::endl; + return false; } // Open the page mapping (one uint64_t per page) for the entire vdex mapping. uint64_t* pagemap; @@ -342,7 +345,7 @@ static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { // Process the dex files. std::cout << "MAPPING " << pm_map_name(map) - << StringPrintf(": %zx-%zx", pm_map_start(map), pm_map_end(map)) + << StringPrintf(": %" PRIx64 "-%" PRIx64, pm_map_start(map), pm_map_end(map)) << std::endl; for (const auto& dex_file : dex_files) { ProcessOneDexMapping(pagemap, @@ -356,6 +359,7 @@ static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { } static void ProcessOneOatMapping(uint64_t* pagemap, size_t size, Printer* printer) { + static constexpr size_t kLineLength = 32; size_t resident_page_count = 0; for (size_t page = 0; page < size; ++page) { char type_char = '.'; @@ -381,21 +385,19 @@ static void ProcessOneOatMapping(uint64_t* pagemap, size_t size, Printer* printe printer->PrintSkipLine(); } -static bool DisplayMappingIfFromOatFile(pm_map_t* map, Printer* printer) { - // Confirm that the map is from a vdex file. +static bool IsOatFileMapping(const std::string& mapped_name) { + // Confirm that the map is from an oat file. static const char* suffixes[] = { ".odex", ".oat" }; - std::string vdex_name; - bool found = false; - for (size_t j = 0; j < sizeof(suffixes) / sizeof(suffixes[0]); ++j) { - if (strstr(pm_map_name(map), suffixes[j]) != nullptr) { - vdex_name = pm_map_name(map); - found = true; - break; + for (const char* suffix : suffixes) { + size_t match_loc = mapped_name.find(suffix); + if (match_loc != std::string::npos && mapped_name.length() == match_loc + strlen(suffix)) { + return true; } } - if (!found) { - return true; - } + return false; +} + +static bool DisplayMappingIfFromOatFile(pm_map_t* map, Printer* printer) { // Open the page mapping (one uint64_t per page) for the entire vdex mapping. uint64_t* pagemap; size_t len; @@ -406,7 +408,7 @@ static bool DisplayMappingIfFromOatFile(pm_map_t* map, Printer* printer) { // Process the dex files. 
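Note (not part of the patch): IsVdexFileMapping() and IsOatFileMapping() above tighten the old strstr() test, which matched the suffix anywhere in the mapped name, to a check that the match ends exactly at the end of the name. A standalone helper with the same intent, shown only for illustration:

    static bool EndsWith(const std::string& name, const std::string& suffix) {
      return name.size() >= suffix.size() &&
             name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0;
    }

    // EndsWith("core.vdex", ".vdex")      -> true
    // EndsWith("core.vdex.bak", ".vdex")  -> false (strstr() would have matched here)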
std::cout << "MAPPING " << pm_map_name(map) - << StringPrintf(": %zx-%zx", pm_map_start(map), pm_map_end(map)) + << StringPrintf(": %" PRIx64 "-%" PRIx64, pm_map_start(map), pm_map_end(map)) << std::endl; ProcessOneOatMapping(pagemap, len, printer); free(pagemap); @@ -426,9 +428,10 @@ static bool FilterByNameContains(const std::string& mapped_file_name, } return false; } +#endif static void Usage(const char* cmd) { - std::cerr << "Usage: " << cmd << " [options] pid" << std::endl + std::cout << "Usage: " << cmd << " [options] pid" << std::endl << " --contains=<string>: Display sections containing string." << std::endl << " --help: Shows this message." << std::endl << " --verbose: Makes displays verbose." << std::endl; @@ -463,6 +466,7 @@ static int DexDiagMain(int argc, char* argv[]) { InitLogging(argv, Runtime::Aborter); MemMap::Init(); +#ifdef ART_TARGET_ANDROID pid_t pid; char* endptr; pid = (pid_t)strtol(argv[argc - 1], &endptr, 10); @@ -496,7 +500,8 @@ static int DexDiagMain(int argc, char* argv[]) { return EXIT_FAILURE; } - // Process the mappings that are due to DEX files. + bool match_found = false; + // Process the mappings that are due to vdex or oat files. Printer printer; for (size_t i = 0; i < num_maps; ++i) { std::string mapped_file_name = pm_map_name(maps[i]); @@ -504,12 +509,23 @@ static int DexDiagMain(int argc, char* argv[]) { if (!FilterByNameContains(mapped_file_name, name_filters)) { continue; } - if (!DisplayMappingIfFromVdexFile(maps[i], &printer)) { - return EXIT_FAILURE; - } else if (!DisplayMappingIfFromOatFile(maps[i], &printer)) { - return EXIT_FAILURE; + if (IsVdexFileMapping(mapped_file_name)) { + if (!DisplayMappingIfFromVdexFile(maps[i], &printer)) { + return EXIT_FAILURE; + } + match_found = true; + } else if (IsOatFileMapping(mapped_file_name)) { + if (!DisplayMappingIfFromOatFile(maps[i], &printer)) { + return EXIT_FAILURE; + } + match_found = true; } } + if (!match_found) { + std::cerr << "No relevant memory maps were found." << std::endl; + return EXIT_FAILURE; + } +#endif return EXIT_SUCCESS; } diff --git a/dexlayout/dexdiag_test.cc b/dexlayout/dexdiag_test.cc new file mode 100644 index 0000000000..a0b3f32756 --- /dev/null +++ b/dexlayout/dexdiag_test.cc @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <string> +#include <vector> + +#include "common_runtime_test.h" + +#include "runtime/exec_utils.h" +#include "runtime/oat_file.h" +#include "runtime/os.h" + +namespace art { + +static const char* kDexDiagContains = "--contains=core.vdex"; +static const char* kDexDiagContainsFails = "--contains=anything_other_than_core.vdex"; +static const char* kDexDiagHelp = "--help"; +static const char* kDexDiagVerbose = "--verbose"; +static const char* kDexDiagBinaryName = "dexdiag"; + +class DexDiagTest : public CommonRuntimeTest { + protected: + virtual void SetUp() { + CommonRuntimeTest::SetUp(); + } + + // Path to the dexdiag(d?)[32|64] binary. 
+ std::string GetDexDiagFilePath() { + std::string root = GetTestAndroidRoot(); + + root += "/bin/"; + root += kDexDiagBinaryName; + + std::string root32 = root + "32"; + // If we have both a 32-bit and a 64-bit build, the 32-bit file will have a 32 suffix. + if (OS::FileExists(root32.c_str()) && !Is64BitInstructionSet(kRuntimeISA)) { + return root32; + } else { + // This is a 64-bit build or only a single build exists. + return root; + } + } + + std::unique_ptr<OatFile> OpenOatAndVdexFiles() { + // Open the core.oat file. + // This is a little convoluted because we have to + // get the location of the default core image (.../framework/core.oat), + // find it in the right architecture subdirectory (.../framework/arm/core.oat), + // Then, opening the oat file has the side-effect of opening the corresponding + // vdex file (.../framework/arm/core.vdex). + const std::string default_location = GetCoreOatLocation(); + EXPECT_TRUE(!default_location.empty()); + std::string oat_location = GetSystemImageFilename(default_location.c_str(), kRuntimeISA); + EXPECT_TRUE(!oat_location.empty()); + std::cout << "==" << oat_location << std::endl; + std::string error_msg; + std::unique_ptr<OatFile> oat(OatFile::Open(oat_location.c_str(), + oat_location.c_str(), + nullptr, + nullptr, + false, + /*low_4gb*/false, + nullptr, + &error_msg)); + EXPECT_TRUE(oat != nullptr) << error_msg; + return oat; + } + + // Run dexdiag with a custom boot image location. + bool Exec(pid_t this_pid, const std::vector<std::string>& args, std::string* error_msg) { + // Invoke 'dexdiag' against the current process. + // This should succeed because we have a runtime and so it should + // be able to map in the boot.art and do a diff for it. + std::vector<std::string> exec_argv; + + // Build the command line "dexdiag <args> this_pid". + std::string executable_path = GetDexDiagFilePath(); + EXPECT_TRUE(OS::FileExists(executable_path.c_str())) << executable_path + << " should be a valid file path"; + exec_argv.push_back(executable_path); + for (const auto& arg : args) { + exec_argv.push_back(arg); + } + exec_argv.push_back(std::to_string(this_pid)); + + return ::art::Exec(exec_argv, error_msg); + } +}; + +// We can't run these tests on the host, as they will fail when trying to open +// /proc/pid/pagemap. +// On the target, we invoke 'dexdiag' against the current process. +// This should succeed because we have a runtime and so dexdiag should +// be able to find the map for, e.g., boot.vdex and friends. +TEST_F(DexDiagTest, DexDiagHelpTest) { + // TODO: test the resulting output. + std::string error_msg; + ASSERT_TRUE(Exec(getpid(), { kDexDiagHelp }, &error_msg)) << "Failed to execute -- because: " + << error_msg; +} + +#if defined (ART_TARGET) +TEST_F(DexDiagTest, DexDiagContainsTest) { +#else +TEST_F(DexDiagTest, DISABLED_DexDiagContainsTest) { +#endif + std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles(); + // TODO: test the resulting output. + std::string error_msg; + ASSERT_TRUE(Exec(getpid(), { kDexDiagContains }, &error_msg)) << "Failed to execute -- because: " + << error_msg; +} + +#if defined (ART_TARGET) +TEST_F(DexDiagTest, DexDiagContainsFailsTest) { +#else +TEST_F(DexDiagTest, DISABLED_DexDiagContainsFailsTest) { +#endif + std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles(); + // TODO: test the resulting output. 
+ std::string error_msg; + ASSERT_FALSE(Exec(getpid(), { kDexDiagContainsFails }, &error_msg)) + << "Failed to execute -- because: " + << error_msg; +} + +#if defined (ART_TARGET) +TEST_F(DexDiagTest, DexDiagVerboseTest) { +#else +TEST_F(DexDiagTest, DISABLED_DexDiagVerboseTest) { +#endif + // TODO: test the resulting output. + std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles(); + std::string error_msg; + ASSERT_TRUE(Exec(getpid(), { kDexDiagVerbose }, &error_msg)) << "Failed to execute -- because: " + << error_msg; +} + +} // namespace art diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc index a857976021..d6056c0ece 100644 --- a/runtime/arch/arch_test.cc +++ b/runtime/arch/arch_test.cc @@ -71,6 +71,15 @@ static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARG #undef FRAME_SIZE_SAVE_REFS_AND_ARGS static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING; #undef FRAME_SIZE_SAVE_EVERYTHING +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET +#undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET +#undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET } // namespace arm namespace arm64 { @@ -83,6 +92,11 @@ static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARG #undef FRAME_SIZE_SAVE_REFS_AND_ARGS static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING; #undef FRAME_SIZE_SAVE_EVERYTHING +#undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET } // namespace arm64 namespace mips { diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h index c03bcae526..8f2fd6ecc9 100644 --- a/runtime/arch/arm/asm_support_arm.h +++ b/runtime/arch/arm/asm_support_arm.h @@ -24,6 +24,36 @@ #define FRAME_SIZE_SAVE_REFS_AND_ARGS 112 #define FRAME_SIZE_SAVE_EVERYTHING 192 +// The offset from the art_quick_read_barrier_mark_introspection (used for field +// loads with 32-bit LDR) to the entrypoint for field loads with 16-bit LDR, +// i.e. art_quick_read_barrier_mark_introspection_narrow. +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET 0x20 +// The offsets from art_quick_read_barrier_mark_introspection to the GC root entrypoints, +// i.e. art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}. +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET 0x80 +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET 0xc0 +// The offset from art_quick_read_barrier_mark_introspection to the array switch cases, +// i.e. art_quick_read_barrier_mark_introspection_arrays. +#define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100 + +// The offset of the reference load LDR from the return address in LR for field loads. 
+#ifdef USE_HEAP_POISONING +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET -8 +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET -4 +#else +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET -4 +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET -2 +#endif +// The offset of the reference load LDR from the return address in LR for array loads. +#ifdef USE_HEAP_POISONING +#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -8 +#else +#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -4 +#endif +// The offset of the reference load LDR from the return address in LR for GC root loads. +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET -8 +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET -6 + // Flag for enabling R4 optimization in arm runtime // #define ARM_R4_SUSPEND_FLAG diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index de72d3a18f..919b0afc40 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -17,6 +17,7 @@ #include <math.h> #include <string.h> +#include "arch/arm/asm_support_arm.h" #include "entrypoints/jni/jni_entrypoints.h" #include "entrypoints/quick/quick_alloc_entrypoints.h" #include "entrypoints/quick/quick_default_externs.h" @@ -51,6 +52,13 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_narrow(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_wide(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_narrow( + mirror::Object*); + // Used by soft float. // Single-precision FP arithmetics. extern "C" float fmodf(float a, float b); // REM_FLOAT[_2ADDR] @@ -67,19 +75,44 @@ extern "C" int __aeabi_idivmod(int32_t, int32_t); // [DIV|REM]_INT[_2ADDR|_LIT8 // Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR] extern "C" int64_t __aeabi_ldivmod(int64_t, int64_t); -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { - qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr; - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; - qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr; - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; - qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr; - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; - qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr; - qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr; - qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr; - qpoints->pReadBarrierMarkReg11 = is_marking ? 
art_quick_read_barrier_mark_reg11 : nullptr; +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr; + qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; + qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr; + qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr; + qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; + qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; + + // For the alignment check, strip the Thumb mode bit. + DCHECK_ALIGNED(reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection) - 1u, 256u); + // Check the field narrow entrypoint offset from the introspection entrypoint. + intptr_t narrow_diff = + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_narrow) - + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); + DCHECK_EQ(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET, narrow_diff); + // Check array switch cases offsets from the introspection entrypoint. + intptr_t array_diff = + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_arrays) - + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); + DCHECK_EQ(BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET, array_diff); + // Check the GC root entrypoint offsets from the introspection entrypoint. + intptr_t gc_roots_wide_diff = + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_wide) - + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); + DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET, gc_roots_wide_diff); + intptr_t gc_roots_narrow_diff = + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_narrow) - + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); + DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET, gc_roots_narrow_diff); + // The register 12, i.e. IP, is reserved, so there is no art_quick_read_barrier_mark_reg12. + // We're using the entry to hold a pointer to the introspection entrypoint instead. + qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_introspection : nullptr; } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { @@ -138,7 +171,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); qpoints->pReadBarrierMarkReg12 = nullptr; // Cannot use register 12 (IP) to pass arguments. qpoints->pReadBarrierMarkReg13 = nullptr; // Cannot use register 13 (SP) to pass arguments. 
qpoints->pReadBarrierMarkReg14 = nullptr; // Cannot use register 14 (LR) to pass arguments. diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index a277edfa29..31a7f6ae8e 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -1681,8 +1681,8 @@ END art_quick_instrumentation_entry .extern artDeoptimize ENTRY art_quick_deoptimize SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0 - mov r0, r9 @ Set up args. - blx artDeoptimize @ artDeoptimize(Thread*) + mov r0, r9 @ pass Thread::Current + blx artDeoptimize @ (Thread*) END art_quick_deoptimize /* @@ -1691,9 +1691,9 @@ END art_quick_deoptimize */ .extern artDeoptimizeFromCompiledCode ENTRY art_quick_deoptimize_from_compiled_code - SETUP_SAVE_EVERYTHING_FRAME r0 - mov r0, r9 @ Set up args. - blx artDeoptimizeFromCompiledCode @ artDeoptimizeFromCompiledCode(Thread*) + SETUP_SAVE_EVERYTHING_FRAME r1 + mov r1, r9 @ pass Thread::Current + blx artDeoptimizeFromCompiledCode @ (DeoptimizationKind, Thread*) END art_quick_deoptimize_from_compiled_code /* @@ -2146,6 +2146,289 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 +// Helper macros for Baker CC read barrier mark introspection (BRBMI). +.macro BRBMI_FOR_12_REGISTERS macro_for_register, macro_for_reserved_register + \macro_for_register r0 + \macro_for_register r1 + \macro_for_register r2 + \macro_for_register r3 + \macro_for_reserved_register // R4 is reserved for the entrypoint address. + \macro_for_register r5 + \macro_for_register r6 + \macro_for_register r7 + \macro_for_register r8 + \macro_for_register r9 + \macro_for_register r10 + \macro_for_register r11 +.endm + +.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register + BRBMI_FOR_12_REGISTERS \macro_for_register, \macro_for_reserved_register + \macro_for_reserved_register // IP is reserved. + \macro_for_reserved_register // SP is reserved. + \macro_for_reserved_register // LR is reserved. + \macro_for_reserved_register // PC is reserved. +.endm + +.macro BRBMI_RETURN_SWITCH_CASE reg +.Lmark_introspection_return_switch_case_\reg: + mov \reg, ip + bx lr +.endm + +.macro BRBMI_BAD_RETURN_SWITCH_CASE +.Lmark_introspection_return_switch_case_bad: + BRBMI_BKPT_FILL_4B +.endm + +.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg + .byte (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2 +.endm + +.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET + .byte (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2 +.endm + +#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET +#error "Array and field introspection code sharing requires same LDR offset." +#endif +.macro BRBMI_ARRAY_LOAD index_reg + ldr ip, [ip, \index_reg, lsl #2] // 4 bytes. + b art_quick_read_barrier_mark_introspection // Should be 2 bytes, encoding T2. + .balign 8 // Add padding to 8 bytes. +.endm + +.macro BRBMI_BKPT_FILL_4B + bkpt 0 + bkpt 0 +.endm + +.macro BRBMI_BKPT_FILL_8B + BRBMI_BKPT_FILL_4B + BRBMI_BKPT_FILL_4B +.endm + +.macro BRBMI_RUNTIME_CALL + // Note: This macro generates exactly 22 bytes of code. The core register + // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions. + + push {r0-r3, r7, lr} // Save return address and caller-save registers. 
+ .cfi_adjust_cfa_offset 24 + .cfi_rel_offset r0, 0 + .cfi_rel_offset r1, 4 + .cfi_rel_offset r2, 8 + .cfi_rel_offset r3, 12 + .cfi_rel_offset r7, 16 + .cfi_rel_offset lr, 20 + + mov r0, ip // Pass the reference. + vpush {s0-s15} // save floating-point caller-save registers + .cfi_adjust_cfa_offset 64 + bl artReadBarrierMark // r0 <- artReadBarrierMark(obj) + vpop {s0-s15} // restore floating-point registers + .cfi_adjust_cfa_offset -64 + mov ip, r0 // Move reference to ip in preparation for return switch. + + pop {r0-r3, r7, lr} // Restore registers. + .cfi_adjust_cfa_offset -24 + .cfi_restore r0 + .cfi_restore r1 + .cfi_restore r2 + .cfi_restore r3 + .cfi_restore r7 + .cfi_restore lr +.endm + +.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix + // If reference is null, just return it in the right register. + cmp ip, #0 + beq .Lmark_introspection_return\label_suffix + // Use R4 as temp and check the mark bit of the reference. + ldr r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + tst r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED + beq .Lmark_introspection_unmarked\label_suffix +.Lmark_introspection_return\label_suffix: +.endm + +.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix +.Lmark_introspection_unmarked\label_suffix: + // Check if the top two bits are one, if this is the case it is a forwarding address. +#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) + // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in + // the highest bits and the "forwarding address" state to have all bits set. +#error "Unexpected lock word state shift or forwarding address state value." +#endif + cmp r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT) + bhs .Lmark_introspection_forwarding_address\label_suffix +.endm + +.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix +.Lmark_introspection_forwarding_address\label_suffix: + // Note: This macro generates exactly 22 bytes of code, the branch is near. + + // Shift left by the forwarding address shift. This clears out the state bits since they are + // in the top 2 bits of the lock word. + lsl ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT + b .Lmark_introspection_return\label_suffix +.endm + +.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset + // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR. + ldrh r4, [lr, #(-1 + \ldr_offset + 2)] +.endm + +.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset + // Load the 16-bit instruction. Adjust for the thumb state in LR. + ldrh r4, [lr, #(-1 + \ldr_offset)] +.endm + +.macro BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH gc_root_ldr_offset, label_suffix + .balign 64 + .thumb_func + .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function + .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix + .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix +art_quick_read_barrier_mark_introspection_gc_roots\label_suffix: + BRBMI_RUNTIME_CALL + // Load the LDR (or the half of it) that contains Rt. + BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \gc_root_ldr_offset + b .Lmark_introspection_extract_register_and_return\label_suffix + // We've used 28 bytes since the "gc_roots" entrypoint (22 bytes for + // BRBMI_RUNTIME_CALL, 4 bytes for LDRH and 2 bytes for the branch). Squeeze + // the 6 byte forwarding address extraction here across the 32-byte boundary. 
+ BRBMI_EXTRACT_FORWARDING_ADDRESS \label_suffix + // And the slow path taking exactly 30 bytes (6 bytes for the forwarding + // address check, 22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near + // branch) shall take the rest of the 32-byte section (within a cache line). + BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix + BRBMI_RUNTIME_CALL + b .Lmark_introspection_return\label_suffix +.endm + + /* + * Use introspection to load a reference from the same address as the LDR + * instruction in generated code would load (unless loaded by the thunk, + * see below), call ReadBarrier::Mark() with that reference if needed + * and return it in the same register as the LDR instruction would load. + * + * The entrypoint is called through a thunk that differs across load kinds. + * For field and array loads the LDR instruction in generated code follows + * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning) + * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where + * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk + * knows the holder and performs the gray bit check, returning to the LDR + * instruction if the object is not gray, so this entrypoint no longer + * needs to know anything about the holder. For GC root loads, the LDR + * instruction in generated code precedes the branch to the thunk, i.e. the + * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1) + * where the -1 is again the Thumb mode bit adjustment, and the thunk does + * not do the gray bit check. + * + * For field accesses and array loads with a constant index the thunk loads + * the reference into IP using introspection and calls the main entrypoint, + * art_quick_read_barrier_mark_introspection. With heap poisoning enabled, + * the passed reference is poisoned. + * + * For array accesses with non-constant index, the thunk inserts the bits + * 0-5 of the LDR instruction to the entrypoint address, effectively + * calculating a switch case label based on the index register (bits 0-3) + * and adding an extra offset (bits 4-5 hold the shift which is always 2 + * for reference loads) to differentiate from the main entrypoint, then + * moves the base register to IP and jumps to the switch case. Therefore + * we need to align the main entrypoint to 512 bytes, accounting for + * a 256-byte offset followed by 16 array entrypoints starting at + * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR + * (register) and a branch to the main entrypoint. + * + * For GC root accesses we cannot use the main entrypoint because of the + * different offset where the LDR instruction in generated code is located. + * (And even with heap poisoning enabled, GC roots are not poisoned.) + * To re-use the same entrypoint pointer in generated code, we make sure + * that the gc root entrypoint (a copy of the entrypoint with a different + * offset for introspection loads) is located at a known offset (128 bytes, + * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main + * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves + * the root register to IP and jumps to the customized entrypoint, + * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also + * performs all the fast-path checks, so we need just the slow path. 
+ * + * The code structure is + * art_quick_read_barrier_mark_introspection: + * Up to 32 bytes code for main entrypoint fast-path code for fields + * (and array elements with constant offset) with LDR encoding T3; + * jumps to the switch in the "narrow" entrypoint. + * Padding to 32 bytes if needed. + * art_quick_read_barrier_mark_introspection_narrow: + * Up to 48 bytes code for fast path code for fields (and array + * elements with constant offset) with LDR encoding T1, ending in the + * return switch instruction TBB and the table with switch offsets. + * Padding to 80 bytes if needed. + * .Lmark_introspection_return_switch_case_r0: + * Exactly 48 bytes of code for the return switch cases (12 cases, + * including BKPT for the reserved registers). + * Ends at 128 bytes total. + * art_quick_read_barrier_mark_introspection_gc_roots_wide: + * GC root entrypoint code for LDR encoding T3 (28 bytes). + * Forwarding address extraction for LDR encoding T3 (6 bytes). + * Slow path for main entrypoint for LDR encoding T3 (30 bytes). + * Ends at 192 bytes total. + * art_quick_read_barrier_mark_introspection_gc_roots_narrow: + * GC root entrypoint code for LDR encoding T1 (28 bytes). + * Forwarding address extraction for LDR encoding T1 (6 bytes). + * Slow path for main entrypoint for LDR encoding T1 (30 bytes). + * Ends at 256 bytes total. + * art_quick_read_barrier_mark_introspection_arrays: + * Exactly 128 bytes for array load switch cases (16x2 instructions). + */ + .balign 512 +ENTRY art_quick_read_barrier_mark_introspection + // At this point, IP contains the reference, R4 can be freely used. + // (R4 is reserved for the entrypoint address.) + // For heap poisoning, the reference is poisoned, so unpoison it first. + UNPOISON_HEAP_REF ip + // Check for null or marked, lock word is loaded into IP. + BRBMI_CHECK_NULL_AND_MARKED _wide + // Load the half of the instruction that contains Rt. + BRBMI_LOAD_RETURN_REG_FROM_CODE_wide BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET +.Lmark_introspection_extract_register_and_return_wide: + lsr r4, r4, #12 // Extract `ref_reg`. + b .Lmark_introspection_return_switch + + .balign 32 + .thumb_func + .type art_quick_read_barrier_mark_introspection_narrow, #function + .hidden art_quick_read_barrier_mark_introspection_narrow + .global art_quick_read_barrier_mark_introspection_narrow +art_quick_read_barrier_mark_introspection_narrow: + // At this point, IP contains the reference, R4 can be freely used. + // (R4 is reserved for the entrypoint address.) + // For heap poisoning, the reference is poisoned, so unpoison it first. + UNPOISON_HEAP_REF ip + // Check for null or marked, lock word is loaded into R4. + BRBMI_CHECK_NULL_AND_MARKED _narrow + // Load the 16-bit instruction. + BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET +.Lmark_introspection_extract_register_and_return_narrow: + and r4, r4, #7 // Extract `ref_reg`. +.Lmark_introspection_return_switch: + tbb [pc, r4] // Jump to the switch case. 
+.Lmark_introspection_return_table: + BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET + .balign 16 + BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE + + BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide + BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow + + .balign 256 + .thumb_func + .type art_quick_read_barrier_mark_introspection_arrays, #function + .hidden art_quick_read_barrier_mark_introspection_arrays + .global art_quick_read_barrier_mark_introspection_arrays +art_quick_read_barrier_mark_introspection_arrays: + BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B +END art_quick_read_barrier_mark_introspection + .extern artInvokePolymorphic ENTRY art_quick_invoke_polymorphic SETUP_SAVE_REFS_AND_ARGS_FRAME r2 diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index bc7bcb1739..610cdee683 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -75,7 +75,7 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Obj extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*); -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { // ARM64 is the architecture with the largest number of core // registers (32) that supports the read barrier configuration. // Because registers 30 (LR) and 31 (SP/XZR) cannot be used to pass @@ -85,35 +85,35 @@ void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { // have less core registers (resp. 16, 8 and 16). (We may have to // revise that design choice if read barrier support is added for // MIPS and/or MIPS64.) - qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr; - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; - qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr; - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; - qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr; - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; - qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr; - qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr; - qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr; - qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr; - qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr; - qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr; - qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr; - qpoints->pReadBarrierMarkReg15 = is_marking ? 
art_quick_read_barrier_mark_reg15 : nullptr; - qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr; - qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr; - qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr; - qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr; - qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr; - qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr; - qpoints->pReadBarrierMarkReg23 = is_marking ? art_quick_read_barrier_mark_reg23 : nullptr; - qpoints->pReadBarrierMarkReg24 = is_marking ? art_quick_read_barrier_mark_reg24 : nullptr; - qpoints->pReadBarrierMarkReg25 = is_marking ? art_quick_read_barrier_mark_reg25 : nullptr; - qpoints->pReadBarrierMarkReg26 = is_marking ? art_quick_read_barrier_mark_reg26 : nullptr; - qpoints->pReadBarrierMarkReg27 = is_marking ? art_quick_read_barrier_mark_reg27 : nullptr; - qpoints->pReadBarrierMarkReg28 = is_marking ? art_quick_read_barrier_mark_reg28 : nullptr; - qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr; + qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr; + qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; + qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr; + qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr; + qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; + qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; + qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr; + qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr; + qpoints->pReadBarrierMarkReg14 = is_active ? art_quick_read_barrier_mark_reg14 : nullptr; + qpoints->pReadBarrierMarkReg15 = is_active ? art_quick_read_barrier_mark_reg15 : nullptr; + qpoints->pReadBarrierMarkReg17 = is_active ? art_quick_read_barrier_mark_reg17 : nullptr; + qpoints->pReadBarrierMarkReg18 = is_active ? art_quick_read_barrier_mark_reg18 : nullptr; + qpoints->pReadBarrierMarkReg19 = is_active ? art_quick_read_barrier_mark_reg19 : nullptr; + qpoints->pReadBarrierMarkReg20 = is_active ? art_quick_read_barrier_mark_reg20 : nullptr; + qpoints->pReadBarrierMarkReg21 = is_active ? art_quick_read_barrier_mark_reg21 : nullptr; + qpoints->pReadBarrierMarkReg22 = is_active ? art_quick_read_barrier_mark_reg22 : nullptr; + qpoints->pReadBarrierMarkReg23 = is_active ? art_quick_read_barrier_mark_reg23 : nullptr; + qpoints->pReadBarrierMarkReg24 = is_active ? art_quick_read_barrier_mark_reg24 : nullptr; + qpoints->pReadBarrierMarkReg25 = is_active ? 
art_quick_read_barrier_mark_reg25 : nullptr; + qpoints->pReadBarrierMarkReg26 = is_active ? art_quick_read_barrier_mark_reg26 : nullptr; + qpoints->pReadBarrierMarkReg27 = is_active ? art_quick_read_barrier_mark_reg27 : nullptr; + qpoints->pReadBarrierMarkReg28 = is_active ? art_quick_read_barrier_mark_reg28 : nullptr; + qpoints->pReadBarrierMarkReg29 = is_active ? art_quick_read_barrier_mark_reg29 : nullptr; // Check that array switch cases are at appropriate offsets from the introspection entrypoint. DCHECK_ALIGNED(art_quick_read_barrier_mark_introspection, 512u); @@ -128,7 +128,7 @@ void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET, gc_roots_diff); // The register 16, i.e. IP0, is reserved, so there is no art_quick_read_barrier_mark_reg16. // We're using the entry to hold a pointer to the introspection entrypoint instead. - qpoints->pReadBarrierMarkReg16 = is_marking ? art_quick_read_barrier_mark_introspection : nullptr; + qpoints->pReadBarrierMarkReg16 = is_active ? art_quick_read_barrier_mark_introspection : nullptr; } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { @@ -188,7 +188,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; qpoints->pReadBarrierMarkReg16 = nullptr; // IP0 is used as a temp by the asm stub. - UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); qpoints->pReadBarrierSlow = artReadBarrierSlow; qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow; }; diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index c555126668..18015b572e 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -2219,7 +2219,7 @@ END art_quick_instrumentation_exit ENTRY art_quick_deoptimize SETUP_SAVE_ALL_CALLEE_SAVES_FRAME mov x0, xSELF // Pass thread. - bl artDeoptimize // artDeoptimize(Thread*) + bl artDeoptimize // (Thread*) brk 0 END art_quick_deoptimize @@ -2230,8 +2230,8 @@ END art_quick_deoptimize .extern artDeoptimizeFromCompiledCode ENTRY art_quick_deoptimize_from_compiled_code SETUP_SAVE_EVERYTHING_FRAME - mov x0, xSELF // Pass thread. - bl artDeoptimizeFromCompiledCode // artDeoptimizeFromCompiledCode(Thread*) + mov x1, xSELF // Pass thread. + bl artDeoptimizeFromCompiledCode // (DeoptimizationKind, Thread*) brk 0 END art_quick_deoptimize_from_compiled_code diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc index 434e33c42a..9978da5f74 100644 --- a/runtime/arch/mips/entrypoints_init_mips.cc +++ b/runtime/arch/mips/entrypoints_init_mips.cc @@ -86,68 +86,68 @@ extern "C" double fmod(double a, double b); // REM_DOUBLE[_2ADDR] extern "C" int64_t __divdi3(int64_t, int64_t); extern "C" int64_t __moddi3(int64_t, int64_t); -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg01), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg02 = is_marking ? 
art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg02), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg03), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr; + qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg04), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg05), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg06), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg07), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr; + qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg08), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr; + qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg09), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr; + qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg10), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr; + qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg11), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr; + qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg12), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr; + qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg13), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr; + qpoints->pReadBarrierMarkReg14 = is_active ? 
art_quick_read_barrier_mark_reg14 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg14), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr; + qpoints->pReadBarrierMarkReg17 = is_active ? art_quick_read_barrier_mark_reg17 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg17), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr; + qpoints->pReadBarrierMarkReg18 = is_active ? art_quick_read_barrier_mark_reg18 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg18), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr; + qpoints->pReadBarrierMarkReg19 = is_active ? art_quick_read_barrier_mark_reg19 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg19), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr; + qpoints->pReadBarrierMarkReg20 = is_active ? art_quick_read_barrier_mark_reg20 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg20), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr; + qpoints->pReadBarrierMarkReg21 = is_active ? art_quick_read_barrier_mark_reg21 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg21), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr; + qpoints->pReadBarrierMarkReg22 = is_active ? art_quick_read_barrier_mark_reg22 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg22), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr; + qpoints->pReadBarrierMarkReg29 = is_active ? art_quick_read_barrier_mark_reg29 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29), "Non-direct C stub marked direct."); } @@ -160,7 +160,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub; // Alloc - ResetQuickAllocEntryPoints(qpoints, /*is_marking*/ false); + ResetQuickAllocEntryPoints(qpoints, /*is_active*/ false); // Cast qpoints->pInstanceofNonTrivial = artInstanceOfFromCode; @@ -412,7 +412,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; static_assert(IsDirectEntrypoint(kQuickReadBarrierJni), "Direct C stub not marked direct."); - UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); // Cannot use the following registers to pass arguments: // 0(ZERO), 1(AT), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA). // Note that there are 30 entry points only: 00 for register 1(AT), ..., 29 for register 30(S8). 
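The READ_BARRIER_MARK_REG stubs added to the MIPS and MIPS64 files below, like the ARM introspection entrypoints above, implement the same lock-word fast path before falling back to the runtime call: return null as-is, return already-marked references as-is, decode a forwarding address if the object has been moved, and only otherwise call the mark routine. A rough C++ sketch of that logic, for orientation only: the state shift (30) and the forwarding-address state value (3) are confirmed by the #if checks in the assembly, while the mark-bit mask, the forwarding-address shift, LoadLockWord() and MarkSlowPath() are illustrative placeholders rather than ART definitions.

#include <cstdint>

// Layout confirmed by the #if checks in the assembly: the lock word state is in
// the top two bits and the "forwarding address" state is 3.
constexpr uint32_t kLockWordStateShift = 30;                // LOCK_WORD_STATE_SHIFT
constexpr uint32_t kLockWordStateForwardingAddress = 3;     // LOCK_WORD_STATE_FORWARDING_ADDRESS
// Assumed for illustration only; the real values come from the generated asm_support constants.
constexpr uint32_t kLockWordMarkBitMaskShifted = 1u << 29;  // LOCK_WORD_MARK_BIT_MASK_SHIFTED (assumed)
constexpr uint32_t kLockWordForwardingAddressShift = 3;     // LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT (assumed)

// Placeholders for the raw lock word load (MIRROR_OBJECT_LOCK_WORD_OFFSET in the
// stubs) and the out-of-line runtime call (artReadBarrierMark in the stubs).
uint32_t LoadLockWord(uintptr_t ref);
uintptr_t MarkSlowPath(uintptr_t ref);

// Fast path: null check, mark-bit check, forwarding-address check, then slow path.
uintptr_t ReadBarrierMarkFastPath(uintptr_t ref) {
  if (ref == 0) {
    return 0;  // Null references never need marking.
  }
  const uint32_t lock_word = LoadLockWord(ref);
  if ((lock_word & kLockWordMarkBitMaskShifted) != 0) {
    return ref;  // Already marked in this cycle; return the reference unchanged.
  }
  if ((lock_word >> kLockWordStateShift) == kLockWordStateForwardingAddress) {
    // The object has been moved; the remaining lock word bits hold the forwarding
    // address. Shifting left drops the state bits, which live in the top two bits.
    return static_cast<uintptr_t>(lock_word) << kLockWordForwardingAddressShift;
  }
  return MarkSlowPath(ref);  // Everything else goes to the runtime marking routine.
}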
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index 61a3a04708..e628a9f40d 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -421,7 +421,7 @@ SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP .endm -.macro RESTORE_SAVE_EVERYTHING_FRAME +.macro RESTORE_SAVE_EVERYTHING_FRAME restore_a0=1 addiu $sp, $sp, ARG_SLOT_SIZE # remove argument slots on the stack .cfi_adjust_cfa_offset -ARG_SLOT_SIZE @@ -490,8 +490,10 @@ .cfi_restore 6 lw $a1, 160($sp) .cfi_restore 5 + .if \restore_a0 lw $a0, 156($sp) .cfi_restore 4 + .endif lw $v1, 152($sp) .cfi_restore 3 lw $v0, 148($sp) @@ -507,16 +509,26 @@ .endm /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_ + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. + * Requires $gp properly set up. */ -.macro DELIVER_PENDING_EXCEPTION - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME # save callee saves for throw +.macro DELIVER_PENDING_EXCEPTION_FRAME_READY la $t9, artDeliverPendingExceptionFromCode jalr $zero, $t9 # artDeliverPendingExceptionFromCode(Thread*) move $a0, rSELF # pass Thread::Current .endm + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. + * Requires $gp properly set up. + */ +.macro DELIVER_PENDING_EXCEPTION + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME # save callee saves for throw + DELIVER_PENDING_EXCEPTION_FRAME_READY +.endm + .macro RETURN_IF_NO_EXCEPTION lw $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_ RESTORE_SAVE_REFS_ONLY_FRAME @@ -1660,30 +1672,51 @@ ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, art GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) +// Macro for string and type resolution and initialization. +// $a0 is both input and output. +.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint + .extern \entrypoint +ENTRY_NO_GP \name + SETUP_SAVE_EVERYTHING_FRAME # Save everything in case of GC. + move $s2, $gp # Preserve $gp across the call for exception delivery. + la $t9, \entrypoint + jalr $t9 # (uint32_t index, Thread*) + move $a1, rSELF # Pass Thread::Current (in delay slot). + beqz $v0, 1f # Success? + move $a0, $v0 # Move result to $a0 (in delay slot). + RESTORE_SAVE_EVERYTHING_FRAME 0 # Restore everything except $a0. + jalr $zero, $ra # Return on success. + nop +1: + move $gp, $s2 + DELIVER_PENDING_EXCEPTION_FRAME_READY +END \name +.endm + /* * Entry from managed code to resolve a string, this stub will allocate a String and deliver an * exception on error. On success the String is returned. A0 holds the string index. The fast * path check for hit in strings cache has already been performed. */ -ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode /* * Entry from managed code when uninitialized static storage, this stub will run the class * initializer and deliver the exception on error. On success the static storage base is * returned. 
*/ -ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode /* * Entry from managed code when dex cache misses for a type_idx. */ -ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode /* * Entry from managed code when type_idx needs to be checked for access and dex cache may also * miss. */ -ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode /* * Called by managed code when the value in rSUSPEND has been decremented to 0. @@ -1854,7 +1887,8 @@ ENTRY art_quick_generic_jni_trampoline nop 2: - lw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) + lw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) + move $gp, $s3 # restore $gp from $s3 # This will create a new save-all frame, required by the runtime. DELIVER_PENDING_EXCEPTION END art_quick_generic_jni_trampoline @@ -1944,8 +1978,7 @@ END art_quick_instrumentation_exit ENTRY art_quick_deoptimize SETUP_SAVE_ALL_CALLEE_SAVES_FRAME la $t9, artDeoptimize - jalr $t9 # artDeoptimize(Thread*) - # Returns caller method's frame size. + jalr $t9 # (Thread*) move $a0, rSELF # pass Thread::current END art_quick_deoptimize @@ -1957,9 +1990,8 @@ END art_quick_deoptimize ENTRY art_quick_deoptimize_from_compiled_code SETUP_SAVE_EVERYTHING_FRAME la $t9, artDeoptimizeFromCompiledCode - jalr $t9 # artDeoptimizeFromCompiledCode(Thread*) - # Returns caller method's frame size. - move $a0, rSELF # pass Thread::current + jalr $t9 # (DeoptimizationKind, Thread*) + move $a1, rSELF # pass Thread::current END art_quick_deoptimize_from_compiled_code /* @@ -2213,8 +2245,32 @@ END art_quick_string_compareto */ .macro READ_BARRIER_MARK_REG name, reg ENTRY \name - /* TODO: optimizations: mark bit, forwarding. */ - addiu $sp, $sp, -160 # includes 16 bytes of space for argument registers a0-a3 + // Null check so that we can load the lock word. + bnez \reg, .Lnot_null_\name + nop +.Lret_rb_\name: + jalr $zero, $ra + nop +.Lnot_null_\name: + // Check lock word for mark bit, if marked return. + lw $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET(\reg) + .set push + .set noat + sll $at, $t9, 31 - LOCK_WORD_MARK_BIT_SHIFT # Move mark bit to sign bit. + bltz $at, .Lret_rb_\name +#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) + // The below code depends on the lock word state being in the highest bits + // and the "forwarding address" state having all bits set. +#error "Unexpected lock word state shift or forwarding address state value." +#endif + // Test that both the forwarding state bits are 1. + sll $at, $t9, 1 + and $at, $at, $t9 # Sign bit = 1 IFF both bits are 1. + bltz $at, .Lret_forwarding_address\name + nop + .set pop + + addiu $sp, $sp, -160 # Includes 16 bytes of space for argument registers a0-a3. .cfi_adjust_cfa_offset 160 sw $ra, 156($sp) @@ -2319,6 +2375,12 @@ ENTRY \name jalr $zero, $ra addiu $sp, $sp, 160 .cfi_adjust_cfa_offset -160 + +.Lret_forwarding_address\name: + jalr $zero, $ra + // Shift left by the forwarding address shift. This clears out the state bits since they are + // in the top 2 bits of the lock word. 
+ sll \reg, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT END \name .endm diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc index f8242ae1b5..763d93eb47 100644 --- a/runtime/arch/mips64/entrypoints_init_mips64.cc +++ b/runtime/arch/mips64/entrypoints_init_mips64.cc @@ -86,27 +86,27 @@ extern "C" int64_t __divdi3(int64_t, int64_t); extern "C" int64_t __moddi3(int64_t, int64_t); // No read barrier entrypoints for marking registers. -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; - qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr; - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; - qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr; - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; - qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr; - qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr; - qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr; - qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr; - qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr; - qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr; - qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr; - qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr; - qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr; - qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr; - qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr; - qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr; - qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr; +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; + qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr; + qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr; + qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; + qpoints->pReadBarrierMarkReg11 = is_active ? 
art_quick_read_barrier_mark_reg11 : nullptr; + qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr; + qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr; + qpoints->pReadBarrierMarkReg17 = is_active ? art_quick_read_barrier_mark_reg17 : nullptr; + qpoints->pReadBarrierMarkReg18 = is_active ? art_quick_read_barrier_mark_reg18 : nullptr; + qpoints->pReadBarrierMarkReg19 = is_active ? art_quick_read_barrier_mark_reg19 : nullptr; + qpoints->pReadBarrierMarkReg20 = is_active ? art_quick_read_barrier_mark_reg20 : nullptr; + qpoints->pReadBarrierMarkReg21 = is_active ? art_quick_read_barrier_mark_reg21 : nullptr; + qpoints->pReadBarrierMarkReg22 = is_active ? art_quick_read_barrier_mark_reg22 : nullptr; + qpoints->pReadBarrierMarkReg29 = is_active ? art_quick_read_barrier_mark_reg29 : nullptr; } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { @@ -168,7 +168,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); // Cannot use the following registers to pass arguments: // 0(ZERO), 1(AT), 15(T3), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA). // Note that there are 30 entry points only: 00 for register 1(AT), ..., 29 for register 30(S8). diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index 24caa0e290..40bad16291 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -447,7 +447,7 @@ SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP .endm -.macro RESTORE_SAVE_EVERYTHING_FRAME +.macro RESTORE_SAVE_EVERYTHING_FRAME restore_a0=1 // Restore FP registers. l.d $f31, 264($sp) l.d $f30, 256($sp) @@ -530,8 +530,10 @@ .cfi_restore 6 ld $a1, 304($sp) .cfi_restore 5 + .if \restore_a0 ld $a0, 296($sp) .cfi_restore 4 + .endif ld $v1, 288($sp) .cfi_restore 3 ld $v0, 280($sp) @@ -547,18 +549,26 @@ .endm /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, - * where the pending - * exception is Thread::Current()->exception_ + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. + * Requires $gp properly set up. */ -.macro DELIVER_PENDING_EXCEPTION - SETUP_GP - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME # save callee saves for throw +.macro DELIVER_PENDING_EXCEPTION_FRAME_READY dla $t9, artDeliverPendingExceptionFromCode jalr $zero, $t9 # artDeliverPendingExceptionFromCode(Thread*) move $a0, rSELF # pass Thread::Current .endm + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. 
+ */ +.macro DELIVER_PENDING_EXCEPTION + SETUP_GP + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME # save callee saves for throw + DELIVER_PENDING_EXCEPTION_FRAME_READY +.endm + .macro RETURN_IF_NO_EXCEPTION ld $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_ RESTORE_SAVE_REFS_ONLY_FRAME @@ -1615,30 +1625,48 @@ ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, art GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) +// Macro for string and type resolution and initialization. +// $a0 is both input and output. +.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint + .extern \entrypoint +ENTRY_NO_GP \name + SETUP_SAVE_EVERYTHING_FRAME # Save everything in case of GC. + dla $t9, \entrypoint + jalr $t9 # (uint32_t index, Thread*) + move $a1, rSELF # Pass Thread::Current (in delay slot). + beqz $v0, 1f # Success? + move $a0, $v0 # Move result to $a0 (in delay slot). + RESTORE_SAVE_EVERYTHING_FRAME 0 # Restore everything except $a0. + jic $ra, 0 # Return on success. +1: + DELIVER_PENDING_EXCEPTION_FRAME_READY +END \name +.endm + /* * Entry from managed code to resolve a string, this stub will allocate a String and deliver an * exception on error. On success the String is returned. A0 holds the string index. The fast * path check for hit in strings cache has already been performed. */ -ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode /* * Entry from managed code when uninitialized static storage, this stub will run the class * initializer and deliver the exception on error. On success the static storage base is * returned. */ -ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode /* * Entry from managed code when dex cache misses for a type_idx. */ -ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode /* * Entry from managed code when type_idx needs to be checked for access and dex cache may also * miss. */ -ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode /* * Called by managed code when the value in rSUSPEND has been decremented to 0. @@ -1885,8 +1913,7 @@ END art_quick_instrumentation_exit .extern artEnterInterpreterFromDeoptimize ENTRY art_quick_deoptimize SETUP_SAVE_ALL_CALLEE_SAVES_FRAME - jal artDeoptimize # artDeoptimize(Thread*, SP) - # Returns caller method's frame size. + jal artDeoptimize # artDeoptimize(Thread*) move $a0, rSELF # pass Thread::current END art_quick_deoptimize @@ -1897,9 +1924,8 @@ END art_quick_deoptimize .extern artDeoptimizeFromCompiledCode ENTRY_NO_GP art_quick_deoptimize_from_compiled_code SETUP_SAVE_EVERYTHING_FRAME - jal artDeoptimizeFromCompiledCode # artDeoptimizeFromCompiledCode(Thread*, SP) - # Returns caller method's frame size. 
- move $a0, rSELF # pass Thread::current + jal artDeoptimizeFromCompiledCode # (DeoptimizationKind, Thread*) + move $a1, rSELF # pass Thread::current END art_quick_deoptimize_from_compiled_code .set push @@ -2067,7 +2093,29 @@ END art_quick_indexof */ .macro READ_BARRIER_MARK_REG name, reg ENTRY \name - /* TODO: optimizations: mark bit, forwarding. */ + // Null check so that we can load the lock word. + bnezc \reg, .Lnot_null_\name + nop +.Lret_rb_\name: + jic $ra, 0 +.Lnot_null_\name: + // Check lock word for mark bit, if marked return. + lw $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET(\reg) + .set push + .set noat + sll $at, $t9, 31 - LOCK_WORD_MARK_BIT_SHIFT # Move mark bit to sign bit. + bltzc $at, .Lret_rb_\name +#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) + // The below code depends on the lock word state being in the highest bits + // and the "forwarding address" state having all bits set. +#error "Unexpected lock word state shift or forwarding address state value." +#endif + // Test that both the forwarding state bits are 1. + sll $at, $t9, 1 + and $at, $at, $t9 # Sign bit = 1 IFF both bits are 1. + bltzc $at, .Lret_forwarding_address\name + .set pop + daddiu $sp, $sp, -320 .cfi_adjust_cfa_offset 320 @@ -2202,6 +2250,13 @@ ENTRY \name jalr $zero, $ra daddiu $sp, $sp, 320 .cfi_adjust_cfa_offset -320 + +.Lret_forwarding_address\name: + // Shift left by the forwarding address shift. This clears out the state bits since they are + // in the top 2 bits of the lock word. + sll \reg, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT + jalr $zero, $ra + dext \reg, \reg, 0, 32 # Make sure the address is zero-extended. END \name .endm diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index 9cd4a3ee3b..102faf19d4 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -44,14 +44,14 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t); extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*); -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { - qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr; - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; - qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr; - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr; + qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? 
art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { @@ -97,7 +97,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); qpoints->pReadBarrierMarkReg04 = nullptr; // Cannot use register 4 (ESP) to pass arguments. // x86 has only 8 core registers. qpoints->pReadBarrierMarkReg08 = nullptr; diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 3694c3e7a6..2222f5cc3b 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -2028,7 +2028,7 @@ DEFINE_FUNCTION art_quick_deoptimize CFI_ADJUST_CFA_OFFSET(12) pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). CFI_ADJUST_CFA_OFFSET(4) - call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) + call SYMBOL(artDeoptimize) // (Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize @@ -2038,11 +2038,12 @@ END_FUNCTION art_quick_deoptimize */ DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_EVERYTHING_FRAME ebx, ebx - subl LITERAL(12), %esp // Align stack. - CFI_ADJUST_CFA_OFFSET(12) + subl LITERAL(8), %esp // Align stack. + CFI_ADJUST_CFA_OFFSET(8) pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). CFI_ADJUST_CFA_OFFSET(4) - call SYMBOL(artDeoptimizeFromCompiledCode) // artDeoptimizeFromCompiledCode(Thread*) + PUSH eax + call SYMBOL(artDeoptimizeFromCompiledCode) // (DeoptimizationKind, Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize_from_compiled_code diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index a326b4eebc..1e56e8a087 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -55,22 +55,22 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg15(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t); extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*); -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { - qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr; - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; - qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr; - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; - qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr; - qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr; - qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr; - qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr; - qpoints->pReadBarrierMarkReg12 = is_marking ? 
art_quick_read_barrier_mark_reg12 : nullptr; - qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr; - qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr; - qpoints->pReadBarrierMarkReg15 = is_marking ? art_quick_read_barrier_mark_reg15 : nullptr; +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr; + qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; + qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr; + qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr; + qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; + qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; + qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr; + qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr; + qpoints->pReadBarrierMarkReg14 = is_active ? art_quick_read_barrier_mark_reg14 : nullptr; + qpoints->pReadBarrierMarkReg15 = is_active ? art_quick_read_barrier_mark_reg15 : nullptr; } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { @@ -119,7 +119,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); qpoints->pReadBarrierMarkReg04 = nullptr; // Cannot use register 4 (RSP) to pass arguments. // x86-64 has only 16 core registers. qpoints->pReadBarrierMarkReg16 = nullptr; diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index ad7c2b3765..41651d8f1a 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1983,7 +1983,7 @@ DEFINE_FUNCTION art_quick_deoptimize SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // Stack should be aligned now. movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. - call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) + call SYMBOL(artDeoptimize) // (Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize @@ -1994,8 +1994,8 @@ END_FUNCTION art_quick_deoptimize DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_EVERYTHING_FRAME // Stack should be aligned now. - movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. - call SYMBOL(artDeoptimizeFromCompiledCode) // artDeoptimizeFromCompiledCode(Thread*) + movq %gs:THREAD_SELF_OFFSET, %rsi // Pass Thread. 
+ call SYMBOL(artDeoptimizeFromCompiledCode) // (DeoptimizationKind, Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize_from_compiled_code diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc index 935fd81115..136ed12362 100644 --- a/runtime/base/arena_allocator.cc +++ b/runtime/base/arena_allocator.cc @@ -33,6 +33,7 @@ constexpr size_t Arena::kDefaultSize; template <bool kCount> const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { + // Every name should have the same width and end with a space. Abbreviate if necessary: "Misc ", "SwitchTbl ", "SlowPaths ", @@ -49,6 +50,7 @@ const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { "Successors ", "Dominated ", "Instruction ", + "CtorFenceIns ", "InvokeInputs ", "PhiInputs ", "LoopInfo ", diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h index c39429ce06..60b6ea8d7a 100644 --- a/runtime/base/arena_allocator.h +++ b/runtime/base/arena_allocator.h @@ -59,6 +59,7 @@ enum ArenaAllocKind { kArenaAllocSuccessors, kArenaAllocDominated, kArenaAllocInstruction, + kArenaAllocConstructorFenceInputs, kArenaAllocInvokeInputs, kArenaAllocPhiInputs, kArenaAllocLoopInfo, diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 2414b5f937..03ae63a068 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -373,19 +373,19 @@ class SHARED_LOCKABLE ReaderWriterMutex : public BaseMutex { bool IsSharedHeld(const Thread* self) const; // Assert the current thread has shared access to the ReaderWriterMutex. - void AssertSharedHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) { + ALWAYS_INLINE void AssertSharedHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) { if (kDebugLocking && (gAborting == 0)) { // TODO: we can only assert this well when self != null. CHECK(IsSharedHeld(self) || self == nullptr) << *this; } } - void AssertReaderHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) { + ALWAYS_INLINE void AssertReaderHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) { AssertSharedHeld(self); } // Assert the current thread doesn't hold this ReaderWriterMutex either in shared or exclusive // mode. - void AssertNotHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(!this) { + ALWAYS_INLINE void AssertNotHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(!this) { if (kDebugLocking && (gAborting == 0)) { CHECK(!IsSharedHeld(self)) << *this; } diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc index 5394e53fa3..862f0d0b00 100644 --- a/runtime/base/scoped_flock.cc +++ b/runtime/base/scoped_flock.cc @@ -33,11 +33,22 @@ bool ScopedFlock::Init(const char* filename, std::string* error_msg) { } bool ScopedFlock::Init(const char* filename, int flags, bool block, std::string* error_msg) { + return Init(filename, flags, block, true, error_msg); +} + +bool ScopedFlock::Init(const char* filename, + int flags, + bool block, + bool flush_on_close, + std::string* error_msg) { + flush_on_close_ = flush_on_close; while (true) { if (file_.get() != nullptr) { UNUSED(file_->FlushCloseOrErase()); // Ignore result. } - file_.reset(OS::OpenFileWithFlags(filename, flags)); + + bool check_usage = flush_on_close; // Check usage only if we need to flush on close. 
+ file_.reset(OS::OpenFileWithFlags(filename, flags, check_usage)); if (file_.get() == nullptr) { *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno)); return false; @@ -86,6 +97,7 @@ bool ScopedFlock::Init(const char* filename, int flags, bool block, std::string* } bool ScopedFlock::Init(File* file, std::string* error_msg) { + flush_on_close_ = true; file_.reset(new File(dup(file->Fd()), file->GetPath(), file->CheckUsage(), file->ReadOnlyMode())); if (file_->Fd() == -1) { file_.reset(); @@ -111,17 +123,21 @@ bool ScopedFlock::HasFile() { return file_.get() != nullptr; } -ScopedFlock::ScopedFlock() { } +ScopedFlock::ScopedFlock() : flush_on_close_(true) { } ScopedFlock::~ScopedFlock() { if (file_.get() != nullptr) { int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN)); if (flock_result != 0) { - PLOG(FATAL) << "Unable to unlock file " << file_->GetPath(); - UNREACHABLE(); + // Only printing a warning is okay since this is only used with either: + // 1) a non-blocking Init call, or + // 2) as a part of a separate binary (e.g. dex2oat) which has its own timeout logic to prevent + // deadlocks. + // This means we can be sure that the warning won't cause a deadlock. + PLOG(WARNING) << "Unable to unlock file " << file_->GetPath(); } int close_result = -1; - if (file_->ReadOnlyMode()) { + if (file_->ReadOnlyMode() || !flush_on_close_) { close_result = file_->Close(); } else { close_result = file_->FlushCloseOrErase(); diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h index cc22056443..a3a320f4cc 100644 --- a/runtime/base/scoped_flock.h +++ b/runtime/base/scoped_flock.h @@ -25,6 +25,15 @@ namespace art { +// A scoped file-lock implemented using flock. The file is locked by calling the Init function and +// is released during destruction. Note that failing to unlock the file only causes a warning to be +// printed. Users should take care that this does not cause potential deadlocks. +// +// Only printing a warning on unlock failure is okay since this is only used with either: +// 1) a non-blocking Init call, or +// 2) as a part of a separate binary (e.g. dex2oat) which has its own timeout logic to prevent +// deadlocks. +// This means we can be sure that the warning won't cause a deadlock. class ScopedFlock { public: ScopedFlock(); @@ -38,7 +47,16 @@ class ScopedFlock { // locking will be retried if the file changed. In non-blocking mode, false // is returned and no attempt is made to re-acquire the lock. // + // The argument `flush_on_close` controls whether or not the file + // will be explicitly flushed before close. + // + // The file is opened with the provided flags.
+ bool Init(const char* filename, + int flags, + bool block, + bool flush_on_close, + std::string* error_msg); + // Calls Init(filename, flags, block, true, error_msg); bool Init(const char* filename, int flags, bool block, std::string* error_msg); // Calls Init(filename, O_CREAT | O_RDWR, true, errror_msg) bool Init(const char* filename, std::string* error_msg); @@ -57,6 +75,7 @@ class ScopedFlock { private: std::unique_ptr<File> file_; + bool flush_on_close_; DISALLOW_COPY_AND_ASSIGN(ScopedFlock); }; diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h index 56e8aa3685..a29cc6cd38 100644 --- a/runtime/common_runtime_test.h +++ b/runtime/common_runtime_test.h @@ -249,6 +249,12 @@ class CheckJniAbortCatcher { return; \ } +#define TEST_DISABLED_FOR_MEMORY_TOOL_ASAN() \ + if (RUNNING_ON_MEMORY_TOOL > 0 && !kMemoryToolIsValgrind) { \ + printf("WARNING: TEST DISABLED FOR MEMORY TOOL ASAN\n"); \ + return; \ + } + } // namespace art namespace std { diff --git a/runtime/compiler_filter.cc b/runtime/compiler_filter.cc index dbfcdfe874..4847f38489 100644 --- a/runtime/compiler_filter.cc +++ b/runtime/compiler_filter.cc @@ -140,6 +140,26 @@ CompilerFilter::Filter CompilerFilter::GetNonProfileDependentFilterFrom(Filter f UNREACHABLE(); } +CompilerFilter::Filter CompilerFilter::GetSafeModeFilterFrom(Filter filter) { + // For safe mode, we should not return a filter that generates AOT compiled + // code. + switch (filter) { + case CompilerFilter::kAssumeVerified: + case CompilerFilter::kExtract: + case CompilerFilter::kVerify: + case CompilerFilter::kQuicken: + return filter; + + case CompilerFilter::kSpace: + case CompilerFilter::kSpeed: + case CompilerFilter::kEverything: + case CompilerFilter::kSpaceProfile: + case CompilerFilter::kSpeedProfile: + case CompilerFilter::kEverythingProfile: + return CompilerFilter::kQuicken; + } + UNREACHABLE(); +} bool CompilerFilter::IsAsGoodAs(Filter current, Filter target) { return current >= target; diff --git a/runtime/compiler_filter.h b/runtime/compiler_filter.h index 9cb54b14b6..f802439053 100644 --- a/runtime/compiler_filter.h +++ b/runtime/compiler_filter.h @@ -75,6 +75,9 @@ class CompilerFilter FINAL { // Returns a non-profile-guided version of the given filter. static Filter GetNonProfileDependentFilterFrom(Filter filter); + // Returns a filter suitable for safe mode. + static Filter GetSafeModeFilterFrom(Filter filter); + // Returns true if the 'current' compiler filter is considered at least as // good as the 'target' compilation type. // For example: kSpeed is as good as kInterpretOnly, but kInterpretOnly is diff --git a/runtime/compiler_filter_test.cc b/runtime/compiler_filter_test.cc index a59165f958..383f4e3666 100644 --- a/runtime/compiler_filter_test.cc +++ b/runtime/compiler_filter_test.cc @@ -28,6 +28,13 @@ static void TestCompilerFilterName(CompilerFilter::Filter filter, std::string na EXPECT_EQ(name, CompilerFilter::NameOfFilter(filter)); } +static void TestSafeModeFilter(CompilerFilter::Filter expected, std::string name) { + CompilerFilter::Filter parsed; + EXPECT_TRUE(CompilerFilter::ParseCompilerFilter(name.c_str(), &parsed)); + EXPECT_EQ(expected, CompilerFilter::GetSafeModeFilterFrom(parsed)); +} + + // Verify the dexopt status values from dalvik.system.DexFile // match the OatFileAssistant::DexOptStatus values. 
TEST(CompilerFilterTest, ParseCompilerFilter) { @@ -47,4 +54,17 @@ TEST(CompilerFilterTest, ParseCompilerFilter) { EXPECT_FALSE(CompilerFilter::ParseCompilerFilter("super-awesome-filter", &filter)); } +TEST(CompilerFilterTest, SafeModeFilter) { + TestSafeModeFilter(CompilerFilter::kAssumeVerified, "assume-verified"); + TestSafeModeFilter(CompilerFilter::kExtract, "extract"); + TestSafeModeFilter(CompilerFilter::kVerify, "verify"); + TestSafeModeFilter(CompilerFilter::kQuicken, "quicken"); + TestSafeModeFilter(CompilerFilter::kQuicken, "space-profile"); + TestSafeModeFilter(CompilerFilter::kQuicken, "space"); + TestSafeModeFilter(CompilerFilter::kQuicken, "speed-profile"); + TestSafeModeFilter(CompilerFilter::kQuicken, "speed"); + TestSafeModeFilter(CompilerFilter::kQuicken, "everything-profile"); + TestSafeModeFilter(CompilerFilter::kQuicken, "everything"); +} + } // namespace art diff --git a/runtime/deoptimization_kind.h b/runtime/deoptimization_kind.h new file mode 100644 index 0000000000..14e189c5d1 --- /dev/null +++ b/runtime/deoptimization_kind.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_DEOPTIMIZATION_KIND_H_ +#define ART_RUNTIME_DEOPTIMIZATION_KIND_H_ + +namespace art { + +enum class DeoptimizationKind { + kAotInlineCache = 0, + kJitInlineCache, + kJitSameTarget, + kLoopBoundsBCE, + kLoopNullBCE, + kBlockBCE, + kCHA, + kFullFrame, + kLast = kFullFrame +}; + +inline const char* GetDeoptimizationKindName(DeoptimizationKind kind) { + switch (kind) { + case DeoptimizationKind::kAotInlineCache: return "AOT inline cache"; + case DeoptimizationKind::kJitInlineCache: return "JIT inline cache"; + case DeoptimizationKind::kJitSameTarget: return "JIT same target"; + case DeoptimizationKind::kLoopBoundsBCE: return "loop bounds check elimination"; + case DeoptimizationKind::kLoopNullBCE: return "loop bounds check elimination on null"; + case DeoptimizationKind::kBlockBCE: return "block bounds check elimination"; + case DeoptimizationKind::kCHA: return "class hierarchy analysis"; + case DeoptimizationKind::kFullFrame: return "full frame"; + } + LOG(FATAL) << "Unexpected kind " << static_cast<size_t>(kind); + UNREACHABLE(); +} + +std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind); + +} // namespace art + +#endif // ART_RUNTIME_DEOPTIMIZATION_KIND_H_ diff --git a/runtime/dex2oat_environment_test.h b/runtime/dex2oat_environment_test.h index e58c6f541e..6765407949 100644 --- a/runtime/dex2oat_environment_test.h +++ b/runtime/dex2oat_environment_test.h @@ -42,7 +42,16 @@ class Dex2oatEnvironmentTest : public CommonRuntimeTest { CommonRuntimeTest::SetUp(); // Create a scratch directory to work from. - scratch_dir_ = android_data_ + "/Dex2oatEnvironmentTest"; + + // Get the realpath of the android data. The oat dir should always point to real location + // when generating oat files in dalvik-cache. 
This avoids complicating the unit tests + // when matching the expected paths. + UniqueCPtr<const char[]> android_data_real(realpath(android_data_.c_str(), nullptr)); + ASSERT_TRUE(android_data_real != nullptr) + << "Could not get the realpath of the android data" << android_data_ << strerror(errno); + + scratch_dir_.assign(android_data_real.get()); + scratch_dir_ += "/Dex2oatEnvironmentTest"; ASSERT_EQ(0, mkdir(scratch_dir_.c_str(), 0700)); // Create a subdirectory in scratch for odex files. diff --git a/runtime/dex_file.h b/runtime/dex_file.h index 1b18d21cb1..36c734197a 100644 --- a/runtime/dex_file.h +++ b/runtime/dex_file.h @@ -92,8 +92,8 @@ class DexFile { uint32_t method_ids_off_; // file offset of MethodIds array uint32_t class_defs_size_; // number of ClassDefs uint32_t class_defs_off_; // file offset of ClassDef array - uint32_t data_size_; // unused - uint32_t data_off_; // unused + uint32_t data_size_; // size of data section + uint32_t data_off_; // file offset of data section // Decode the dex magic version uint32_t GetVersion() const; diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc index 13979160bd..f21f1a2704 100644 --- a/runtime/dex_file_annotations.cc +++ b/runtime/dex_file_annotations.cc @@ -1421,11 +1421,20 @@ mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForClass(Handle<mirro } const char* GetSourceDebugExtension(Handle<mirror::Class> klass) { + // Before instantiating ClassData, check that klass has a DexCache + // assigned. The ClassData constructor indirectly dereferences it + // when calling klass->GetDexFile(). + if (klass->GetDexCache() == nullptr) { + DCHECK(klass->IsPrimitive() || klass->IsArrayClass()); + return nullptr; + } + ClassData data(klass); const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data); if (annotation_set == nullptr) { return nullptr; } + const DexFile::AnnotationItem* annotation_item = SearchAnnotationSet( data.GetDexFile(), annotation_set, @@ -1434,6 +1443,7 @@ const char* GetSourceDebugExtension(Handle<mirror::Class> klass) { if (annotation_item == nullptr) { return nullptr; } + const uint8_t* annotation = SearchEncodedAnnotation(data.GetDexFile(), annotation_item->annotation_, "value"); if (annotation == nullptr) { diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc index 11b3cd025a..c18ab47739 100644 --- a/runtime/dex_file_verifier.cc +++ b/runtime/dex_file_verifier.cc @@ -922,12 +922,12 @@ bool DexFileVerifier::CheckEncodedAnnotation() { return true; } -bool DexFileVerifier::FindClassFlags(uint32_t index, - bool is_field, - dex::TypeIndex* class_type_index, - uint32_t* class_access_flags) { +bool DexFileVerifier::FindClassIndexAndDef(uint32_t index, + bool is_field, + dex::TypeIndex* class_type_index, + const DexFile::ClassDef** output_class_def) { DCHECK(class_type_index != nullptr); - DCHECK(class_access_flags != nullptr); + DCHECK(output_class_def != nullptr); // First check if the index is valid. if (index >= (is_field ? 
header_->field_ids_size_ : header_->method_ids_size_)) { @@ -957,7 +957,7 @@ bool DexFileVerifier::FindClassFlags(uint32_t index, for (size_t i = 0; i < header_->class_defs_size_; ++i) { const DexFile::ClassDef* class_def = class_def_begin + i; if (class_def->class_idx_ == *class_type_index) { - *class_access_flags = class_def->access_flags_; + *output_class_def = class_def; return true; } } @@ -966,13 +966,13 @@ bool DexFileVerifier::FindClassFlags(uint32_t index, return false; } -bool DexFileVerifier::CheckOrderAndGetClassFlags(bool is_field, - const char* type_descr, - uint32_t curr_index, - uint32_t prev_index, - bool* have_class, - dex::TypeIndex* class_type_index, - uint32_t* class_access_flags) { +bool DexFileVerifier::CheckOrderAndGetClassDef(bool is_field, + const char* type_descr, + uint32_t curr_index, + uint32_t prev_index, + bool* have_class, + dex::TypeIndex* class_type_index, + const DexFile::ClassDef** class_def) { if (curr_index < prev_index) { ErrorStringPrintf("out-of-order %s indexes %" PRIu32 " and %" PRIu32, type_descr, @@ -982,7 +982,7 @@ bool DexFileVerifier::CheckOrderAndGetClassFlags(bool is_field, } if (!*have_class) { - *have_class = FindClassFlags(curr_index, is_field, class_type_index, class_access_flags); + *have_class = FindClassIndexAndDef(curr_index, is_field, class_type_index, class_def); if (!*have_class) { // Should have really found one. ErrorStringPrintf("could not find declaring class for %s index %" PRIu32, @@ -994,34 +994,130 @@ bool DexFileVerifier::CheckOrderAndGetClassFlags(bool is_field, return true; } +bool DexFileVerifier::CheckStaticFieldTypes(const DexFile::ClassDef* class_def) { + if (class_def == nullptr) { + return true; + } + + ClassDataItemIterator field_it(*dex_file_, ptr_); + EncodedStaticFieldValueIterator array_it(*dex_file_, *class_def); + + for (; field_it.HasNextStaticField() && array_it.HasNext(); field_it.Next(), array_it.Next()) { + uint32_t index = field_it.GetMemberIndex(); + const DexFile::TypeId& type_id = dex_file_->GetTypeId(dex_file_->GetFieldId(index).type_idx_); + const char* field_type_name = + dex_file_->GetStringData(dex_file_->GetStringId(type_id.descriptor_idx_)); + Primitive::Type field_type = Primitive::GetType(field_type_name[0]); + EncodedArrayValueIterator::ValueType array_type = array_it.GetValueType(); + // Ensure this matches RuntimeEncodedStaticFieldValueIterator. 
+ switch (array_type) { + case EncodedArrayValueIterator::ValueType::kBoolean: + if (field_type != Primitive::kPrimBoolean) { + ErrorStringPrintf("unexpected static field initial value type: 'Z' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kByte: + if (field_type != Primitive::kPrimByte) { + ErrorStringPrintf("unexpected static field initial value type: 'B' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kShort: + if (field_type != Primitive::kPrimShort) { + ErrorStringPrintf("unexpected static field initial value type: 'S' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kChar: + if (field_type != Primitive::kPrimChar) { + ErrorStringPrintf("unexpected static field initial value type: 'C' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kInt: + if (field_type != Primitive::kPrimInt) { + ErrorStringPrintf("unexpected static field initial value type: 'I' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kLong: + if (field_type != Primitive::kPrimLong) { + ErrorStringPrintf("unexpected static field initial value type: 'J' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kFloat: + if (field_type != Primitive::kPrimFloat) { + ErrorStringPrintf("unexpected static field initial value type: 'F' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kDouble: + if (field_type != Primitive::kPrimDouble) { + ErrorStringPrintf("unexpected static field initial value type: 'D' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kNull: + case EncodedArrayValueIterator::ValueType::kString: + case EncodedArrayValueIterator::ValueType::kType: + if (field_type != Primitive::kPrimNot) { + ErrorStringPrintf("unexpected static field initial value type: 'L' vs '%c'", + field_type_name[0]); + return false; + } + break; + default: + ErrorStringPrintf("unexpected static field initial value type: %x", array_type); + return false; + } + } + + if (array_it.HasNext()) { + ErrorStringPrintf("too many static field initial values"); + return false; + } + return true; +} + template <bool kStatic> bool DexFileVerifier::CheckIntraClassDataItemFields(ClassDataItemIterator* it, bool* have_class, dex::TypeIndex* class_type_index, - uint32_t* class_access_flags) { + const DexFile::ClassDef** class_def) { DCHECK(it != nullptr); // These calls use the raw access flags to check whether the whole dex field is valid. uint32_t prev_index = 0; for (; kStatic ? it->HasNextStaticField() : it->HasNextInstanceField(); it->Next()) { uint32_t curr_index = it->GetMemberIndex(); - if (!CheckOrderAndGetClassFlags(true, - kStatic ? "static field" : "instance field", - curr_index, - prev_index, - have_class, - class_type_index, - class_access_flags)) { + if (!CheckOrderAndGetClassDef(true, + kStatic ? 
"static field" : "instance field", + curr_index, + prev_index, + have_class, + class_type_index, + class_def)) { return false; } - prev_index = curr_index; - + DCHECK(class_def != nullptr); if (!CheckClassDataItemField(curr_index, it->GetRawMemberAccessFlags(), - *class_access_flags, + (*class_def)->access_flags_, *class_type_index, kStatic)) { return false; } + + prev_index = curr_index; } return true; @@ -1033,30 +1129,31 @@ bool DexFileVerifier::CheckIntraClassDataItemMethods( std::unordered_set<uint32_t>* direct_method_indexes, bool* have_class, dex::TypeIndex* class_type_index, - uint32_t* class_access_flags) { + const DexFile::ClassDef** class_def) { uint32_t prev_index = 0; for (; kDirect ? it->HasNextDirectMethod() : it->HasNextVirtualMethod(); it->Next()) { uint32_t curr_index = it->GetMemberIndex(); - if (!CheckOrderAndGetClassFlags(false, - kDirect ? "direct method" : "virtual method", - curr_index, - prev_index, - have_class, - class_type_index, - class_access_flags)) { + if (!CheckOrderAndGetClassDef(false, + kDirect ? "direct method" : "virtual method", + curr_index, + prev_index, + have_class, + class_type_index, + class_def)) { return false; } - prev_index = curr_index; - + DCHECK(class_def != nullptr); if (!CheckClassDataItemMethod(curr_index, it->GetRawMemberAccessFlags(), - *class_access_flags, + (*class_def)->access_flags_, *class_type_index, it->GetMethodCodeItemOffset(), direct_method_indexes, kDirect)) { return false; } + + prev_index = curr_index; } return true; @@ -1071,19 +1168,19 @@ bool DexFileVerifier::CheckIntraClassDataItem() { // as the lookup is expensive, cache the result. bool have_class = false; dex::TypeIndex class_type_index; - uint32_t class_access_flags; + const DexFile::ClassDef* class_def = nullptr; // Check fields. if (!CheckIntraClassDataItemFields<true>(&it, &have_class, &class_type_index, - &class_access_flags)) { + &class_def)) { return false; } if (!CheckIntraClassDataItemFields<false>(&it, &have_class, &class_type_index, - &class_access_flags)) { + &class_def)) { return false; } @@ -1092,18 +1189,25 @@ bool DexFileVerifier::CheckIntraClassDataItem() { &direct_method_indexes, &have_class, &class_type_index, - &class_access_flags)) { + &class_def)) { return false; } if (!CheckIntraClassDataItemMethods<false>(&it, &direct_method_indexes, &have_class, &class_type_index, - &class_access_flags)) { + &class_def)) { return false; } - ptr_ = it.EndDataPointer(); + const uint8_t* end_ptr = it.EndDataPointer(); + + // Check static field types against initial static values in encoded array. 
+ if (!CheckStaticFieldTypes(class_def)) { + return false; + } + + ptr_ = end_ptr; return true; } diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h index 71b316c403..d1043c6841 100644 --- a/runtime/dex_file_verifier.h +++ b/runtime/dex_file_verifier.h @@ -86,13 +86,14 @@ class DexFileVerifier { uint32_t code_offset, std::unordered_set<uint32_t>* direct_method_indexes, bool expect_direct); - bool CheckOrderAndGetClassFlags(bool is_field, - const char* type_descr, - uint32_t curr_index, - uint32_t prev_index, - bool* have_class, - dex::TypeIndex* class_type_index, - uint32_t* class_access_flags); + bool CheckOrderAndGetClassDef(bool is_field, + const char* type_descr, + uint32_t curr_index, + uint32_t prev_index, + bool* have_class, + dex::TypeIndex* class_type_index, + const DexFile::ClassDef** class_def); + bool CheckStaticFieldTypes(const DexFile::ClassDef* class_def); bool CheckPadding(size_t offset, uint32_t aligned_offset); bool CheckEncodedValue(); @@ -106,7 +107,7 @@ class DexFileVerifier { bool CheckIntraClassDataItemFields(ClassDataItemIterator* it, bool* have_class, dex::TypeIndex* class_type_index, - uint32_t* class_access_flags); + const DexFile::ClassDef** class_def); // Check all methods of the given type from the given iterator. Load the class data from the first // method, if necessary (and return it), or use the given values. template <bool kDirect> @@ -114,7 +115,7 @@ class DexFileVerifier { std::unordered_set<uint32_t>* direct_method_indexes, bool* have_class, dex::TypeIndex* class_type_index, - uint32_t* class_access_flags); + const DexFile::ClassDef** class_def); bool CheckIntraCodeItem(); bool CheckIntraStringDataItem(); @@ -165,16 +166,15 @@ class DexFileVerifier { __attribute__((__format__(__printf__, 2, 3))) COLD_ATTR; bool FailureReasonIsSet() const { return failure_reason_.size() != 0; } - // Retrieve class index and class access flag from the given member. index is the member index, - // which is taken as either a field or a method index (as designated by is_field). The result, - // if the member and declaring class could be found, is stored in class_type_index and - // class_access_flags. - // This is an expensive lookup, as we have to find the class-def by type index, which is a + // Retrieve class index and class def from the given member. index is the member index, which is + // taken as either a field or a method index (as designated by is_field). The result, if the + // member and declaring class could be found, is stored in class_type_index and class_def. + // This is an expensive lookup, as we have to find the class def by type index, which is a // linear search. The output values should thus be cached by the caller. - bool FindClassFlags(uint32_t index, - bool is_field, - dex::TypeIndex* class_type_index, - uint32_t* class_access_flags); + bool FindClassIndexAndDef(uint32_t index, + bool is_field, + dex::TypeIndex* class_type_index, + const DexFile::ClassDef** output_class_def); // Check validity of the given access flags, interpreted for a field in the context of a class // with the given second access flags. 
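The new CheckStaticFieldTypes() pass above boils down to a fixed agreement between the value tag found in a class's encoded static-values array and the declared type of the corresponding static field. A compact sketch of that mapping, reusing the EncodedArrayValueIterator and Primitive names from the code above (the helper function itself is illustrative only, not part of the patch):

static Primitive::Type ExpectedFieldTypeFor(EncodedArrayValueIterator::ValueType value_type) {
  switch (value_type) {
    case EncodedArrayValueIterator::ValueType::kBoolean: return Primitive::kPrimBoolean;
    case EncodedArrayValueIterator::ValueType::kByte:    return Primitive::kPrimByte;
    case EncodedArrayValueIterator::ValueType::kShort:   return Primitive::kPrimShort;
    case EncodedArrayValueIterator::ValueType::kChar:    return Primitive::kPrimChar;
    case EncodedArrayValueIterator::ValueType::kInt:     return Primitive::kPrimInt;
    case EncodedArrayValueIterator::ValueType::kLong:    return Primitive::kPrimLong;
    case EncodedArrayValueIterator::ValueType::kFloat:   return Primitive::kPrimFloat;
    case EncodedArrayValueIterator::ValueType::kDouble:  return Primitive::kPrimDouble;
    // A null, string or type tag must initialize a reference-typed field.
    case EncodedArrayValueIterator::ValueType::kNull:
    case EncodedArrayValueIterator::ValueType::kString:
    case EncodedArrayValueIterator::ValueType::kType:    return Primitive::kPrimNot;
    // Any other tag is rejected outright by the verifier check above.
    default:                                             return Primitive::kPrimVoid;
  }
}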
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc index 7736f3d615..068e1223e5 100644 --- a/runtime/dex_file_verifier_test.cc +++ b/runtime/dex_file_verifier_test.cc @@ -123,7 +123,7 @@ static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64, // To generate a base64 encoded Dex file (such as kGoodTestDex, below) // from Smali files, use: // -// smali -o classes.dex class1.smali [class2.smali ...] +// smali assemble -o classes.dex class1.smali [class2.smali ...] // base64 classes.dex >classes.dex.base64 // For reference. @@ -1461,7 +1461,7 @@ TEST_F(DexFileVerifierTest, ProtoOrdering) { // To generate a base64 encoded Dex file version 037 from Smali files, use: // -// smali --api-level 24 -o classes.dex class1.smali [class2.smali ...] +// smali assemble --api 24 -o classes.dex class1.smali [class2.smali ...] // base64 classes.dex >classes.dex.base64 // Dex file version 037 generated from: @@ -2090,4 +2090,105 @@ TEST_F(DexFileVerifierTest, InvokeCustomDexSamples) { } } +TEST_F(DexFileVerifierTest, BadStaticFieldInitialValuesArray) { + // Generated DEX file version (037) from: + // + // .class public LBadStaticFieldInitialValuesArray; + // .super Ljava/lang/Object; + // + // # static fields + // .field static final c:C = 'c' + // .field static final i:I = 0x1 + // .field static final s:Ljava/lang/String; = "s" + // + // # direct methods + // .method public constructor <init>()V + // .registers 1 + // invoke-direct {p0}, Ljava/lang/Object;-><init>()V + // return-void + // .end method + // + // Output file was hex edited so that static field "i" has string typing in initial values array. + static const char kDexBase64[] = + "ZGV4CjAzNQBrMi4cCPcMvvXNRw0uI6RRubwMPwgEYXIsAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAL" + "AAAAcAAAAAYAAACcAAAAAQAAALQAAAADAAAAwAAAAAIAAADYAAAAAQAAAOgAAAAkAQAACAEAACAB" + "AAAoAQAAMAEAADMBAAA2AQAAOwEAAE8BAABjAQAAZgEAAGkBAABsAQAAAgAAAAMAAAAEAAAABQAA" + "AAYAAAAHAAAABwAAAAUAAAAAAAAAAgAAAAgAAAACAAEACQAAAAIABAAKAAAAAgAAAAAAAAADAAAA" + "AAAAAAIAAAABAAAAAwAAAAAAAAABAAAAAAAAAHsBAAB0AQAAAQABAAEAAABvAQAABAAAAHAQAQAA" + "AA4ABjxpbml0PgAGQS5qYXZhAAFDAAFJAANMQTsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEv" + "bGFuZy9TdHJpbmc7AAFWAAFjAAFpAAFzAAEABw4AAwNjFwoXCgMAAQAAGAEYARgAgYAEiAIADQAA" + "AAAAAAABAAAAAAAAAAEAAAALAAAAcAAAAAIAAAAGAAAAnAAAAAMAAAABAAAAtAAAAAQAAAADAAAA" + "wAAAAAUAAAACAAAA2AAAAAYAAAABAAAA6AAAAAEgAAABAAAACAEAAAIgAAALAAAAIAEAAAMgAAAB" + "AAAAbwEAAAUgAAABAAAAdAEAAAAgAAABAAAAewEAAAAQAAABAAAAjAEAAA=="; + + size_t length; + std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(kDexBase64, &length)); + CHECK(dex_bytes != nullptr); + // Note: `dex_file` will be destroyed before `dex_bytes`. 
+ std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length)); + std::string error_msg; + EXPECT_FALSE(DexFileVerifier::Verify(dex_file.get(), + dex_file->Begin(), + dex_file->Size(), + "bad static field initial values array", + /*verify_checksum*/ true, + &error_msg)); +} + +TEST_F(DexFileVerifierTest, GoodStaticFieldInitialValuesArray) { + // Generated DEX file version (037) from: + // + // .class public LGoodStaticFieldInitialValuesArray; + // .super Ljava/lang/Object; + // + // # static fields + // .field static final b:B = 0x1t + // .field static final c:C = 'c' + // .field static final d:D = 0.6 + // .field static final f:F = 0.5f + // .field static final i:I = 0x3 + // .field static final j:J = 0x4L + // .field static final l1:Ljava/lang/String; + // .field static final l2:Ljava/lang/String; = "s" + // .field static final l3:Ljava/lang/Class; = Ljava/lang/String; + // .field static final s:S = 0x2s + // .field static final z:Z = true + // + // # direct methods + // .method public constructor <init>()V + // .registers 1 + // invoke-direct {p0}, Ljava/lang/Object;-><init>()V + // return-void + // .end method + static const char kDexBase64[] = + "ZGV4CjAzNQAwWxLbdhFa1NGiFWjsy5fhUCHxe5QHtPY8AwAAcAAAAHhWNBIAAAAAAAAAAJwCAAAZ" + "AAAAcAAAAA0AAADUAAAAAQAAAAgBAAALAAAAFAEAAAIAAABsAQAAAQAAAHwBAACgAQAAnAEAAJwB" + "AACkAQAApwEAAKoBAACtAQAAsAEAALMBAAC2AQAA2wEAAO4BAAACAgAAFgIAABkCAAAcAgAAHwIA" + "ACICAAAlAgAAKAIAACsCAAAuAgAAMQIAADUCAAA5AgAAPQIAAEACAAABAAAAAgAAAAMAAAAEAAAA" + "BQAAAAYAAAAHAAAACAAAAAkAAAAKAAAACwAAAAwAAAANAAAADAAAAAsAAAAAAAAABgAAAA4AAAAG" + "AAEADwAAAAYAAgAQAAAABgADABEAAAAGAAQAEgAAAAYABQATAAAABgAJABQAAAAGAAkAFQAAAAYA" + "BwAWAAAABgAKABcAAAAGAAwAGAAAAAYAAAAAAAAACAAAAAAAAAAGAAAAAQAAAAgAAAAAAAAA////" + "/wAAAAB8AgAARAIAAAY8aW5pdD4AAUIAAUMAAUQAAUYAAUkAAUoAI0xHb29kU3RhdGljRmllbGRJ" + "bml0aWFsVmFsdWVzQXJyYXk7ABFMamF2YS9sYW5nL0NsYXNzOwASTGphdmEvbGFuZy9PYmplY3Q7" + "ABJMamF2YS9sYW5nL1N0cmluZzsAAVMAAVYAAVoAAWIAAWMAAWQAAWYAAWkAAWoAAmwxAAJsMgAC" + "bDMAAXMAAXoAAAsAAQNj8TMzMzMzM+M/ED8EAwYEHhcXGAkCAj8AAAAAAQABAAEAAAAAAAAABAAA" + "AHAQAQAAAA4ACwABAAAYARgBGAEYARgBGAEYARgBGAEYARgAgYAE5AQNAAAAAAAAAAEAAAAAAAAA" + "AQAAABkAAABwAAAAAgAAAA0AAADUAAAAAwAAAAEAAAAIAQAABAAAAAsAAAAUAQAABQAAAAIAAABs" + "AQAABgAAAAEAAAB8AQAAAiAAABkAAACcAQAABSAAAAEAAABEAgAAAxAAAAEAAABgAgAAASAAAAEA" + "AABkAgAAACAAAAEAAAB8AgAAABAAAAEAAACcAgAA"; + + size_t length; + std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(kDexBase64, &length)); + CHECK(dex_bytes != nullptr); + // Note: `dex_file` will be destroyed before `dex_bytes`. 
+ std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length)); + std::string error_msg; + EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(), + dex_file->Begin(), + dex_file->Size(), + "good static field initial values array", + /*verify_checksum*/ true, + &error_msg)); +} + } // namespace art diff --git a/runtime/dexopt_test.cc b/runtime/dexopt_test.cc index 24b1abbad4..3c8243a6c5 100644 --- a/runtime/dexopt_test.cc +++ b/runtime/dexopt_test.cc @@ -45,18 +45,23 @@ void DexoptTest::PostRuntimeCreate() { } void DexoptTest::GenerateOatForTest(const std::string& dex_location, - const std::string& oat_location, - CompilerFilter::Filter filter, - bool relocate, - bool pic, - bool with_alternate_image) { + const std::string& oat_location_in, + CompilerFilter::Filter filter, + bool relocate, + bool pic, + bool with_alternate_image) { std::string dalvik_cache = GetDalvikCache(GetInstructionSetString(kRuntimeISA)); std::string dalvik_cache_tmp = dalvik_cache + ".redirected"; - + std::string oat_location = oat_location_in; if (!relocate) { // Temporarily redirect the dalvik cache so dex2oat doesn't find the // relocated image file. ASSERT_EQ(0, rename(dalvik_cache.c_str(), dalvik_cache_tmp.c_str())) << strerror(errno); + // If the oat location is in dalvik cache, replace the cache path with the temporary one. + size_t pos = oat_location.find(dalvik_cache); + if (pos != std::string::npos) { + oat_location = oat_location.replace(pos, dalvik_cache.length(), dalvik_cache_tmp); + } } std::vector<std::string> args; @@ -90,6 +95,7 @@ void DexoptTest::GenerateOatForTest(const std::string& dex_location, if (!relocate) { // Restore the dalvik cache if needed. ASSERT_EQ(0, rename(dalvik_cache_tmp.c_str(), dalvik_cache.c_str())) << strerror(errno); + oat_location = oat_location_in; } // Verify the odex file was generated as expected. diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc index 3820d854f9..5762e4f00a 100644 --- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc @@ -16,6 +16,7 @@ #include "base/logging.h" #include "base/mutex.h" +#include "base/systrace.h" #include "callee_save_frame.h" #include "interpreter/interpreter.h" #include "obj_ptr-inl.h" // TODO: Find the other include that isn't complete, and clean this up. @@ -24,8 +25,9 @@ namespace art { -NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame) +NO_RETURN static void artDeoptimizeImpl(Thread* self, DeoptimizationKind kind, bool single_frame) REQUIRES_SHARED(Locks::mutator_lock_) { + Runtime::Current()->IncrementDeoptimizationCount(kind); if (VLOG_IS_ON(deopt)) { if (single_frame) { // Deopt logging will be in DeoptimizeSingleFrame. 
It is there to take advantage of the @@ -38,10 +40,13 @@ NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame) self->AssertHasDeoptimizationContext(); QuickExceptionHandler exception_handler(self, true); - if (single_frame) { - exception_handler.DeoptimizeSingleFrame(); - } else { - exception_handler.DeoptimizeStack(); + { + ScopedTrace trace(std::string("Deoptimization ") + GetDeoptimizationKindName(kind)); + if (single_frame) { + exception_handler.DeoptimizeSingleFrame(kind); + } else { + exception_handler.DeoptimizeStack(); + } } uintptr_t return_pc = exception_handler.UpdateInstrumentationStack(); if (exception_handler.IsFullFragmentDone()) { @@ -57,18 +62,18 @@ NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame) extern "C" NO_RETURN void artDeoptimize(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); - artDeoptimizeImpl(self, false); + artDeoptimizeImpl(self, DeoptimizationKind::kFullFrame, false); } -// This is called directly from compiled code by an HDepptimize. -extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self) +// This is called directly from compiled code by an HDeoptimize. +extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(DeoptimizationKind kind, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); // Before deoptimizing to interpreter, we must push the deoptimization context. JValue return_value; return_value.SetJ(0); // we never deoptimize from compiled code with an invoke result. self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException()); - artDeoptimizeImpl(self, true); + artDeoptimizeImpl(self, kind, true); } } // namespace art diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc index 355d7b3e2f..6b965678c3 100644 --- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc @@ -58,18 +58,13 @@ static inline void BssWriteBarrier(ArtMethod* outer_method) REQUIRES_SHARED(Lock } } -constexpr Runtime::CalleeSaveType kInitEntrypointSaveType = - // TODO: Change allocation entrypoints on MIPS and MIPS64 to kSaveEverything. - (kRuntimeISA == kMips || kRuntimeISA == kMips64) ? Runtime::kSaveRefsOnly - : Runtime::kSaveEverything; - extern "C" mirror::Class* artInitializeStaticStorageFromCode(uint32_t type_idx, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { // Called to ensure static storage base is initialized for direct static field reads and writes. // A class may be accessing another class' fields when it doesn't have access, as access has been // given by inheritance. ScopedQuickEntrypointChecks sqec(self); - auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType); + auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything); ArtMethod* caller = caller_and_outer.caller; mirror::Class* result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, true, false); @@ -83,7 +78,7 @@ extern "C" mirror::Class* artInitializeTypeFromCode(uint32_t type_idx, Thread* s REQUIRES_SHARED(Locks::mutator_lock_) { // Called when method->dex_cache_resolved_types_[] misses. 
ScopedQuickEntrypointChecks sqec(self); - auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType); + auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything); ArtMethod* caller = caller_and_outer.caller; mirror::Class* result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, false); @@ -98,7 +93,7 @@ extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type // Called when caller isn't guaranteed to have access to a type and the dex cache may be // unpopulated. ScopedQuickEntrypointChecks sqec(self); - auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType); + auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything); ArtMethod* caller = caller_and_outer.caller; mirror::Class* result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, true); @@ -111,7 +106,7 @@ extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); - auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType); + auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything); ArtMethod* caller = caller_and_outer.caller; mirror::String* result = ResolveStringFromCode(caller, dex::StringIndex(string_idx)); if (LIKELY(result != nullptr)) { diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h index 915f18ed71..6cd9dc1d71 100644 --- a/runtime/entrypoints/quick/quick_entrypoints.h +++ b/runtime/entrypoints/quick/quick_entrypoints.h @@ -21,6 +21,7 @@ #include "base/macros.h" #include "base/mutex.h" +#include "deoptimization_kind.h" #include "offsets.h" #define QUICK_ENTRYPOINT_OFFSET(ptr_size, x) \ diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index e0a2e3cf8f..e2d45acb34 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -140,7 +140,7 @@ V(ThrowNullPointer, void, void) \ V(ThrowStackOverflow, void, void*) \ V(ThrowStringBounds, void, int32_t, int32_t) \ - V(Deoptimize, void, void) \ + V(Deoptimize, void, DeoptimizationKind) \ \ V(A64Load, int64_t, volatile const int64_t *) \ V(A64Store, void, volatile int64_t *, int64_t) \ diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h index 4ca52de2a2..fa287cb0ad 100644 --- a/runtime/entrypoints/runtime_asm_entrypoints.h +++ b/runtime/entrypoints/runtime_asm_entrypoints.h @@ -17,6 +17,8 @@ #ifndef ART_RUNTIME_ENTRYPOINTS_RUNTIME_ASM_ENTRYPOINTS_H_ #define ART_RUNTIME_ENTRYPOINTS_RUNTIME_ASM_ENTRYPOINTS_H_ +#include "deoptimization_kind.h" + namespace art { #ifndef BUILDING_LIBART @@ -77,7 +79,7 @@ static inline const void* GetQuickInstrumentationEntryPoint() { } // Stub to deoptimize from compiled code. -extern "C" void art_quick_deoptimize_from_compiled_code(); +extern "C" void art_quick_deoptimize_from_compiled_code(DeoptimizationKind); // The return_pc of instrumentation exit stub. 
extern "C" void art_quick_instrumentation_exit(); diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h index cd30d9d149..c3dd21f113 100644 --- a/runtime/gc/accounting/card_table.h +++ b/runtime/gc/accounting/card_table.h @@ -51,6 +51,7 @@ class CardTable { static constexpr size_t kCardSize = 1 << kCardShift; static constexpr uint8_t kCardClean = 0x0; static constexpr uint8_t kCardDirty = 0x70; + static constexpr uint8_t kCardAged = kCardDirty - 1; static CardTable* Create(const uint8_t* heap_begin, size_t heap_capacity); ~CardTable(); diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h index d5c36bfb19..3503973321 100644 --- a/runtime/gc/collector/concurrent_copying-inl.h +++ b/runtime/gc/collector/concurrent_copying-inl.h @@ -152,7 +152,8 @@ inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref, inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) { mirror::Object* ret; - if (from_ref == nullptr) { + // We can get here before marking starts since we gray immune objects before the marking phase. + if (from_ref == nullptr || !Thread::Current()->GetIsGcMarking()) { return from_ref; } // TODO: Consider removing this check when we are done investigating slow paths. b/30162165 diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index e27c1ecb08..a450a751b8 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -77,6 +77,7 @@ ConcurrentCopying::ConcurrentCopying(Heap* heap, mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock), thread_running_gc_(nullptr), is_marking_(false), + is_using_read_barrier_entrypoints_(false), is_active_(false), is_asserting_to_space_invariant_(false), region_space_bitmap_(nullptr), @@ -163,6 +164,15 @@ void ConcurrentCopying::RunPhases() { ReaderMutexLock mu(self, *Locks::mutator_lock_); InitializePhase(); } + if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) { + // Switch to read barrier mark entrypoints before we gray the objects. This is required in case + // a mutator sees a gray bit and dispatches on the entrpoint. (b/37876887). + ActivateReadBarrierEntrypoints(); + // Gray dirty immune objects concurrently to reduce GC pause times. We re-process gray cards in + // the pause. + ReaderMutexLock mu(self, *Locks::mutator_lock_); + GrayAllDirtyImmuneObjects(); + } FlipThreadRoots(); { ReaderMutexLock mu(self, *Locks::mutator_lock_); @@ -192,6 +202,59 @@ void ConcurrentCopying::RunPhases() { thread_running_gc_ = nullptr; } +class ConcurrentCopying::ActivateReadBarrierEntrypointsCheckpoint : public Closure { + public: + explicit ActivateReadBarrierEntrypointsCheckpoint(ConcurrentCopying* concurrent_copying) + : concurrent_copying_(concurrent_copying) {} + + void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS { + // Note: self is not necessarily equal to thread since thread may be suspended. + Thread* self = Thread::Current(); + DCHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc) + << thread->GetState() << " thread " << thread << " self " << self; + // Switch to the read barrier entrypoints. + thread->SetReadBarrierEntrypoints(); + // If thread is a running mutator, then act on behalf of the garbage collector. + // See the code in ThreadList::RunCheckpoint. 
+ concurrent_copying_->GetBarrier().Pass(self); + } + + private: + ConcurrentCopying* const concurrent_copying_; +}; + +class ConcurrentCopying::ActivateReadBarrierEntrypointsCallback : public Closure { + public: + explicit ActivateReadBarrierEntrypointsCallback(ConcurrentCopying* concurrent_copying) + : concurrent_copying_(concurrent_copying) {} + + void Run(Thread* self ATTRIBUTE_UNUSED) OVERRIDE REQUIRES(Locks::thread_list_lock_) { + // This needs to run under the thread_list_lock_ critical section in ThreadList::RunCheckpoint() + // to avoid a race with ThreadList::Register(). + CHECK(!concurrent_copying_->is_using_read_barrier_entrypoints_); + concurrent_copying_->is_using_read_barrier_entrypoints_ = true; + } + + private: + ConcurrentCopying* const concurrent_copying_; +}; + +void ConcurrentCopying::ActivateReadBarrierEntrypoints() { + Thread* const self = Thread::Current(); + ActivateReadBarrierEntrypointsCheckpoint checkpoint(this); + ThreadList* thread_list = Runtime::Current()->GetThreadList(); + gc_barrier_->Init(self, 0); + ActivateReadBarrierEntrypointsCallback callback(this); + const size_t barrier_count = thread_list->RunCheckpoint(&checkpoint, &callback); + // If there are no threads to wait which implies that all the checkpoint functions are finished, + // then no need to release the mutator lock. + if (barrier_count == 0) { + return; + } + ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); + gc_barrier_->Increment(self, barrier_count); +} + void ConcurrentCopying::BindBitmaps() { Thread* self = Thread::Current(); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); @@ -352,9 +415,12 @@ class ConcurrentCopying::FlipCallback : public Closure { if (kVerifyNoMissingCardMarks) { cc->VerifyNoMissingCardMarks(); } - CHECK(thread == self); + CHECK_EQ(thread, self); Locks::mutator_lock_->AssertExclusiveHeld(self); - cc->region_space_->SetFromSpace(cc->rb_table_, cc->force_evacuate_all_); + { + TimingLogger::ScopedTiming split2("(Paused)SetFromSpace", cc->GetTimings()); + cc->region_space_->SetFromSpace(cc->rb_table_, cc->force_evacuate_all_); + } cc->SwapStacks(); if (ConcurrentCopying::kEnableFromSpaceAccountingCheck) { cc->RecordLiveStackFreezeSize(self); @@ -368,11 +434,11 @@ class ConcurrentCopying::FlipCallback : public Closure { } if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) { CHECK(Runtime::Current()->IsAotCompiler()); - TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings()); + TimingLogger::ScopedTiming split3("(Paused)VisitTransactionRoots", cc->GetTimings()); Runtime::Current()->VisitTransactionRoots(cc); } if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) { - cc->GrayAllDirtyImmuneObjects(); + cc->GrayAllNewlyDirtyImmuneObjects(); if (kIsDebugBuild) { // Check that all non-gray immune objects only refernce immune objects. cc->VerifyGrayImmuneObjects(); @@ -519,8 +585,8 @@ class ConcurrentCopying::VerifyNoMissingCardMarkVisitor { void ConcurrentCopying::VerifyNoMissingCardMarkCallback(mirror::Object* obj, void* arg) { auto* collector = reinterpret_cast<ConcurrentCopying*>(arg); - // Objects not on dirty cards should never have references to newly allocated regions. - if (!collector->heap_->GetCardTable()->IsDirty(obj)) { + // Objects not on dirty or aged cards should never have references to newly allocated regions. 
+ if (collector->heap_->GetCardTable()->GetCard(obj) == gc::accounting::CardTable::kCardClean) { VerifyNoMissingCardMarkVisitor visitor(collector, /*holder*/ obj); obj->VisitReferences</*kVisitNativeRoots*/true, kVerifyNone, kWithoutReadBarrier>( visitor, @@ -583,53 +649,100 @@ void ConcurrentCopying::FlipThreadRoots() { } } +template <bool kConcurrent> class ConcurrentCopying::GrayImmuneObjectVisitor { public: - explicit GrayImmuneObjectVisitor() {} + explicit GrayImmuneObjectVisitor(Thread* self) : self_(self) {} ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_) { - if (kUseBakerReadBarrier) { - if (kIsDebugBuild) { - Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current()); + if (kUseBakerReadBarrier && obj->GetReadBarrierState() == ReadBarrier::WhiteState()) { + if (kConcurrent) { + Locks::mutator_lock_->AssertSharedHeld(self_); + obj->AtomicSetReadBarrierState(ReadBarrier::WhiteState(), ReadBarrier::GrayState()); + // Mod union table VisitObjects may visit the same object multiple times so we can't check + // the result of the atomic set. + } else { + Locks::mutator_lock_->AssertExclusiveHeld(self_); + obj->SetReadBarrierState(ReadBarrier::GrayState()); } - obj->SetReadBarrierState(ReadBarrier::GrayState()); } } static void Callback(mirror::Object* obj, void* arg) REQUIRES_SHARED(Locks::mutator_lock_) { - reinterpret_cast<GrayImmuneObjectVisitor*>(arg)->operator()(obj); + reinterpret_cast<GrayImmuneObjectVisitor<kConcurrent>*>(arg)->operator()(obj); } + + private: + Thread* const self_; }; void ConcurrentCopying::GrayAllDirtyImmuneObjects() { - TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings()); - gc::Heap* const heap = Runtime::Current()->GetHeap(); - accounting::CardTable* const card_table = heap->GetCardTable(); - WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); + TimingLogger::ScopedTiming split("GrayAllDirtyImmuneObjects", GetTimings()); + accounting::CardTable* const card_table = heap_->GetCardTable(); + Thread* const self = Thread::Current(); + using VisitorType = GrayImmuneObjectVisitor</* kIsConcurrent */ true>; + VisitorType visitor(self); + WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) { DCHECK(space->IsImageSpace() || space->IsZygoteSpace()); - GrayImmuneObjectVisitor visitor; - accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space); + accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); // Mark all the objects on dirty cards since these may point to objects in other space. // Once these are marked, the GC will eventually clear them later. // Table is non null for boot image and zygote spaces. It is only null for application image // spaces. if (table != nullptr) { - // TODO: Consider adding precleaning outside the pause. table->ProcessCards(); - table->VisitObjects(GrayImmuneObjectVisitor::Callback, &visitor); - // Since the cards are recorded in the mod-union table and this is paused, we can clear - // the cards for the space (to madvise). + table->VisitObjects(&VisitorType::Callback, &visitor); + // Don't clear cards here since we need to rescan in the pause. If we cleared the cards here, + // there would be races with the mutator marking new cards. + } else { + // Keep cards aged if we don't have a mod-union table since we may need to scan them in future + // GCs. This case is for app images. 
+ card_table->ModifyCardsAtomic( + space->Begin(), + space->End(), + [](uint8_t card) { + return (card != gc::accounting::CardTable::kCardClean) + ? gc::accounting::CardTable::kCardAged + : card; + }, + /* card modified visitor */ VoidFunctor()); + card_table->Scan</* kClearCard */ false>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + visitor, + gc::accounting::CardTable::kCardAged); + } + } +} + +void ConcurrentCopying::GrayAllNewlyDirtyImmuneObjects() { + TimingLogger::ScopedTiming split("(Paused)GrayAllNewlyDirtyImmuneObjects", GetTimings()); + accounting::CardTable* const card_table = heap_->GetCardTable(); + using VisitorType = GrayImmuneObjectVisitor</* kIsConcurrent */ false>; + Thread* const self = Thread::Current(); + VisitorType visitor(self); + WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); + for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) { + DCHECK(space->IsImageSpace() || space->IsZygoteSpace()); + accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); + + // Don't need to scan aged cards since we did these before the pause. Note that scanning cards + // also handles the mod-union table cards. + card_table->Scan</* kClearCard */ false>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + visitor, + gc::accounting::CardTable::kCardDirty); + if (table != nullptr) { + // Add the cards to the mod-union table so that we can clear cards to save RAM. + table->ProcessCards(); TimingLogger::ScopedTiming split2("(Paused)ClearCards", GetTimings()); card_table->ClearCardRange(space->Begin(), AlignDown(space->End(), accounting::CardTable::kCardSize)); - } else { - // TODO: Consider having a mark bitmap for app image spaces and avoid scanning during the - // pause because app image spaces are all dirty pages anyways. - card_table->Scan<false>(space->GetMarkBitmap(), space->Begin(), space->End(), visitor); } } - // Since all of the objects that may point to other spaces are marked, we can avoid all the read + // Since all of the objects that may point to other spaces are gray, we can avoid all the read // barriers in the immune spaces. updated_all_immune_objects_.StoreRelaxed(true); } @@ -658,6 +771,7 @@ class ConcurrentCopying::ImmuneSpaceScanObjVisitor { ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_) { if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) { + // Only need to scan gray objects. if (obj->GetReadBarrierState() == ReadBarrier::GrayState()) { collector_->ScanImmuneObject(obj); // Done scanning the object, go back to white. @@ -707,6 +821,7 @@ void ConcurrentCopying::MarkingPhase() { if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects && table != nullptr) { table->VisitObjects(ImmuneSpaceScanObjVisitor::Callback, &visitor); } else { + // TODO: Scan only the aged cards. live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()), reinterpret_cast<uintptr_t>(space->Limit()), visitor); @@ -876,6 +991,12 @@ class ConcurrentCopying::DisableMarkingCallback : public Closure { // to avoid a race with ThreadList::Register(). 
CHECK(concurrent_copying_->is_marking_); concurrent_copying_->is_marking_ = false; + if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) { + CHECK(concurrent_copying_->is_using_read_barrier_entrypoints_); + concurrent_copying_->is_using_read_barrier_entrypoints_ = false; + } else { + CHECK(!concurrent_copying_->is_using_read_barrier_entrypoints_); + } } private: @@ -1621,25 +1742,29 @@ void ConcurrentCopying::MarkZygoteLargeObjects() { Thread* const self = Thread::Current(); WriterMutexLock rmu(self, *Locks::heap_bitmap_lock_); space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace(); - // Pick the current live bitmap (mark bitmap if swapped). - accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap(); - accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap(); - // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept. - std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic(); - live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first), - reinterpret_cast<uintptr_t>(range.second), - [mark_bitmap, los, self](mirror::Object* obj) - REQUIRES(Locks::heap_bitmap_lock_) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (los->IsZygoteLargeObject(self, obj)) { - mark_bitmap->Set(obj); - } - }); + if (los != nullptr) { + // Pick the current live bitmap (mark bitmap if swapped). + accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap(); + accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap(); + // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept. + std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic(); + live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first), + reinterpret_cast<uintptr_t>(range.second), + [mark_bitmap, los, self](mirror::Object* obj) + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (los->IsZygoteLargeObject(self, obj)) { + mark_bitmap->Set(obj); + } + }); + } } void ConcurrentCopying::SweepLargeObjects(bool swap_bitmaps) { TimingLogger::ScopedTiming split("SweepLargeObjects", GetTimings()); - RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps)); + if (heap_->GetLargeObjectsSpace() != nullptr) { + RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps)); + } } void ConcurrentCopying::ReclaimPhase() { @@ -1888,7 +2013,6 @@ void ConcurrentCopying::AssertToSpaceInvariantInNonMovingSpace(mirror::Object* o heap_mark_bitmap_->GetContinuousSpaceBitmap(ref); accounting::LargeObjectBitmap* los_bitmap = heap_mark_bitmap_->GetLargeObjectBitmap(ref); - CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range"; bool is_los = mark_bitmap == nullptr; if ((!is_los && mark_bitmap->Test(ref)) || (is_los && los_bitmap->Test(ref))) { @@ -2392,7 +2516,6 @@ mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref, heap_mark_bitmap_->GetContinuousSpaceBitmap(ref); accounting::LargeObjectBitmap* los_bitmap = heap_mark_bitmap_->GetLargeObjectBitmap(ref); - CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range"; bool is_los = mark_bitmap == nullptr; if (!is_los && mark_bitmap->Test(ref)) { // Already marked. 
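The two graying passes above hinge on the new kCardAged state: GrayAllDirtyImmuneObjects() runs concurrently before the pause and, for immune spaces without a mod-union table, ages the dirty cards it scans, so that GrayAllNewlyDirtyImmuneObjects(), which runs during the flip pause, only rescans cards a mutator dirtied again in the meantime. A standalone sketch of the aging transform passed to ModifyCardsAtomic (the constants mirror CardTable; the free function and asserts are illustrative only, not part of the patch):

#include <cstdint>

constexpr uint8_t kCardClean = 0x0;
constexpr uint8_t kCardDirty = 0x70;
constexpr uint8_t kCardAged  = kCardDirty - 1;

// Any card that is not clean is downgraded to "aged"; clean cards stay clean.
// A mutator write after this pass turns the card back to kCardDirty, which is
// exactly the set of cards the paused pass has to rescan.
constexpr uint8_t AgeCard(uint8_t card) {
  return (card != kCardClean) ? kCardAged : card;
}

static_assert(AgeCard(kCardDirty) == kCardAged, "dirty cards become aged");
static_assert(AgeCard(kCardAged) == kCardAged, "aged cards stay aged");
static_assert(AgeCard(kCardClean) == kCardClean, "clean cards stay clean");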
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index 37b6a2c541..c09e0eb109 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -118,6 +118,11 @@ class ConcurrentCopying : public GarbageCollector { bool IsMarking() const { return is_marking_; } + // We may want to use read barrier entrypoints before is_marking_ is true since concurrent graying + // creates a small window where we might dispatch on these entrypoints. + bool IsUsingReadBarrierEntrypoints() const { + return is_using_read_barrier_entrypoints_; + } bool IsActive() const { return is_active_; } @@ -165,6 +170,9 @@ class ConcurrentCopying : public GarbageCollector { void GrayAllDirtyImmuneObjects() REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); + void GrayAllNewlyDirtyImmuneObjects() + REQUIRES(Locks::mutator_lock_) + REQUIRES(!mark_stack_lock_); void VerifyGrayImmuneObjects() REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); @@ -252,6 +260,8 @@ class ConcurrentCopying : public GarbageCollector { REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); void DumpPerformanceInfo(std::ostream& os) OVERRIDE REQUIRES(!rb_slow_path_histogram_lock_); + // Set the read barrier mark entrypoints to non-null. + void ActivateReadBarrierEntrypoints(); space::RegionSpace* region_space_; // The underlying region space. std::unique_ptr<Barrier> gc_barrier_; @@ -268,6 +278,8 @@ class ConcurrentCopying : public GarbageCollector { GUARDED_BY(mark_stack_lock_); Thread* thread_running_gc_; bool is_marking_; // True while marking is ongoing. + // True while we might dispatch on the read barrier entrypoints. + bool is_using_read_barrier_entrypoints_; bool is_active_; // True while the collection is ongoing. bool is_asserting_to_space_invariant_; // True while asserting the to-space invariant. ImmuneSpaces immune_spaces_; @@ -330,6 +342,8 @@ class ConcurrentCopying : public GarbageCollector { // ObjPtr since the GC may transition to suspended and runnable between phases. mirror::Class* java_lang_Object_; + class ActivateReadBarrierEntrypointsCallback; + class ActivateReadBarrierEntrypointsCheckpoint; class AssertToSpaceInvariantFieldVisitor; class AssertToSpaceInvariantObjectVisitor; class AssertToSpaceInvariantRefsVisitor; @@ -339,7 +353,7 @@ class ConcurrentCopying : public GarbageCollector { class DisableMarkingCheckpoint; class DisableWeakRefAccessCallback; class FlipCallback; - class GrayImmuneObjectVisitor; + template <bool kConcurrent> class GrayImmuneObjectVisitor; class ImmuneSpaceScanObjVisitor; class LostCopyVisitor; class RefFieldsVisitor; diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index bd4f99b7f5..298336ae4d 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -150,8 +150,13 @@ static constexpr bool kUsePartialTlabs = true; static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(300 * MB - Heap::kDefaultNonMovingSpaceCapacity); #else -// For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000. +#ifdef __ANDROID__ +// For 32-bit Android, use 0x20000000 because asan reserves 0x04000000 - 0x20000000. static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(0x20000000); +#else +// For 32-bit host, use 0x40000000 because asan uses most of the space below this. 
+static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(0x40000000); +#endif #endif static inline bool CareAboutPauseTimes() { diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc index b32b272a31..1dfb0f6e9a 100644 --- a/runtime/jit/jit.cc +++ b/runtime/jit/jit.cc @@ -353,6 +353,7 @@ Jit::~Jit() { DCHECK(!profile_saver_options_.IsEnabled() || !ProfileSaver::IsStarted()); if (dump_info_on_shutdown_) { DumpInfo(LOG_STREAM(INFO)); + Runtime::Current()->DumpDeoptimizations(LOG_STREAM(INFO)); } DeleteThreadPool(); if (jit_compiler_handle_ != nullptr) { diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index e9a5ae5fa9..81b87f15fd 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -149,7 +149,6 @@ JitCodeCache::JitCodeCache(MemMap* code_map, used_memory_for_code_(0), number_of_compilations_(0), number_of_osr_compilations_(0), - number_of_deoptimizations_(0), number_of_collections_(0), histogram_stack_map_memory_use_("Memory used for stack maps", 16), histogram_code_memory_use_("Memory used for compiled code", 16), @@ -1416,8 +1415,6 @@ void JitCodeCache::InvalidateCompiledCodeFor(ArtMethod* method, osr_code_map_.erase(it); } } - MutexLock mu(Thread::Current(), lock_); - number_of_deoptimizations_++; } uint8_t* JitCodeCache::AllocateCode(size_t code_size) { @@ -1456,7 +1453,6 @@ void JitCodeCache::Dump(std::ostream& os) { << "Total number of JIT compilations: " << number_of_compilations_ << "\n" << "Total number of JIT compilations for on stack replacement: " << number_of_osr_compilations_ << "\n" - << "Total number of deoptimizations: " << number_of_deoptimizations_ << "\n" << "Total number of JIT code cache collections: " << number_of_collections_ << std::endl; histogram_stack_map_memory_use_.PrintMemoryUse(os); histogram_code_memory_use_.PrintMemoryUse(os); diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index db214e7983..612d06ba1c 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -384,9 +384,6 @@ class JitCodeCache { // Number of compilations for on-stack-replacement done throughout the lifetime of the JIT. size_t number_of_osr_compilations_ GUARDED_BY(lock_); - // Number of deoptimizations done throughout the lifetime of the JIT. - size_t number_of_deoptimizations_ GUARDED_BY(lock_); - // Number of code cache collections done throughout the lifetime of the JIT. size_t number_of_collections_ GUARDED_BY(lock_); diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc index 52649c7075..0acce1e421 100644 --- a/runtime/jit/profile_compilation_info.cc +++ b/runtime/jit/profile_compilation_info.cc @@ -115,7 +115,11 @@ bool ProfileCompilationInfo::MergeAndSave(const std::string& filename, ScopedTrace trace(__PRETTY_FUNCTION__); ScopedFlock flock; std::string error; - if (!flock.Init(filename.c_str(), O_RDWR | O_NOFOLLOW | O_CLOEXEC, /* block */ false, &error)) { + int flags = O_RDWR | O_NOFOLLOW | O_CLOEXEC; + // There's no need to fsync profile data right away. We get many chances + // to write it again in case something goes wrong. We can rely on a simple + // close(), no sync, and let the kernel decide when to write to disk.
+ if (!flock.Init(filename.c_str(), flags, /*block*/false, /*flush_on_close*/false, &error)) { LOG(WARNING) << "Couldn't lock the profile file " << filename << ": " << error; return false; } diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc index d190bdfd46..1441987ef0 100644 --- a/runtime/jit/profile_saver.cc +++ b/runtime/jit/profile_saver.cc @@ -212,6 +212,10 @@ class GetMethodsVisitor : public ClassVisitor { void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() { ScopedTrace trace(__PRETTY_FUNCTION__); + + // Resolve any new registered locations. + ResolveTrackedLocations(); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); std::set<DexCacheResolvedClasses> resolved_classes = class_linker->GetResolvedClasses(/*ignore boot classes*/ true); @@ -260,6 +264,10 @@ void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() { bool ProfileSaver::ProcessProfilingInfo(bool force_save, /*out*/uint16_t* number_of_new_methods) { ScopedTrace trace(__PRETTY_FUNCTION__); + + // Resolve any new registered locations. + ResolveTrackedLocations(); + SafeMap<std::string, std::set<std::string>> tracked_locations; { // Make a copy so that we don't hold the lock while doing I/O. @@ -497,17 +505,34 @@ bool ProfileSaver::IsStarted() { return instance_ != nullptr; } -void ProfileSaver::AddTrackedLocations(const std::string& output_filename, - const std::vector<std::string>& code_paths) { - auto it = tracked_dex_base_locations_.find(output_filename); - if (it == tracked_dex_base_locations_.end()) { - tracked_dex_base_locations_.Put(output_filename, - std::set<std::string>(code_paths.begin(), code_paths.end())); +static void AddTrackedLocationsToMap(const std::string& output_filename, + const std::vector<std::string>& code_paths, + SafeMap<std::string, std::set<std::string>>* map) { + auto it = map->find(output_filename); + if (it == map->end()) { + map->Put(output_filename, std::set<std::string>(code_paths.begin(), code_paths.end())); } else { it->second.insert(code_paths.begin(), code_paths.end()); } } +void ProfileSaver::AddTrackedLocations(const std::string& output_filename, + const std::vector<std::string>& code_paths) { + // Add the code paths to the list of tracked location. + AddTrackedLocationsToMap(output_filename, code_paths, &tracked_dex_base_locations_); + // The code paths may contain symlinks which could fool the profiler. + // If the dex file is compiled with an absolute location but loaded with symlink + // the profiler could skip the dex due to location mismatch. + // To avoid this, we add the code paths to the temporary cache of 'to_be_resolved' + // locations. When the profiler thread executes we will resolve the paths to their + // real paths. + // Note that we delay taking the realpath to avoid spending more time than needed + // when registering location (as it is done during app launch). + AddTrackedLocationsToMap(output_filename, + code_paths, + &tracked_dex_base_locations_to_be_resolved_); +} + void ProfileSaver::DumpInstanceInfo(std::ostream& os) { MutexLock mu(Thread::Current(), *Locks::profiler_lock_); if (instance_ != nullptr) { @@ -556,4 +581,38 @@ bool ProfileSaver::HasSeenMethod(const std::string& profile, return false; } +void ProfileSaver::ResolveTrackedLocations() { + SafeMap<std::string, std::set<std::string>> locations_to_be_resolved; + { + // Make a copy so that we don't hold the lock while doing I/O. 
+ MutexLock mu(Thread::Current(), *Locks::profiler_lock_); + locations_to_be_resolved = tracked_dex_base_locations_to_be_resolved_; + tracked_dex_base_locations_to_be_resolved_.clear(); + } + + // Resolve the locations. + SafeMap<std::string, std::vector<std::string>> resolved_locations_map; + for (const auto& it : locations_to_be_resolved) { + const std::string& filename = it.first; + const std::set<std::string>& locations = it.second; + auto resolved_locations_it = resolved_locations_map.Put( + filename, + std::vector<std::string>(locations.size())); + + for (const auto& location : locations) { + UniqueCPtr<const char[]> location_real(realpath(location.c_str(), nullptr)); + // Note that it's ok if we cannot get the real path. + if (location_real != nullptr) { + resolved_locations_it->second.emplace_back(location_real.get()); + } + } + } + + // Add the resolved locations to the tracked collection. + MutexLock mu(Thread::Current(), *Locks::profiler_lock_); + for (const auto& it : resolved_locations_map) { + AddTrackedLocationsToMap(it.first, it.second, &tracked_dex_base_locations_); + } +} + } // namespace art diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h index be2bffc647..bd539a41d0 100644 --- a/runtime/jit/profile_saver.h +++ b/runtime/jit/profile_saver.h @@ -112,6 +112,10 @@ class ProfileSaver { void DumpInfo(std::ostream& os); + // Resolve the realpath of the locations stored in tracked_dex_base_locations_to_be_resolved_ + // and put the result in tracked_dex_base_locations_. + void ResolveTrackedLocations() REQUIRES(!Locks::profiler_lock_); + // The only instance of the saver. static ProfileSaver* instance_ GUARDED_BY(Locks::profiler_lock_); // Profile saver thread. @@ -119,11 +123,17 @@ class ProfileSaver { jit::JitCodeCache* jit_code_cache_; - // Collection of code paths that the profiles tracks. + // Collection of code paths that the profiler tracks. // It maps profile locations to code paths (dex base locations). SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_ GUARDED_BY(Locks::profiler_lock_); + // Collection of code paths that the profiler tracks but may not have been resolved + // to their realpath. The resolution is done async to minimize the time it takes for + // someone to register a path. + SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_to_be_resolved_ + GUARDED_BY(Locks::profiler_lock_); + bool shutting_down_ GUARDED_BY(Locks::profiler_lock_); uint64_t last_time_ns_saver_woke_up_ GUARDED_BY(wait_lock_); uint32_t jit_activity_notifications_; diff --git a/runtime/jit/profile_saver_options.h b/runtime/jit/profile_saver_options.h index c8d256fec0..07aeb66eb6 100644 --- a/runtime/jit/profile_saver_options.h +++ b/runtime/jit/profile_saver_options.h @@ -20,7 +20,7 @@ namespace art { struct ProfileSaverOptions { public: - static constexpr uint32_t kMinSavePeriodMs = 20 * 1000; // 20 seconds + static constexpr uint32_t kMinSavePeriodMs = 40 * 1000; // 40 seconds static constexpr uint32_t kSaveResolvedClassesDelayMs = 5 * 1000; // 5 seconds // Minimum number of JIT samples during launch to include a method into the profile.
static constexpr uint32_t kStartupMethodSamples = 1; diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index 0617dae1ae..77554e8b30 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -622,6 +622,31 @@ static jstring DexFile_getNonProfileGuidedCompilerFilter(JNIEnv* env, return env->NewStringUTF(new_filter_str.c_str()); } +static jstring DexFile_getSafeModeCompilerFilter(JNIEnv* env, + jclass javeDexFileClass ATTRIBUTE_UNUSED, + jstring javaCompilerFilter) { + ScopedUtfChars compiler_filter(env, javaCompilerFilter); + if (env->ExceptionCheck()) { + return nullptr; + } + + CompilerFilter::Filter filter; + if (!CompilerFilter::ParseCompilerFilter(compiler_filter.c_str(), &filter)) { + return javaCompilerFilter; + } + + CompilerFilter::Filter new_filter = CompilerFilter::GetSafeModeFilterFrom(filter); + + // Filter stayed the same, return input. + if (filter == new_filter) { + return javaCompilerFilter; + } + + // Create a new string object and return. + std::string new_filter_str = CompilerFilter::NameOfFilter(new_filter); + return env->NewStringUTF(new_filter_str.c_str()); +} + static jboolean DexFile_isBackedByOatFile(JNIEnv* env, jclass, jobject cookie) { const OatFile* oat_file = nullptr; std::vector<const DexFile*> dex_files; @@ -695,6 +720,9 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(DexFile, getNonProfileGuidedCompilerFilter, "(Ljava/lang/String;)Ljava/lang/String;"), + NATIVE_METHOD(DexFile, + getSafeModeCompilerFilter, + "(Ljava/lang/String;)Ljava/lang/String;"), NATIVE_METHOD(DexFile, isBackedByOatFile, "(Ljava/lang/Object;)Z"), NATIVE_METHOD(DexFile, getDexFileStatus, "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"), diff --git a/runtime/oat.h b/runtime/oat.h index 9b2227bc0c..e119b81bff 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,8 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - // Revert concurrent graying for immune spaces. - static constexpr uint8_t kOatVersion[] = { '1', '2', '2', '\0' }; + static constexpr uint8_t kOatVersion[] = { '1', '2', '5', '\0' }; // ARM Baker narrow thunks. static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc index eafa77f1a2..603bbbf8bd 100644 --- a/runtime/oat_file_assistant.cc +++ b/runtime/oat_file_assistant.cc @@ -68,19 +68,34 @@ std::ostream& operator << (std::ostream& stream, const OatFileAssistant::OatStat OatFileAssistant::OatFileAssistant(const char* dex_location, const InstructionSet isa, bool load_executable) - : OatFileAssistant(dex_location, nullptr, isa, load_executable) -{ } - -OatFileAssistant::OatFileAssistant(const char* dex_location, - const char* oat_location, - const InstructionSet isa, - bool load_executable) : isa_(isa), load_executable_(load_executable), odex_(this, /*is_oat_location*/ false), oat_(this, /*is_oat_location*/ true) { CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location"; - dex_location_.assign(dex_location); + + // Try to get the realpath for the dex location. + // + // This is OK with respect to dalvik cache naming scheme because we never + // generate oat files starting from symlinks which go into dalvik cache. + // (recall that the oat files in dalvik cache are encoded by replacing '/' + // with '@' in the path). 
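Earlier in this hunk, dalvik_system_DexFile.cc gains a getSafeModeCompilerFilter native method that parses the filter name, maps it through CompilerFilter::GetSafeModeFilterFrom, and returns the original jstring when the filter is unchanged or unparsable. A rough standalone approximation of that shape is below; the exact capping policy is an assumption of this sketch, the authoritative mapping is GetSafeModeFilterFrom.

```cpp
#include <string>

// Assumed policy for this sketch: safe mode should not run AOT-compiled code,
// so compiled filters are lowered to "quicken"; anything else, including names
// we cannot parse, is passed through unchanged (mirroring the JNI fast path
// that returns the input jstring).
std::string GetSafeModeFilter(const std::string& filter) {
  if (filter == "speed" || filter == "speed-profile" || filter == "everything") {
    return "quicken";
  }
  return filter;
}
```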
+ // The boot image oat files (which are symlinked in dalvik-cache) are not + // loaded via the oat file assistant. + // + // The only case when the dex location may resolve to a different path + // is for secondary dex files (e.g. /data/user/0 symlinks to /data/data and + // the app is free to create its own internal layout). Related to this it is + // worthwhile to mention that installd resolves the secondary dex location + // before calling dex2oat. + UniqueCPtr<const char[]> dex_location_real(realpath(dex_location, nullptr)); + if (dex_location_real != nullptr) { + dex_location_.assign(dex_location_real.get()); + } else { + // If we can't get the realpath of the location there's not much point in trying to move on. + PLOG(ERROR) << "Could not get the realpath of dex_location " << dex_location; + return; + } if (load_executable_ && isa != kRuntimeISA) { LOG(WARNING) << "OatFileAssistant: Load executable specified, " @@ -98,15 +113,27 @@ OatFileAssistant::OatFileAssistant(const char* dex_location, } // Get the oat filename. - if (oat_location != nullptr) { - oat_.Reset(oat_location); + std::string oat_file_name; + if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) { + oat_.Reset(oat_file_name); } else { - std::string oat_file_name; - if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) { - oat_.Reset(oat_file_name); - } else { - LOG(WARNING) << "Failed to determine oat file name for dex location " + LOG(WARNING) << "Failed to determine oat file name for dex location " << dex_location_ << ": " << error_msg; + } + + // Check if the dex directory is writable. + // This will be needed in most uses of OatFileAssistant and so it's OK to + // compute it eagerly. (the only use which will not make use of it is + // OatFileAssistant::GetStatusDump()) + size_t pos = dex_location_.rfind('/'); + if (pos == std::string::npos) { + LOG(WARNING) << "Failed to determine dex file parent directory: " << dex_location_; + } else { + std::string parent = dex_location_.substr(0, pos); + if (access(parent.c_str(), W_OK) == 0) { + dex_parent_writable_ = true; + } else { + VLOG(oat) << "Dex parent of " << dex_location_ << " is not writable: " << strerror(errno); } } } @@ -139,12 +166,17 @@ bool OatFileAssistant::Lock(std::string* error_msg) { CHECK(error_msg != nullptr); CHECK(!flock_.HasFile()) << "OatFileAssistant::Lock already acquired"; - const std::string* oat_file_name = oat_.Filename(); - if (oat_file_name == nullptr) { - *error_msg = "Failed to determine lock file"; - return false; - } - std::string lock_file_name = *oat_file_name + ".flock"; + // Note the lock will only succeed for secondary dex files and in test + // environment. + // + // The lock *will fail* for all primary apks in a production environment. + // The app does not have permissions to create locks next to its dex location + // (be it system, data or vendor parition). We also cannot use the odex or + // oat location for the same reasoning. + // + // This is best effort and if it fails it's unlikely that we will be able + // to generate oat files anyway. + std::string lock_file_name = dex_location_ + "." 
+ GetInstructionSetString(isa_) + ".flock"; if (!flock_.Init(lock_file_name.c_str(), error_msg)) { unlink(lock_file_name.c_str()); @@ -170,7 +202,7 @@ static bool GetRuntimeCompilerFilterOption(CompilerFilter::Filter* filter, CHECK(filter != nullptr); CHECK(error_msg != nullptr); - *filter = CompilerFilter::kDefaultCompilerFilter; + *filter = OatFileAssistant::kDefaultCompilerFilterForDexLoading; for (StringPiece option : Runtime::Current()->GetCompilerOptions()) { if (option.starts_with("--compiler-filter=")) { const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data(); @@ -207,7 +239,7 @@ OatFileAssistant::MakeUpToDate(bool profile_changed, std::string* error_msg) { case kDex2OatForBootImage: case kDex2OatForRelocation: case kDex2OatForFilter: - return GenerateOatFile(error_msg); + return GenerateOatFileNoChecks(info, error_msg); } UNREACHABLE(); } @@ -479,8 +511,110 @@ OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& return kOatUpToDate; } -OatFileAssistant::ResultOfAttemptToUpdate -OatFileAssistant::GenerateOatFile(std::string* error_msg) { +static bool DexLocationToOdexNames(const std::string& location, + InstructionSet isa, + std::string* odex_filename, + std::string* oat_dir, + std::string* isa_dir, + std::string* error_msg) { + CHECK(odex_filename != nullptr); + CHECK(error_msg != nullptr); + + // The odex file name is formed by replacing the dex_location extension with + // .odex and inserting an oat/<isa> directory. For example: + // location = /foo/bar/baz.jar + // odex_location = /foo/bar/oat/<isa>/baz.odex + + // Find the directory portion of the dex location and add the oat/<isa> + // directory. + size_t pos = location.rfind('/'); + if (pos == std::string::npos) { + *error_msg = "Dex location " + location + " has no directory."; + return false; + } + std::string dir = location.substr(0, pos+1); + // Add the oat directory. + dir += "oat"; + if (oat_dir != nullptr) { + *oat_dir = dir; + } + // Add the isa directory + dir += "/" + std::string(GetInstructionSetString(isa)); + if (isa_dir != nullptr) { + *isa_dir = dir; + } + + // Get the base part of the file without the extension. + std::string file = location.substr(pos+1); + pos = file.rfind('.'); + if (pos == std::string::npos) { + *error_msg = "Dex location " + location + " has no extension."; + return false; + } + std::string base = file.substr(0, pos); + + *odex_filename = dir + "/" + base + ".odex"; + return true; +} + +// Prepare a subcomponent of the odex directory. +// (i.e. create and set the expected permissions on the path `dir`). +static bool PrepareDirectory(const std::string& dir, std::string* error_msg) { + struct stat dir_stat; + if (TEMP_FAILURE_RETRY(stat(dir.c_str(), &dir_stat)) == 0) { + // The directory exists. Check if it is indeed a directory. + if (!S_ISDIR(dir_stat.st_mode)) { + *error_msg = dir + " is not a dir"; + return false; + } else { + // The dir is already on disk. + return true; + } + } + + // Failed to stat. We need to create the directory. 
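DexLocationToOdexNames implements the naming scheme spelled out in its comment. A self-contained sketch of just the string manipulation, using a hypothetical DexToOdex() helper and no error_msg plumbing:

```cpp
#include <cstdio>
#include <string>

// /foo/bar/baz.jar + "arm64"  ->  /foo/bar/oat/arm64/baz.odex
static bool DexToOdex(const std::string& location, const std::string& isa, std::string* odex) {
  size_t slash = location.rfind('/');
  if (slash == std::string::npos) return false;               // No directory part.
  size_t dot = location.rfind('.');
  if (dot == std::string::npos || dot < slash) return false;  // No extension.
  *odex = location.substr(0, slash + 1) + "oat/" + isa + "/" +
          location.substr(slash + 1, dot - slash - 1) + ".odex";
  return true;
}

int main() {
  std::string odex;
  if (DexToOdex("/foo/bar/baz.jar", "arm64", &odex)) {
    std::printf("%s\n", odex.c_str());  // /foo/bar/oat/arm64/baz.odex
  }
  return 0;
}
```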
+ if (errno != ENOENT) { + *error_msg = "Could not stat isa dir " + dir + ":" + strerror(errno); + return false; + } + + mode_t mode = S_IRWXU | S_IXGRP | S_IXOTH; + if (mkdir(dir.c_str(), mode) != 0) { + *error_msg = "Could not create dir " + dir + ":" + strerror(errno); + return false; + } + if (chmod(dir.c_str(), mode) != 0) { + *error_msg = "Could not create the oat dir " + dir + ":" + strerror(errno); + return false; + } + return true; +} + +// Prepares the odex directory for the given dex location. +static bool PrepareOdexDirectories(const std::string& dex_location, + const std::string& expected_odex_location, + InstructionSet isa, + std::string* error_msg) { + std::string actual_odex_location; + std::string oat_dir; + std::string isa_dir; + if (!DexLocationToOdexNames( + dex_location, isa, &actual_odex_location, &oat_dir, &isa_dir, error_msg)) { + return false; + } + DCHECK_EQ(expected_odex_location, actual_odex_location); + + if (!PrepareDirectory(oat_dir, error_msg)) { + return false; + } + if (!PrepareDirectory(isa_dir, error_msg)) { + return false; + } + return true; +} + +OatFileAssistant::ResultOfAttemptToUpdate OatFileAssistant::GenerateOatFileNoChecks( + OatFileAssistant::OatFileInfo& info, std::string* error_msg) { CHECK(error_msg != nullptr); Runtime* runtime = Runtime::Current(); @@ -490,22 +624,37 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateNotAttempted; } - if (oat_.Filename() == nullptr) { + if (info.Filename() == nullptr) { *error_msg = "Generation of oat file for dex location " + dex_location_ + " not attempted because the oat file name could not be determined."; return kUpdateNotAttempted; } - const std::string& oat_file_name = *oat_.Filename(); + const std::string& oat_file_name = *info.Filename(); const std::string& vdex_file_name = ReplaceFileExtension(oat_file_name, "vdex"); // dex2oat ignores missing dex files and doesn't report an error. // Check explicitly here so we can detect the error properly. // TODO: Why does dex2oat behave that way? - if (!OS::FileExists(dex_location_.c_str())) { - *error_msg = "Dex location " + dex_location_ + " does not exists."; + struct stat dex_path_stat; + if (TEMP_FAILURE_RETRY(stat(dex_location_.c_str(), &dex_path_stat)) != 0) { + *error_msg = "Could not access dex location " + dex_location_ + ":" + strerror(errno); return kUpdateNotAttempted; } + // If this is the odex location, we need to create the odex file layout (../oat/isa/..) + if (!info.IsOatLocation()) { + if (!PrepareOdexDirectories(dex_location_, oat_file_name, isa_, error_msg)) { + return kUpdateNotAttempted; + } + } + + // Set the permissions for the oat and the vdex files. + // The user always gets read and write while the group and others propagate + // the reading access of the original dex file. 
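The comment above describes the mode that the following hunk computes for the generated oat and vdex files: always readable and writable by the owner, with group/other read copied from the input dex file. A small standalone sketch of that propagation rule, with a hypothetical helper name and assuming the output file is already open:

```cpp
#include <sys/stat.h>

// The owner always gets read/write; group and other read bits are inherited
// from the dex file, so the outputs are never more readable than their source.
static bool ChmodLikeDex(const char* dex_path, int out_fd) {
  struct stat dex_stat;
  if (stat(dex_path, &dex_stat) != 0) {
    return false;
  }
  mode_t mode = S_IRUSR | S_IWUSR |
                (dex_stat.st_mode & S_IRGRP) |
                (dex_stat.st_mode & S_IROTH);
  return fchmod(out_fd, mode) == 0;
}
```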
+ mode_t file_mode = S_IRUSR | S_IWUSR | + (dex_path_stat.st_mode & S_IRGRP) | + (dex_path_stat.st_mode & S_IROTH); + std::unique_ptr<File> vdex_file(OS::CreateEmptyFile(vdex_file_name.c_str())); if (vdex_file.get() == nullptr) { *error_msg = "Generation of oat file " + oat_file_name @@ -514,7 +663,7 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateNotAttempted; } - if (fchmod(vdex_file->Fd(), 0644) != 0) { + if (fchmod(vdex_file->Fd(), file_mode) != 0) { *error_msg = "Generation of oat file " + oat_file_name + " not attempted because the vdex file " + vdex_file_name + " could not be made world readable."; @@ -528,7 +677,7 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateNotAttempted; } - if (fchmod(oat_file->Fd(), 0644) != 0) { + if (fchmod(oat_file->Fd(), file_mode) != 0) { *error_msg = "Generation of oat file " + oat_file_name + " not attempted because the oat file could not be made world readable."; oat_file->Erase(); @@ -563,8 +712,8 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateFailed; } - // Mark that the oat file has changed and we should try to reload. - oat_.Reset(); + // Mark that the odex file has changed and we should try to reload. + info.Reset(); return kUpdateSucceeded; } @@ -623,35 +772,7 @@ bool OatFileAssistant::DexLocationToOdexFilename(const std::string& location, InstructionSet isa, std::string* odex_filename, std::string* error_msg) { - CHECK(odex_filename != nullptr); - CHECK(error_msg != nullptr); - - // The odex file name is formed by replacing the dex_location extension with - // .odex and inserting an oat/<isa> directory. For example: - // location = /foo/bar/baz.jar - // odex_location = /foo/bar/oat/<isa>/baz.odex - - // Find the directory portion of the dex location and add the oat/<isa> - // directory. - size_t pos = location.rfind('/'); - if (pos == std::string::npos) { - *error_msg = "Dex location " + location + " has no directory."; - return false; - } - std::string dir = location.substr(0, pos+1); - dir += "oat/" + std::string(GetInstructionSetString(isa)); - - // Get the base part of the file without the extension. - std::string file = location.substr(pos+1); - pos = file.rfind('.'); - if (pos == std::string::npos) { - *error_msg = "Dex location " + location + " has no extension."; - return false; - } - std::string base = file.substr(0, pos); - - *odex_filename = dir + "/" + base + ".odex"; - return true; + return DexLocationToOdexNames(location, isa, odex_filename, nullptr, nullptr, error_msg); } bool OatFileAssistant::DexLocationToOatFilename(const std::string& location, @@ -752,8 +873,45 @@ const OatFileAssistant::ImageInfo* OatFileAssistant::GetImageInfo() { } OatFileAssistant::OatFileInfo& OatFileAssistant::GetBestInfo() { - bool use_oat = oat_.IsUseable() || odex_.Status() == kOatCannotOpen; - return use_oat ? oat_ : odex_; + // TODO(calin): Document the side effects of class loading when + // running dalvikvm command line. + if (dex_parent_writable_) { + // If the parent of the dex file is writable it means that we can + // create the odex file. In this case we unconditionally pick the odex + // as the best oat file. This corresponds to the regular use case when + // apps gets installed or when they load private, secondary dex file. + // For apps on the system partition the odex location will not be + // writable and thus the oat location might be more up to date. + return odex_; + } + + // We cannot write to the odex location. This must be a system app. 
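GetBestInfo now walks from the most to the least desirable candidate; the remaining branches follow in the next hunk. Condensed into a standalone decision function for reference, with plain booleans standing in for the lazily computed OatFileInfo statuses:

```cpp
enum class Best { kOdex, kOat };

// Mirrors the selection order above: prefer the odex whenever we could
// (re)generate it, otherwise fall back through the oat, an up-to-date
// prebuilt odex, and finally whatever still exists for a stripped app.
Best ChooseBest(bool dex_parent_writable,
                bool oat_useable,
                bool odex_up_to_date,
                bool have_original_dex,
                bool odex_exists) {
  if (dex_parent_writable) return Best::kOdex;    // Installed app or secondary dex case.
  if (oat_useable) return Best::kOat;             // System app with a usable oat.
  if (odex_up_to_date) return Best::kOdex;        // Up-to-date prebuilt, no relocation needed.
  if (have_original_dex) return Best::kOat;       // We can still bring the oat up to date.
  return odex_exists ? Best::kOdex : Best::kOat;  // Stripped: take whatever can be opened.
}
```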
+ + // If the oat location is usable take it. + if (oat_.IsUseable()) { + return oat_; + } + + // The oat file is not usable but the odex file might be up to date. + // This is an indication that we are dealing with an up to date prebuilt + // (that doesn't need relocation). + if (odex_.Status() == kOatUpToDate) { + return odex_; + } + + // The oat file is not usable and the odex file is not up to date. + // However we have access to the original dex file which means we can make + // the oat location up to date. + if (HasOriginalDexFiles()) { + return oat_; + } + + // We got into the worst situation here: + // - the oat location is not usable + // - the prebuild odex location is not up to date + // - and we don't have the original dex file anymore (stripped). + // Pick the odex if it exists, or the oat if not. + return (odex_.Status() == kOatCannotOpen) ? oat_ : odex_; } std::unique_ptr<gc::space::ImageSpace> OatFileAssistant::OpenImageSpace(const OatFile* oat_file) { diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h index b84e711daa..7e2385ec6c 100644 --- a/runtime/oat_file_assistant.h +++ b/runtime/oat_file_assistant.h @@ -47,6 +47,11 @@ class ImageSpace; // dex location is in the boot class path. class OatFileAssistant { public: + // The default compile filter to use when optimizing dex file at load time if they + // are out of date. + static const CompilerFilter::Filter kDefaultCompilerFilterForDexLoading = + CompilerFilter::kQuicken; + enum DexOptNeeded { // No dexopt should (or can) be done to update the apk/jar. // Matches Java: dalvik.system.DexFile.NO_DEXOPT_NEEDED = 0 @@ -117,13 +122,6 @@ class OatFileAssistant { const InstructionSet isa, bool load_executable); - // Constructs an OatFileAssistant, providing an explicit target oat_location - // to use instead of the standard oat location. - OatFileAssistant(const char* dex_location, - const char* oat_location, - const InstructionSet isa, - bool load_executable); - ~OatFileAssistant(); // Returns true if the dex location refers to an element of the boot class @@ -232,16 +230,6 @@ class OatFileAssistant { // Returns the status of the oat file for the dex location. OatStatus OatFileStatus(); - // Generate the oat file from the dex file using the current runtime - // compiler options. - // This does not check the current status before attempting to generate the - // oat file. - // - // If the result is not kUpdateSucceeded, the value of error_msg will be set - // to a string describing why there was a failure or the update was not - // attempted. error_msg must not be null. - ResultOfAttemptToUpdate GenerateOatFile(std::string* error_msg); - // Executes dex2oat using the current runtime configuration overridden with // the given arguments. This does not check to see if dex2oat is enabled in // the runtime configuration. @@ -377,6 +365,16 @@ class OatFileAssistant { bool file_released_ = false; }; + // Generate the oat file for the given info from the dex file using the + // current runtime compiler options. + // This does not check the current status before attempting to generate the + // oat file. + // + // If the result is not kUpdateSucceeded, the value of error_msg will be set + // to a string describing why there was a failure or the update was not + // attempted. error_msg must not be null. + ResultOfAttemptToUpdate GenerateOatFileNoChecks(OatFileInfo& info, std::string* error_msg); + // Return info for the best oat file. 
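kDefaultCompilerFilterForDexLoading, declared just above, replaces CompilerFilter::kDefaultCompilerFilter as the fallback in GetRuntimeCompilerFilterOption (see the oat_file_assistant.cc hunk earlier), so dex files optimized at load time default to quicken unless a --compiler-filter option overrides it. A tiny sketch of that scan-with-fallback shape, with std types standing in for the runtime's option list:

```cpp
#include <string>
#include <vector>

// Scan "--compiler-filter=<name>" options; the last one wins, and the
// load-time default applies when none is present.
static std::string FilterForDexLoading(const std::vector<std::string>& options) {
  std::string filter = "quicken";  // kDefaultCompilerFilterForDexLoading
  const std::string prefix = "--compiler-filter=";
  for (const std::string& option : options) {
    if (option.compare(0, prefix.size(), prefix) == 0) {
      filter = option.substr(prefix.size());
    }
  }
  return filter;
}
```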
OatFileInfo& GetBestInfo(); @@ -422,6 +420,9 @@ class OatFileAssistant { std::string dex_location_; + // Whether or not the parent directory of the dex file is writable. + bool dex_parent_writable_ = false; + // In a properly constructed OatFileAssistant object, isa_ should be either // the 32 or 64 bit variant for the current device. const InstructionSet isa_ = kNone; @@ -446,6 +447,8 @@ class OatFileAssistant { bool image_info_load_attempted_ = false; std::unique_ptr<ImageInfo> cached_image_info_; + friend class OatFileAssistantTest; + DISALLOW_COPY_AND_ASSIGN(OatFileAssistant); }; diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc index 18924e9654..b2b86ee289 100644 --- a/runtime/oat_file_assistant_test.cc +++ b/runtime/oat_file_assistant_test.cc @@ -43,6 +43,38 @@ class OatFileAssistantNoDex2OatTest : public DexoptTest { } }; +class ScopedNonWritable { + public: + explicit ScopedNonWritable(const std::string& dex_location) { + is_valid_ = false; + size_t pos = dex_location.rfind('/'); + if (pos != std::string::npos) { + is_valid_ = true; + dex_parent_ = dex_location.substr(0, pos); + if (chmod(dex_parent_.c_str(), 0555) != 0) { + PLOG(ERROR) << "Could not change permissions on " << dex_parent_; + } + } + } + + bool IsSuccessful() { return is_valid_ && (access(dex_parent_.c_str(), W_OK) != 0); } + + ~ScopedNonWritable() { + if (is_valid_) { + if (chmod(dex_parent_.c_str(), 0777) != 0) { + PLOG(ERROR) << "Could not restore permissions on " << dex_parent_; + } + } + } + + private: + std::string dex_parent_; + bool is_valid_; +}; + +static bool IsExecutedAsRoot() { + return geteuid() == 0; +} // Case: We have a DEX file, but no OAT file for it. // Expect: The status is kDex2OatNeeded. @@ -87,13 +119,126 @@ TEST_F(OatFileAssistantTest, NoDexNoOat) { EXPECT_EQ(nullptr, oat_file.get()); } +// Case: We have a DEX file and a PIC ODEX file, but no OAT file. +// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation. +TEST_F(OatFileAssistantTest, OdexUpToDate) { + std::string dex_location = GetScratchDir() + "/OdexUpToDate.jar"; + std::string odex_location = GetOdexDir() + "/OdexUpToDate.odex"; + Copy(GetDexSrc1(), dex_location); + GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + + // For the use of oat location by making the dex parent not writable. + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); + + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); + EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus()); + EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); +} + +// Case: We have a DEX file and a PIC ODEX file, but no OAT file. We load the dex +// file via a symlink. +// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation. 
+TEST_F(OatFileAssistantTest, OdexUpToDateSymLink) { + std::string scratch_dir = GetScratchDir(); + std::string dex_location = GetScratchDir() + "/OdexUpToDate.jar"; + std::string odex_location = GetOdexDir() + "/OdexUpToDate.odex"; + + Copy(GetDexSrc1(), dex_location); + GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + + // Now replace the dex location with a symlink. + std::string link = scratch_dir + "/link"; + ASSERT_EQ(0, symlink(scratch_dir.c_str(), link.c_str())); + dex_location = link + "/OdexUpToDate.jar"; + + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); + + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); + EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus()); + EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); +} + // Case: We have a DEX file and up-to-date OAT file for it. // Expect: The status is kNoDexOptNeeded. TEST_F(OatFileAssistantTest, OatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + // For the use of oat location by making the dex parent not writable. + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); + + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); + EXPECT_EQ(OatFileAssistant::kDex2OatForFilter, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus()); + EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); +} + +// Case: We have a DEX file and up-to-date OAT file for it. We load the dex file +// via a symlink. +// Expect: The status is kNoDexOptNeeded. +TEST_F(OatFileAssistantTest, OatUpToDateSymLink) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. 
+ LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + + std::string real = GetScratchDir() + "/real"; + ASSERT_EQ(0, mkdir(real.c_str(), 0700)); + std::string link = GetScratchDir() + "/link"; + ASSERT_EQ(0, symlink(real.c_str(), link.c_str())); + + std::string dex_location = real + "/OatUpToDate.jar"; + + Copy(GetDexSrc1(), dex_location); + GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + + // Update the dex location to point to the symlink. + dex_location = link + "/OatUpToDate.jar"; + + // For the use of oat location by making the dex parent not writable. + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, @@ -120,19 +265,16 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOdex) { } std::string dex_location = GetScratchDir() + "/VdexUpToDateNoOdex.jar"; - std::string oat_location = GetOdexDir() + "/VdexUpToDateNoOdex.oat"; + std::string odex_location = GetOdexDir() + "/VdexUpToDateNoOdex.oat"; Copy(GetDexSrc1(), dex_location); // Generating and deleting the oat file should have the side effect of // creating an up-to-date vdex file. - GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed); - ASSERT_EQ(0, unlink(oat_location.c_str())); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + ASSERT_EQ(0, unlink(odex_location.c_str())); - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, - false); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); // Even though the vdex file is up to date, because we don't have the oat // file, we can't know that the vdex depends on the boot image and is up to @@ -169,6 +311,11 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOat) { if (!kIsVdexEnabled) { return; } + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } std::string dex_location = GetScratchDir() + "/VdexUpToDateNoOat.jar"; std::string oat_location; @@ -180,6 +327,8 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOat) { GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); ASSERT_EQ(0, unlink(oat_location.c_str())); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); // Even though the vdex file is up to date, because we don't have the oat @@ -195,10 +344,19 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOat) { // Expect: The status is kNoDexOptNeeded if the profile hasn't changed, but // kDex2Oat if the profile has changed. TEST_F(OatFileAssistantTest, ProfileOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. 
+ LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/ProfileOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, @@ -219,10 +377,19 @@ TEST_F(OatFileAssistantTest, ProfileOatUpToDate) { // Case: We have a MultiDEX file and up-to-date OAT file for it. // Expect: The status is kNoDexOptNeeded and we load all dex files. TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/MultiDexOatUpToDate.jar"; Copy(GetMultiDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false)); @@ -240,6 +407,12 @@ TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { // Case: We have a MultiDEX file where the non-main multdex entry is out of date. // Expect: The status is kDex2OatNeeded. TEST_F(OatFileAssistantTest, MultiDexNonMainOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/MultiDexNonMainOutOfDate.jar"; // Compile code for GetMultiDexSrc1. @@ -250,6 +423,9 @@ TEST_F(OatFileAssistantTest, MultiDexNonMainOutOfDate) { // is out of date. Copy(GetMultiDexSrc2(), dex_location); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false)); @@ -287,12 +463,12 @@ TEST_F(OatFileAssistantTest, StrippedMultiDexNonMainOutOfDate) { EXPECT_EQ(OatFileAssistant::kOatDexOutOfDate, oat_file_assistant.OatFileStatus()); } -// Case: We have a MultiDEX file and up-to-date OAT file for it with relative +// Case: We have a MultiDEX file and up-to-date ODEX file for it with relative // encoded dex locations. // Expect: The oat file status is kNoDexOptNeeded. 
TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { std::string dex_location = GetScratchDir() + "/RelativeEncodedDexLocation.jar"; - std::string oat_location = GetOdexDir() + "/RelativeEncodedDexLocation.oat"; + std::string odex_location = GetOdexDir() + "/RelativeEncodedDexLocation.odex"; // Create the dex file Copy(GetMultiDexSrc1(), dex_location); @@ -301,16 +477,15 @@ TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { std::vector<std::string> args; args.push_back("--dex-file=" + dex_location); args.push_back("--dex-location=" + std::string("RelativeEncodedDexLocation.jar")); - args.push_back("--oat-file=" + oat_location); + args.push_back("--oat-file=" + odex_location); args.push_back("--compiler-filter=speed"); std::string error_msg; ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg; // Verify we can load both dex files. - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, true); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); + std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); ASSERT_TRUE(oat_file.get() != nullptr); EXPECT_TRUE(oat_file->IsExecutable()); @@ -322,6 +497,12 @@ TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { // Case: We have a DEX file and an OAT file out of date with respect to the // dex checksum. TEST_F(OatFileAssistantTest, OatDexOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatDexOutOfDate.jar"; // We create a dex, generate an oat for it, then overwrite the dex with a @@ -330,6 +511,9 @@ TEST_F(OatFileAssistantTest, OatDexOutOfDate) { GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); Copy(GetDexSrc2(), dex_location); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); @@ -351,17 +535,14 @@ TEST_F(OatFileAssistantTest, VdexDexOutOfDate) { } std::string dex_location = GetScratchDir() + "/VdexDexOutOfDate.jar"; - std::string oat_location = GetOdexDir() + "/VdexDexOutOfDate.oat"; + std::string odex_location = GetOdexDir() + "/VdexDexOutOfDate.oat"; Copy(GetDexSrc1(), dex_location); - GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed); - ASSERT_EQ(0, unlink(oat_location.c_str())); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + ASSERT_EQ(0, unlink(odex_location.c_str())); Copy(GetDexSrc2(), dex_location); - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, - false); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); @@ -376,17 +557,14 @@ TEST_F(OatFileAssistantTest, VdexMultiDexNonMainOutOfDate) { } std::string dex_location = GetScratchDir() + "/VdexMultiDexNonMainOutOfDate.jar"; - std::string oat_location = GetOdexDir() + "/VdexMultiDexNonMainOutOfDate.oat"; + std::string odex_location = GetOdexDir() + "/VdexMultiDexNonMainOutOfDate.odex"; Copy(GetMultiDexSrc1(), dex_location); - GenerateOdexForTest(dex_location, oat_location, 
CompilerFilter::kSpeed); - ASSERT_EQ(0, unlink(oat_location.c_str())); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + ASSERT_EQ(0, unlink(odex_location.c_str())); Copy(GetMultiDexSrc2(), dex_location); - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, - false); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); @@ -395,6 +573,12 @@ TEST_F(OatFileAssistantTest, VdexMultiDexNonMainOutOfDate) { // Case: We have a DEX file and an OAT file out of date with respect to the // boot image. TEST_F(OatFileAssistantTest, OatImageOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatImageOutOfDate.jar"; Copy(GetDexSrc1(), dex_location); @@ -404,6 +588,9 @@ TEST_F(OatFileAssistantTest, OatImageOutOfDate) { /*pic*/false, /*with_alternate_image*/true); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); @@ -423,6 +610,12 @@ TEST_F(OatFileAssistantTest, OatImageOutOfDate) { // It shouldn't matter that the OAT file is out of date, because it is // verify-at-runtime. TEST_F(OatFileAssistantTest, OatVerifyAtRuntimeImageOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatVerifyAtRuntimeImageOutOfDate.jar"; Copy(GetDexSrc1(), dex_location); @@ -432,6 +625,9 @@ TEST_F(OatFileAssistantTest, OatVerifyAtRuntimeImageOutOfDate) { /*pic*/false, /*with_alternate_image*/true); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); @@ -586,24 +782,23 @@ TEST_F(OatFileAssistantTest, ResourceOnlyDex) { TEST_F(OatFileAssistantTest, OdexOatOverlap) { std::string dex_location = GetScratchDir() + "/OdexOatOverlap.jar"; std::string odex_location = GetOdexDir() + "/OdexOatOverlap.odex"; - std::string oat_location = GetOdexDir() + "/OdexOatOverlap.oat"; - // Create the dex and odex files + // Create the dex, the odex and the oat files. Copy(GetDexSrc1(), dex_location); GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); - - // Create the oat file by copying the odex so they are located in the same - // place in memory. - Copy(odex_location, oat_location); + GenerateOatForTest(dex_location.c_str(), + CompilerFilter::kSpeed, + /*relocate*/false, + /*pic*/false, + /*with_alternate_image*/false); // Verify things don't go bad. 
- OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), kRuntimeISA, true); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - // kDex2OatForRelocation is expected rather than -kDex2OatForRelocation - // based on the assumption that the oat location is more up-to-date than the odex + // -kDex2OatForRelocation is expected rather than kDex2OatForRelocation + // based on the assumption that the odex location is more up-to-date than the oat // location, even if they both need relocation. - EXPECT_EQ(OatFileAssistant::kDex2OatForRelocation, + EXPECT_EQ(-OatFileAssistant::kDex2OatForRelocation, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); @@ -621,30 +816,6 @@ TEST_F(OatFileAssistantTest, OdexOatOverlap) { EXPECT_EQ(1u, dex_files.size()); } -// Case: We have a DEX file and a PIC ODEX file, but no OAT file. -// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation. -TEST_F(OatFileAssistantTest, DexPicOdexNoOat) { - std::string dex_location = GetScratchDir() + "/DexPicOdexNoOat.jar"; - std::string odex_location = GetOdexDir() + "/DexPicOdexNoOat.odex"; - - // Create the dex and odex files - Copy(GetDexSrc1(), dex_location); - GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); - - // Verify the status. - OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - - EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); - EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); - - EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus()); - EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus()); - EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); -} - // Case: We have a DEX file and a VerifyAtRuntime ODEX file, but no OAT file. // Expect: The status is kNoDexOptNeeded, because VerifyAtRuntime contains no code. TEST_F(OatFileAssistantTest, DexVerifyAtRuntimeOdexNoOat) { @@ -672,11 +843,20 @@ TEST_F(OatFileAssistantTest, DexVerifyAtRuntimeOdexNoOat) { // Case: We have a DEX file and up-to-date OAT file for it. // Expect: We should load an executable dex file. TEST_F(OatFileAssistantTest, LoadOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/LoadOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + // Load the oat using an oat file assistant. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); @@ -691,11 +871,20 @@ TEST_F(OatFileAssistantTest, LoadOatUpToDate) { // Case: We have a DEX file and up-to-date quicken OAT file for it. // Expect: We should still load the oat file as executable. TEST_F(OatFileAssistantTest, LoadExecInterpretOnlyOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. 
+ LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/LoadExecInterpretOnlyOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kQuicken); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + // Load the oat using an oat file assistant. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); @@ -710,9 +899,19 @@ TEST_F(OatFileAssistantTest, LoadExecInterpretOnlyOatUpToDate) { // Case: We have a DEX file and up-to-date OAT file for it. // Expect: Loading non-executable should load the oat non-executable. TEST_F(OatFileAssistantTest, LoadNoExecOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/LoadNoExecOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); + + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); // Load the oat using an oat file assistant. @@ -726,70 +925,33 @@ TEST_F(OatFileAssistantTest, LoadNoExecOatUpToDate) { EXPECT_EQ(1u, dex_files.size()); } -// Case: We have a DEX file. -// Expect: We should load an executable dex file from an alternative oat -// location. -TEST_F(OatFileAssistantTest, LoadDexNoAlternateOat) { - std::string dex_location = GetScratchDir() + "/LoadDexNoAlternateOat.jar"; - std::string oat_location = GetScratchDir() + "/LoadDexNoAlternateOat.oat"; +// Case: We don't have a DEX file and can't write the oat file. +// Expect: We should fail to generate the oat file without crashing. +TEST_F(OatFileAssistantTest, GenNoDex) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + + std::string dex_location = GetScratchDir() + "/GenNoDex.jar"; - Copy(GetDexSrc1(), dex_location); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); - OatFileAssistant oat_file_assistant( - dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); std::string error_msg; Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); + // We should get kUpdateSucceeded from MakeUpToDate since there's nothing + // that can be done in this situation. ASSERT_EQ(OatFileAssistant::kUpdateSucceeded, - oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg; - - std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); - ASSERT_TRUE(oat_file.get() != nullptr); - EXPECT_TRUE(oat_file->IsExecutable()); - std::vector<std::unique_ptr<const DexFile>> dex_files; - dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str()); - EXPECT_EQ(1u, dex_files.size()); - - EXPECT_TRUE(OS::FileExists(oat_location.c_str())); + oat_file_assistant.MakeUpToDate(false, &error_msg)); - // Verify it didn't create an oat in the default location. + // Verify it didn't create an oat in the default location (dalvik-cache). 
OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OatFileStatus()); -} - -// Case: We have a DEX file but can't write the oat file. -// Expect: We should fail to make the oat file up to date. -TEST_F(OatFileAssistantTest, LoadDexUnwriteableAlternateOat) { - std::string dex_location = GetScratchDir() + "/LoadDexUnwriteableAlternateOat.jar"; - - // Make the oat location unwritable by inserting some non-existent - // intermediate directories. - std::string oat_location = GetScratchDir() + "/foo/bar/LoadDexUnwriteableAlternateOat.oat"; - - Copy(GetDexSrc1(), dex_location); - - OatFileAssistant oat_file_assistant( - dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); - std::string error_msg; - Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); - ASSERT_EQ(OatFileAssistant::kUpdateNotAttempted, - oat_file_assistant.MakeUpToDate(false, &error_msg)); - - std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); - ASSERT_TRUE(oat_file.get() == nullptr); -} - -// Case: We don't have a DEX file and can't write the oat file. -// Expect: We should fail to generate the oat file without crashing. -TEST_F(OatFileAssistantTest, GenNoDex) { - std::string dex_location = GetScratchDir() + "/GenNoDex.jar"; - std::string oat_location = GetScratchDir() + "/GenNoDex.oat"; - - OatFileAssistant oat_file_assistant( - dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); - std::string error_msg; - Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); - EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted, - oat_file_assistant.GenerateOatFile(&error_msg)); + // Verify it didn't create the odex file in the default location (../oat/isa/...odex) + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OdexFileStatus()); } // Turn an absolute path into a path relative to the current working @@ -1006,9 +1168,9 @@ TEST_F(OatFileAssistantTest, RuntimeCompilerFilterOptionUsed) { Runtime::Current()->AddCompilerOption("--compiler-filter=quicken"); EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg; - EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); - EXPECT_EQ(OatFileAssistant::kDex2OatForFilter, + EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc index 932d5edbef..c1cf800e5d 100644 --- a/runtime/oat_file_manager.cc +++ b/runtime/oat_file_manager.cc @@ -615,9 +615,7 @@ std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat( Locks::mutator_lock_->AssertNotHeld(self); Runtime* const runtime = Runtime::Current(); - // TODO(calin): remove the explicit oat_location for OatFileAssistant OatFileAssistant oat_file_assistant(dex_location, - /*oat_location*/ nullptr, kRuntimeISA, !runtime->IsAotCompiler()); diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc index c3a94b93a0..9be486e269 100644 --- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc +++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc @@ -1078,9 +1078,180 @@ class JvmtiFunctions { jint* extension_count_ptr, jvmtiExtensionFunctionInfo** extensions) { ENSURE_VALID_ENV(env); - // We do not have any extension functions. 
- *extension_count_ptr = 0; - *extensions = nullptr; + ENSURE_NON_NULL(extension_count_ptr); + ENSURE_NON_NULL(extensions); + + std::vector<jvmtiExtensionFunctionInfo> ext_vector; + + // Holders for allocated values. + std::vector<JvmtiUniquePtr<char[]>> char_buffers; + std::vector<JvmtiUniquePtr<jvmtiParamInfo[]>> param_buffers; + std::vector<JvmtiUniquePtr<jvmtiError[]>> error_buffers; + + // Add a helper struct that takes an arbitrary const char*. add_extension will use Allocate + // appropriately. + struct CParamInfo { + const char* name; + jvmtiParamKind kind; + jvmtiParamTypes base_type; + jboolean null_ok; + }; + + auto add_extension = [&](jvmtiExtensionFunction func, + const char* id, + const char* short_description, + jint param_count, + const std::vector<CParamInfo>& params, + jint error_count, + const std::vector<jvmtiError>& errors) { + jvmtiExtensionFunctionInfo func_info; + jvmtiError error; + + func_info.func = func; + + JvmtiUniquePtr<char[]> id_ptr = CopyString(env, id, &error); + if (id_ptr == nullptr) { + return error; + } + func_info.id = id_ptr.get(); + char_buffers.push_back(std::move(id_ptr)); + + JvmtiUniquePtr<char[]> descr = CopyString(env, short_description, &error); + if (descr == nullptr) { + return error; + } + func_info.short_description = descr.get(); + char_buffers.push_back(std::move(descr)); + + func_info.param_count = param_count; + if (param_count > 0) { + JvmtiUniquePtr<jvmtiParamInfo[]> params_ptr = + AllocJvmtiUniquePtr<jvmtiParamInfo[]>(env, param_count, &error); + if (params_ptr == nullptr) { + return error; + } + func_info.params = params_ptr.get(); + param_buffers.push_back(std::move(params_ptr)); + + for (jint i = 0; i != param_count; ++i) { + JvmtiUniquePtr<char[]> param_name = CopyString(env, params[i].name, &error); + if (param_name == nullptr) { + return error; + } + func_info.params[i].name = param_name.get(); + char_buffers.push_back(std::move(param_name)); + + func_info.params[i].kind = params[i].kind; + func_info.params[i].base_type = params[i].base_type; + func_info.params[i].null_ok = params[i].null_ok; + } + } else { + func_info.params = nullptr; + } + + func_info.error_count = error_count; + if (error_count > 0) { + JvmtiUniquePtr<jvmtiError[]> errors_ptr = + AllocJvmtiUniquePtr<jvmtiError[]>(env, error_count, &error); + if (errors_ptr == nullptr) { + return error; + } + func_info.errors = errors_ptr.get(); + error_buffers.push_back(std::move(errors_ptr)); + + for (jint i = 0; i != error_count; ++i) { + func_info.errors[i] = errors[i]; + } + } else { + func_info.errors = nullptr; + } + + ext_vector.push_back(func_info); + + return ERR(NONE); + }; + + jvmtiError error; + + // Heap extensions. + error = add_extension( + reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::GetObjectHeapId), + "com.android.art.heap.get_object_heap_id", + "Retrieve the heap id of the the object tagged with the given argument. 
An " + "arbitrary object is chosen if multiple objects exist with the same tag.", + 2, + { // NOLINT [whitespace/braces] [4] + { "tag", JVMTI_KIND_IN, JVMTI_TYPE_JLONG, false}, + { "heap_id", JVMTI_KIND_OUT, JVMTI_TYPE_JINT, false} + }, + 1, + { JVMTI_ERROR_NOT_FOUND }); + if (error != ERR(NONE)) { + return error; + } + + error = add_extension( + reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::GetHeapName), + "com.android.art.heap.get_heap_name", + "Retrieve the name of the heap with the given id.", + 2, + { // NOLINT [whitespace/braces] [4] + { "heap_id", JVMTI_KIND_IN, JVMTI_TYPE_JINT, false}, + { "heap_name", JVMTI_KIND_ALLOC_BUF, JVMTI_TYPE_CCHAR, false} + }, + 1, + { JVMTI_ERROR_ILLEGAL_ARGUMENT }); + if (error != ERR(NONE)) { + return error; + } + + error = add_extension( + reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::IterateThroughHeapExt), + "com.android.art.heap.iterate_through_heap_ext", + "Iterate through a heap. This is equivalent to the standard IterateThroughHeap function," + " except for additionally passing the heap id of the current object. The jvmtiHeapCallbacks" + " structure is reused, with the callbacks field overloaded to a signature of " + "jint (*)(jlong, jlong, jlong*, jint length, void*, jint).", + 4, + { // NOLINT [whitespace/braces] [4] + { "heap_filter", JVMTI_KIND_IN, JVMTI_TYPE_JINT, false}, + { "klass", JVMTI_KIND_IN, JVMTI_TYPE_JCLASS, true}, + { "callbacks", JVMTI_KIND_IN_PTR, JVMTI_TYPE_CVOID, false}, + { "user_data", JVMTI_KIND_IN_PTR, JVMTI_TYPE_CVOID, true} + }, + 3, + { // NOLINT [whitespace/braces] [4] + JVMTI_ERROR_MUST_POSSESS_CAPABILITY, + JVMTI_ERROR_INVALID_CLASS, + JVMTI_ERROR_NULL_POINTER + }); + if (error != ERR(NONE)) { + return error; + } + + // Copy into output buffer. + + *extension_count_ptr = ext_vector.size(); + JvmtiUniquePtr<jvmtiExtensionFunctionInfo[]> out_data = + AllocJvmtiUniquePtr<jvmtiExtensionFunctionInfo[]>(env, ext_vector.size(), &error); + if (out_data == nullptr) { + return error; + } + memcpy(out_data.get(), + ext_vector.data(), + ext_vector.size() * sizeof(jvmtiExtensionFunctionInfo)); + *extensions = out_data.release(); + + // Release all the buffer holders, we're OK now. 
+ for (auto& holder : char_buffers) { + holder.release(); + } + for (auto& holder : param_buffers) { + holder.release(); + } + for (auto& holder : error_buffers) { + holder.release(); + } return ERR(NONE); } @@ -1358,23 +1529,26 @@ class JvmtiFunctions { static jvmtiError GetErrorName(jvmtiEnv* env, jvmtiError error, char** name_ptr) { ENSURE_NON_NULL(name_ptr); + auto copy_fn = [&](const char* name_cstr) { + jvmtiError res; + JvmtiUniquePtr<char[]> copy = CopyString(env, name_cstr, &res); + if (copy == nullptr) { + *name_ptr = nullptr; + return res; + } else { + *name_ptr = copy.release(); + return OK; + } + }; switch (error) { -#define ERROR_CASE(e) case (JVMTI_ERROR_ ## e) : do { \ - jvmtiError res; \ - JvmtiUniquePtr<char[]> copy = CopyString(env, "JVMTI_ERROR_"#e, &res); \ - if (copy == nullptr) { \ - *name_ptr = nullptr; \ - return res; \ - } else { \ - *name_ptr = copy.release(); \ - return OK; \ - } \ - } while (false) +#define ERROR_CASE(e) case (JVMTI_ERROR_ ## e) : \ + return copy_fn("JVMTI_ERROR_"#e); ERROR_CASE(NONE); ERROR_CASE(INVALID_THREAD); ERROR_CASE(INVALID_THREAD_GROUP); ERROR_CASE(INVALID_PRIORITY); ERROR_CASE(THREAD_NOT_SUSPENDED); + ERROR_CASE(THREAD_SUSPENDED); ERROR_CASE(THREAD_NOT_ALIVE); ERROR_CASE(INVALID_OBJECT); ERROR_CASE(INVALID_CLASS); @@ -1419,18 +1593,9 @@ class JvmtiFunctions { ERROR_CASE(UNATTACHED_THREAD); ERROR_CASE(INVALID_ENVIRONMENT); #undef ERROR_CASE - default: { - jvmtiError res; - JvmtiUniquePtr<char[]> copy = CopyString(env, "JVMTI_ERROR_UNKNOWN", &res); - if (copy == nullptr) { - *name_ptr = nullptr; - return res; - } else { - *name_ptr = copy.release(); - return ERR(ILLEGAL_ARGUMENT); - } - } } + + return ERR(ILLEGAL_ARGUMENT); } static jvmtiError SetVerboseFlag(jvmtiEnv* env, diff --git a/runtime/openjdkjvmti/jvmti_weak_table-inl.h b/runtime/openjdkjvmti/jvmti_weak_table-inl.h index f67fffccbb..64ab3e7b2e 100644 --- a/runtime/openjdkjvmti/jvmti_weak_table-inl.h +++ b/runtime/openjdkjvmti/jvmti_weak_table-inl.h @@ -384,6 +384,23 @@ jvmtiError JvmtiWeakTable<T>::GetTaggedObjects(jvmtiEnv* jvmti_env, return ERR(NONE); } +template <typename T> +art::mirror::Object* JvmtiWeakTable<T>::Find(T tag) { + art::Thread* self = art::Thread::Current(); + art::MutexLock mu(self, allow_disallow_lock_); + Wait(self); + + for (auto& pair : tagged_objects_) { + if (tag == pair.second) { + art::mirror::Object* obj = pair.first.template Read<art::kWithReadBarrier>(); + if (obj != nullptr) { + return obj; + } + } + } + return nullptr; +} + } // namespace openjdkjvmti #endif // ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_ diff --git a/runtime/openjdkjvmti/jvmti_weak_table.h b/runtime/openjdkjvmti/jvmti_weak_table.h index eeea75aa9d..a6fd247c51 100644 --- a/runtime/openjdkjvmti/jvmti_weak_table.h +++ b/runtime/openjdkjvmti/jvmti_weak_table.h @@ -116,6 +116,10 @@ class JvmtiWeakTable : public art::gc::SystemWeakHolder { void Unlock() RELEASE(allow_disallow_lock_); void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_); + art::mirror::Object* Find(T tag) + REQUIRES_SHARED(art::Locks::mutator_lock_) + REQUIRES(!allow_disallow_lock_); + protected: // Should HandleNullSweep be called when Sweep detects the release of an object? 
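The extensions registered above are discoverable by agents through the standard GetExtensionFunctions entry point: the id string is the lookup key and func is invoked like any other JVMTI function, with the declared parameters following the jvmtiEnv pointer. A hedged agent-side sketch resolving com.android.art.heap.get_object_heap_id (error handling and the per-entry Deallocate calls for the id/description/param buffers are trimmed):

```cpp
#include <cstring>

#include <jvmti.h>

// Look up the ART heap-id extension by id and query the heap of the object
// tagged with `tag`. Returns JVMTI_ERROR_NOT_FOUND if the extension is absent
// or the tag is unknown.
static jvmtiError GetObjectHeapId(jvmtiEnv* jvmti, jlong tag, jint* heap_id) {
  jint count = 0;
  jvmtiExtensionFunctionInfo* infos = nullptr;
  jvmtiError err = jvmti->GetExtensionFunctions(&count, &infos);
  if (err != JVMTI_ERROR_NONE) {
    return err;
  }
  err = JVMTI_ERROR_NOT_FOUND;
  for (jint i = 0; i < count; ++i) {
    if (std::strcmp(infos[i].id, "com.android.art.heap.get_object_heap_id") == 0) {
      // Extension functions take the jvmtiEnv followed by their declared params.
      err = infos[i].func(jvmti, tag, heap_id);
      break;
    }
  }
  jvmti->Deallocate(reinterpret_cast<unsigned char*>(infos));
  return err;
}
```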
virtual bool DoesHandleNullOnSweep() { diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc index 7fc5104bce..99774c67b5 100644 --- a/runtime/openjdkjvmti/ti_heap.cc +++ b/runtime/openjdkjvmti/ti_heap.cc @@ -651,14 +651,17 @@ void HeapUtil::Unregister() { art::Runtime::Current()->RemoveSystemWeakHolder(&gIndexCachingTable); } +template <typename Callback> struct IterateThroughHeapData { - IterateThroughHeapData(HeapUtil* _heap_util, + IterateThroughHeapData(Callback _cb, + ObjectTagTable* _tag_table, jvmtiEnv* _env, art::ObjPtr<art::mirror::Class> klass, jint _heap_filter, const jvmtiHeapCallbacks* _callbacks, const void* _user_data) - : heap_util(_heap_util), + : cb(_cb), + tag_table(_tag_table), heap_filter(_heap_filter), filter_klass(klass), env(_env), @@ -667,95 +670,89 @@ struct IterateThroughHeapData { stop_reports(false) { } - HeapUtil* heap_util; - const HeapFilter heap_filter; - art::ObjPtr<art::mirror::Class> filter_klass; - jvmtiEnv* env; - const jvmtiHeapCallbacks* callbacks; - const void* user_data; - - bool stop_reports; -}; - -static void IterateThroughHeapObjectCallback(art::mirror::Object* obj, void* arg) - REQUIRES_SHARED(art::Locks::mutator_lock_) { - IterateThroughHeapData* ithd = reinterpret_cast<IterateThroughHeapData*>(arg); - // Early return, as we can't really stop visiting. - if (ithd->stop_reports) { - return; + static void ObjectCallback(art::mirror::Object* obj, void* arg) + REQUIRES_SHARED(art::Locks::mutator_lock_) { + IterateThroughHeapData* ithd = reinterpret_cast<IterateThroughHeapData*>(arg); + ithd->ObjectCallback(obj); } - art::ScopedAssertNoThreadSuspension no_suspension("IterateThroughHeapCallback"); + void ObjectCallback(art::mirror::Object* obj) + REQUIRES_SHARED(art::Locks::mutator_lock_) { + // Early return, as we can't really stop visiting. + if (stop_reports) { + return; + } - jlong tag = 0; - ithd->heap_util->GetTags()->GetTag(obj, &tag); + art::ScopedAssertNoThreadSuspension no_suspension("IterateThroughHeapCallback"); - jlong class_tag = 0; - art::ObjPtr<art::mirror::Class> klass = obj->GetClass(); - ithd->heap_util->GetTags()->GetTag(klass.Ptr(), &class_tag); - // For simplicity, even if we find a tag = 0, assume 0 = not tagged. + jlong tag = 0; + tag_table->GetTag(obj, &tag); - if (!ithd->heap_filter.ShouldReportByHeapFilter(tag, class_tag)) { - return; - } + jlong class_tag = 0; + art::ObjPtr<art::mirror::Class> klass = obj->GetClass(); + tag_table->GetTag(klass.Ptr(), &class_tag); + // For simplicity, even if we find a tag = 0, assume 0 = not tagged. 
- if (ithd->filter_klass != nullptr) { - if (ithd->filter_klass != klass) { + if (!heap_filter.ShouldReportByHeapFilter(tag, class_tag)) { return; } - } - jlong size = obj->SizeOf(); + if (filter_klass != nullptr) { + if (filter_klass != klass) { + return; + } + } - jint length = -1; - if (obj->IsArrayInstance()) { - length = obj->AsArray()->GetLength(); - } + jlong size = obj->SizeOf(); - jlong saved_tag = tag; - jint ret = ithd->callbacks->heap_iteration_callback(class_tag, - size, - &tag, - length, - const_cast<void*>(ithd->user_data)); + jint length = -1; + if (obj->IsArrayInstance()) { + length = obj->AsArray()->GetLength(); + } - if (tag != saved_tag) { - ithd->heap_util->GetTags()->Set(obj, tag); - } + jlong saved_tag = tag; + jint ret = cb(obj, callbacks, class_tag, size, &tag, length, const_cast<void*>(user_data)); - ithd->stop_reports = (ret & JVMTI_VISIT_ABORT) != 0; + if (tag != saved_tag) { + tag_table->Set(obj, tag); + } - if (!ithd->stop_reports) { - jint string_ret = ReportString(obj, - ithd->env, - ithd->heap_util->GetTags(), - ithd->callbacks, - ithd->user_data); - ithd->stop_reports = (string_ret & JVMTI_VISIT_ABORT) != 0; - } + stop_reports = (ret & JVMTI_VISIT_ABORT) != 0; - if (!ithd->stop_reports) { - jint array_ret = ReportPrimitiveArray(obj, - ithd->env, - ithd->heap_util->GetTags(), - ithd->callbacks, - ithd->user_data); - ithd->stop_reports = (array_ret & JVMTI_VISIT_ABORT) != 0; - } + if (!stop_reports) { + jint string_ret = ReportString(obj, env, tag_table, callbacks, user_data); + stop_reports = (string_ret & JVMTI_VISIT_ABORT) != 0; + } + + if (!stop_reports) { + jint array_ret = ReportPrimitiveArray(obj, env, tag_table, callbacks, user_data); + stop_reports = (array_ret & JVMTI_VISIT_ABORT) != 0; + } - if (!ithd->stop_reports) { - ithd->stop_reports = ReportPrimitiveField::Report(obj, - ithd->heap_util->GetTags(), - ithd->callbacks, - ithd->user_data); + if (!stop_reports) { + stop_reports = ReportPrimitiveField::Report(obj, tag_table, callbacks, user_data); + } } -} -jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env, - jint heap_filter, - jclass klass, - const jvmtiHeapCallbacks* callbacks, - const void* user_data) { + Callback cb; + ObjectTagTable* tag_table; + const HeapFilter heap_filter; + art::ObjPtr<art::mirror::Class> filter_klass; + jvmtiEnv* env; + const jvmtiHeapCallbacks* callbacks; + const void* user_data; + + bool stop_reports; +}; + +template <typename T> +static jvmtiError DoIterateThroughHeap(T fn, + jvmtiEnv* env, + ObjectTagTable* tag_table, + jint heap_filter, + jclass klass, + const jvmtiHeapCallbacks* callbacks, + const void* user_data) { if (callbacks == nullptr) { return ERR(NULL_POINTER); } @@ -763,16 +760,46 @@ jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env, art::Thread* self = art::Thread::Current(); art::ScopedObjectAccess soa(self); // Now we know we have the shared lock. 
- IterateThroughHeapData ithd(this, + using Iterator = IterateThroughHeapData<T>; + Iterator ithd(fn, + tag_table, + env, + soa.Decode<art::mirror::Class>(klass), + heap_filter, + callbacks, + user_data); + + art::Runtime::Current()->GetHeap()->VisitObjects(Iterator::ObjectCallback, &ithd); + + return ERR(NONE); +} + +jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env, + jint heap_filter, + jclass klass, + const jvmtiHeapCallbacks* callbacks, + const void* user_data) { + auto JvmtiIterateHeap = [](art::mirror::Object* obj ATTRIBUTE_UNUSED, + const jvmtiHeapCallbacks* cb_callbacks, + jlong class_tag, + jlong size, + jlong* tag, + jint length, + void* cb_user_data) + REQUIRES_SHARED(art::Locks::mutator_lock_) { + return cb_callbacks->heap_iteration_callback(class_tag, + size, + tag, + length, + cb_user_data); + }; + return DoIterateThroughHeap(JvmtiIterateHeap, env, - soa.Decode<art::mirror::Class>(klass), + ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get(), heap_filter, + klass, callbacks, user_data); - - art::Runtime::Current()->GetHeap()->VisitObjects(IterateThroughHeapObjectCallback, &ithd); - - return ERR(NONE); } class FollowReferencesHelper FINAL { @@ -1400,4 +1427,136 @@ jvmtiError HeapUtil::ForceGarbageCollection(jvmtiEnv* env ATTRIBUTE_UNUSED) { return ERR(NONE); } + +static constexpr jint kHeapIdDefault = 0; +static constexpr jint kHeapIdImage = 1; +static constexpr jint kHeapIdZygote = 2; +static constexpr jint kHeapIdApp = 3; + +static jint GetHeapId(art::ObjPtr<art::mirror::Object> obj) + REQUIRES_SHARED(art::Locks::mutator_lock_) { + if (obj == nullptr) { + return -1; + } + + art::gc::Heap* const heap = art::Runtime::Current()->GetHeap(); + const art::gc::space::ContinuousSpace* const space = + heap->FindContinuousSpaceFromObject(obj, true); + jint heap_type = kHeapIdApp; + if (space != nullptr) { + if (space->IsZygoteSpace()) { + heap_type = kHeapIdZygote; + } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) { + // Only count objects in the boot image as HPROF_HEAP_IMAGE, this leaves app image objects + // as HPROF_HEAP_APP. b/35762934 + heap_type = kHeapIdImage; + } + } else { + const auto* los = heap->GetLargeObjectsSpace(); + if (los->Contains(obj.Ptr()) && los->IsZygoteLargeObject(art::Thread::Current(), obj.Ptr())) { + heap_type = kHeapIdZygote; + } + } + return heap_type; +}; + +jvmtiError HeapExtensions::GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...) { + if (heap_id == nullptr) { + return ERR(NULL_POINTER); + } + + art::Thread* self = art::Thread::Current(); + + auto work = [&]() REQUIRES_SHARED(art::Locks::mutator_lock_) { + ObjectTagTable* tag_table = ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get(); + art::ObjPtr<art::mirror::Object> obj = tag_table->Find(tag); + jint heap_type = GetHeapId(obj); + if (heap_type == -1) { + return ERR(NOT_FOUND); + } + *heap_id = heap_type; + return ERR(NONE); + }; + + if (!art::Locks::mutator_lock_->IsSharedHeld(self)) { + if (!self->IsThreadSuspensionAllowable()) { + return ERR(INTERNAL); + } + art::ScopedObjectAccess soa(self); + return work(); + } else { + // We cannot use SOA in this case. We might be holding the lock, but may not be in the + // runnable state (e.g., during GC). + art::Locks::mutator_lock_->AssertSharedHeld(self); + // TODO: Investigate why ASSERT_SHARED_CAPABILITY doesn't work. 
+ auto annotalysis_workaround = [&]() NO_THREAD_SAFETY_ANALYSIS { + return work(); + }; + return annotalysis_workaround(); + } +} + +static jvmtiError CopyStringAndReturn(jvmtiEnv* env, const char* in, char** out) { + jvmtiError error; + JvmtiUniquePtr<char[]> param_name = CopyString(env, in, &error); + if (param_name == nullptr) { + return error; + } + *out = param_name.release(); + return ERR(NONE); +} + +static constexpr const char* kHeapIdDefaultName = "default"; +static constexpr const char* kHeapIdImageName = "image"; +static constexpr const char* kHeapIdZygoteName = "zygote"; +static constexpr const char* kHeapIdAppName = "app"; + +jvmtiError HeapExtensions::GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...) { + switch (heap_id) { + case kHeapIdDefault: + return CopyStringAndReturn(env, kHeapIdDefaultName, heap_name); + case kHeapIdImage: + return CopyStringAndReturn(env, kHeapIdImageName, heap_name); + case kHeapIdZygote: + return CopyStringAndReturn(env, kHeapIdZygoteName, heap_name); + case kHeapIdApp: + return CopyStringAndReturn(env, kHeapIdAppName, heap_name); + + default: + return ERR(ILLEGAL_ARGUMENT); + } +} + +jvmtiError HeapExtensions::IterateThroughHeapExt(jvmtiEnv* env, + jint heap_filter, + jclass klass, + const jvmtiHeapCallbacks* callbacks, + const void* user_data) { + if (ArtJvmTiEnv::AsArtJvmTiEnv(env)->capabilities.can_tag_objects != 1) { \ + return ERR(MUST_POSSESS_CAPABILITY); \ + } + + // ART extension API: Also pass the heap id. + auto ArtIterateHeap = [](art::mirror::Object* obj, + const jvmtiHeapCallbacks* cb_callbacks, + jlong class_tag, + jlong size, + jlong* tag, + jint length, + void* cb_user_data) + REQUIRES_SHARED(art::Locks::mutator_lock_) { + jint heap_id = GetHeapId(obj); + using ArtExtensionAPI = jint (*)(jlong, jlong, jlong*, jint length, void*, jint); + return reinterpret_cast<ArtExtensionAPI>(cb_callbacks->heap_iteration_callback)( + class_tag, size, tag, length, cb_user_data, heap_id); + }; + return DoIterateThroughHeap(ArtIterateHeap, + env, + ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get(), + heap_filter, + klass, + callbacks, + user_data); +} + } // namespace openjdkjvmti diff --git a/runtime/openjdkjvmti/ti_heap.h b/runtime/openjdkjvmti/ti_heap.h index dccecb4aa3..0c973db199 100644 --- a/runtime/openjdkjvmti/ti_heap.h +++ b/runtime/openjdkjvmti/ti_heap.h @@ -56,6 +56,18 @@ class HeapUtil { ObjectTagTable* tags_; }; +class HeapExtensions { + public: + static jvmtiError JNICALL GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...); + static jvmtiError JNICALL GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...); + + static jvmtiError JNICALL IterateThroughHeapExt(jvmtiEnv* env, + jint heap_filter, + jclass klass, + const jvmtiHeapCallbacks* callbacks, + const void* user_data); +}; + } // namespace openjdkjvmti #endif // ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_ diff --git a/runtime/os.h b/runtime/os.h index 46d89fb8a5..7130fc3732 100644 --- a/runtime/os.h +++ b/runtime/os.h @@ -44,7 +44,7 @@ class OS { static File* CreateEmptyFileWriteOnly(const char* name); // Open a file with the specified open(2) flags. - static File* OpenFileWithFlags(const char* name, int flags); + static File* OpenFileWithFlags(const char* name, int flags, bool auto_flush = true); // Check if a file exists. 
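
Editor's note: for the iterate_through_heap_ext extension implemented above, the standard jvmtiHeapCallbacks structure is reused but heap_iteration_callback is interpreted with the extended signature that takes the heap id as a trailing argument, exactly as the registration text states. Below is a hedged agent-side sketch; it assumes the extension pointer was already obtained via GetExtensionFunctions as in the earlier sketch, and the callback and counter names are illustrative.

// Agent-side sketch (not part of this change) for iterate_through_heap_ext.
#include <cstddef>
#include <jvmti.h>

constexpr jint kZygoteHeapId = 2;  // Matches kHeapIdZygote defined in ti_heap.cc above.

extern "C" jint HeapIterationExtCallback(jlong class_tag,
                                         jlong size,
                                         jlong* tag_ptr,
                                         jint length,
                                         void* user_data,
                                         jint heap_id) {
  if (heap_id == kZygoteHeapId) {
    ++*static_cast<size_t*>(user_data);  // Count objects living in the zygote heap.
  }
  return 0;  // Keep iterating; returning JVMTI_VISIT_ABORT would stop the walk.
}

// `iterate_ext` is the com.android.art.heap.iterate_through_heap_ext function pointer.
static size_t CountZygoteObjects(jvmtiEnv* jvmti, jvmtiExtensionFunction iterate_ext) {
  size_t zygote_objects = 0;
  jvmtiHeapCallbacks callbacks = {};
  callbacks.heap_iteration_callback =
      reinterpret_cast<jvmtiHeapIterationCallback>(HeapIterationExtCallback);
  // heap_filter = 0 (report everything), klass = nullptr (all classes).
  iterate_ext(jvmti, 0, static_cast<jclass>(nullptr), &callbacks, &zygote_objects);
  return zygote_objects;
}
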
static bool FileExists(const char* name); diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc index 1db09b4445..0add4965d1 100644 --- a/runtime/os_linux.cc +++ b/runtime/os_linux.cc @@ -51,10 +51,11 @@ File* OS::CreateEmptyFileWriteOnly(const char* name) { return art::CreateEmptyFile(name, O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC); } -File* OS::OpenFileWithFlags(const char* name, int flags) { +File* OS::OpenFileWithFlags(const char* name, int flags, bool auto_flush) { CHECK(name != nullptr); bool read_only = ((flags & O_ACCMODE) == O_RDONLY); - std::unique_ptr<File> file(new File(name, flags, 0666, !read_only)); + bool check_usage = !read_only && auto_flush; + std::unique_ptr<File> file(new File(name, flags, 0666, check_usage)); if (!file->IsOpened()) { return nullptr; } diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index b8669412fc..db10103c4b 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -530,7 +530,7 @@ void QuickExceptionHandler::DeoptimizeStack() { PrepareForLongJumpToInvokeStubOrInterpreterBridge(); } -void QuickExceptionHandler::DeoptimizeSingleFrame() { +void QuickExceptionHandler::DeoptimizeSingleFrame(DeoptimizationKind kind) { DCHECK(is_deoptimization_); if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) { @@ -544,6 +544,10 @@ void QuickExceptionHandler::DeoptimizeSingleFrame() { // Compiled code made an explicit deoptimization. ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod(); DCHECK(deopt_method != nullptr); + LOG(INFO) << "Deoptimizing " + << deopt_method->PrettyMethod() + << " due to " + << GetDeoptimizationKindName(kind); if (Runtime::Current()->UseJitCompilation()) { Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor( deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader()); diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h index 3ead7dbe64..8090f9b035 100644 --- a/runtime/quick_exception_handler.h +++ b/runtime/quick_exception_handler.h @@ -20,6 +20,7 @@ #include "base/logging.h" #include "base/macros.h" #include "base/mutex.h" +#include "deoptimization_kind.h" #include "stack_reference.h" namespace art { @@ -62,7 +63,7 @@ class QuickExceptionHandler { // the result of IsDeoptimizeable(). // - It can be either full-fragment, or partial-fragment deoptimization, depending // on whether that single frame covers full or partial fragment. - void DeoptimizeSingleFrame() REQUIRES_SHARED(Locks::mutator_lock_); + void DeoptimizeSingleFrame(DeoptimizationKind kind) REQUIRES_SHARED(Locks::mutator_lock_); void DeoptimizePartialFragmentFixup(uintptr_t return_pc) REQUIRES_SHARED(Locks::mutator_lock_); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index eb068b3cad..60fa0828a0 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -262,6 +262,9 @@ Runtime::Runtime() std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u); interpreter::CheckInterpreterAsmConstants(); callbacks_.reset(new RuntimeCallbacks()); + for (size_t i = 0; i <= static_cast<size_t>(DeoptimizationKind::kLast); ++i) { + deoptimization_counts_[i] = 0u; + } } Runtime::~Runtime() { @@ -336,6 +339,16 @@ Runtime::~Runtime() { jit_->DeleteThreadPool(); } + // Make sure our internal threads are dead before we start tearing down things they're using. + Dbg::StopJdwp(); + delete signal_catcher_; + + // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended. 
+ { + ScopedTrace trace2("Delete thread list"); + thread_list_->ShutDown(); + } + // TODO Maybe do some locking. for (auto& agent : agents_) { agent.Unload(); @@ -346,15 +359,9 @@ Runtime::~Runtime() { plugin.Unload(); } - // Make sure our internal threads are dead before we start tearing down things they're using. - Dbg::StopJdwp(); - delete signal_catcher_; + // Finally delete the thread list. + delete thread_list_; - // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended. - { - ScopedTrace trace2("Delete thread list"); - delete thread_list_; - } // Delete the JIT after thread list to ensure that there is no remaining threads which could be // accessing the instrumentation when we delete it. if (jit_ != nullptr) { @@ -1571,6 +1578,23 @@ void Runtime::RegisterRuntimeNativeMethods(JNIEnv* env) { register_sun_misc_Unsafe(env); } +std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind) { + os << GetDeoptimizationKindName(kind); + return os; +} + +void Runtime::DumpDeoptimizations(std::ostream& os) { + for (size_t i = 0; i <= static_cast<size_t>(DeoptimizationKind::kLast); ++i) { + if (deoptimization_counts_[i] != 0) { + os << "Number of " + << GetDeoptimizationKindName(static_cast<DeoptimizationKind>(i)) + << " deoptimizations: " + << deoptimization_counts_[i] + << "\n"; + } + } +} + void Runtime::DumpForSigQuit(std::ostream& os) { GetClassLinker()->DumpForSigQuit(os); GetInternTable()->DumpForSigQuit(os); @@ -1582,6 +1606,7 @@ void Runtime::DumpForSigQuit(std::ostream& os) { } else { os << "Running non JIT\n"; } + DumpDeoptimizations(os); TrackedAllocators::Dump(os); os << "\n"; diff --git a/runtime/runtime.h b/runtime/runtime.h index 3ba0f2cd42..8d047770ae 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -29,6 +29,7 @@ #include "arch/instruction_set.h" #include "base/macros.h" #include "base/mutex.h" +#include "deoptimization_kind.h" #include "dex_file_types.h" #include "experimental_flags.h" #include "gc_root.h" @@ -235,6 +236,7 @@ class Runtime { // Detaches the current native thread from the runtime. void DetachCurrentThread() REQUIRES(!Locks::mutator_lock_); + void DumpDeoptimizations(std::ostream& os); void DumpForSigQuit(std::ostream& os); void DumpLockHolders(std::ostream& os); @@ -682,6 +684,11 @@ class Runtime { dump_gc_performance_on_shutdown_ = value; } + void IncrementDeoptimizationCount(DeoptimizationKind kind) { + DCHECK_LE(kind, DeoptimizationKind::kLast); + deoptimization_counts_[static_cast<size_t>(kind)]++; + } + private: static void InitPlatformSignalHandlers(); @@ -941,6 +948,9 @@ class Runtime { std::unique_ptr<RuntimeCallbacks> callbacks_; + std::atomic<uint32_t> deoptimization_counts_[ + static_cast<uint32_t>(DeoptimizationKind::kLast) + 1]; + DISALLOW_COPY_AND_ASSIGN(Runtime); }; std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs); diff --git a/runtime/runtime_callbacks_test.cc b/runtime/runtime_callbacks_test.cc index abe99e0d50..640f9ce848 100644 --- a/runtime/runtime_callbacks_test.cc +++ b/runtime/runtime_callbacks_test.cc @@ -335,6 +335,9 @@ class RuntimeSigQuitCallbackRuntimeCallbacksTest : public RuntimeCallbacksTest { }; TEST_F(RuntimeSigQuitCallbackRuntimeCallbacksTest, SigQuit) { + // SigQuit induces a dump. ASAN isn't happy with libunwind reading memory. + TEST_DISABLED_FOR_MEMORY_TOOL_ASAN(); + // The runtime needs to be started for the signal handler. 
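
Editor's note: the deoptimization bookkeeping added to Runtime above follows a simple pattern: one atomic counter per DeoptimizationKind, incremented when a frame is deoptimized and printed from DumpForSigQuit only when non-zero. The stand-alone sketch below illustrates that pattern; the kind names are placeholders for illustration, not ART's actual enum, which lives in deoptimization_kind.h.

// Stand-alone sketch of the per-kind deoptimization counters added to Runtime above.
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <ostream>

enum class Kind { kInlineCache, kBoundsCheck, kClassHierarchyAnalysis, kLast = kClassHierarchyAnalysis };

static const char* KindName(Kind kind) {
  switch (kind) {
    case Kind::kInlineCache:            return "inline cache";
    case Kind::kBoundsCheck:            return "bounds check";
    case Kind::kClassHierarchyAnalysis: return "class hierarchy analysis";
  }
  return "unknown";
}

class DeoptCounters {
 public:
  DeoptCounters() {
    for (size_t i = 0; i <= static_cast<size_t>(Kind::kLast); ++i) {
      counts_[i] = 0u;  // Same explicit zeroing as the Runtime constructor above.
    }
  }

  void Increment(Kind kind) {
    counts_[static_cast<size_t>(kind)]++;  // Atomic increment, safe from any thread.
  }

  // Mirrors Runtime::DumpDeoptimizations(): only kinds that actually occurred are printed.
  void Dump(std::ostream& os) const {
    for (size_t i = 0; i <= static_cast<size_t>(Kind::kLast); ++i) {
      if (counts_[i] != 0) {
        os << "Number of " << KindName(static_cast<Kind>(i)) << " deoptimizations: "
           << counts_[i] << "\n";
      }
    }
  }

 private:
  std::atomic<uint32_t> counts_[static_cast<size_t>(Kind::kLast) + 1];
};
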
Thread* self = Thread::Current(); diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc index 382643314c..faea7b3821 100644 --- a/runtime/signal_catcher.cc +++ b/runtime/signal_catcher.cc @@ -115,7 +115,7 @@ std::string SignalCatcher::GetStackTraceFileName() { for (uint32_t i = 0; i < kMaxRetries; ++i) { std::srand(NanoTime()); - // Sample output for PID 1234 : /data/anr-pid1234-cafeffee.txt + // Sample output for PID 1234 : /data/anr/anr-pid1234-cafeffee.txt const std::string file_name = android::base::StringPrintf( "%s/anr-pid%" PRId32 "-%08" PRIx32 ".txt", stack_trace_dir_.c_str(), @@ -135,19 +135,19 @@ std::string SignalCatcher::GetStackTraceFileName() { } void SignalCatcher::Output(const std::string& s) { - const std::string stack_trace_file = GetStackTraceFileName(); - if (stack_trace_file.empty()) { + const std::string output_file = GetStackTraceFileName(); + if (output_file.empty()) { LOG(INFO) << s; return; } ScopedThreadStateChange tsc(Thread::Current(), kWaitingForSignalCatcherOutput); - int fd = open(stack_trace_file.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666); + int fd = open(output_file.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666); if (fd == -1) { - PLOG(ERROR) << "Unable to open stack trace file '" << stack_trace_file_ << "'"; + PLOG(ERROR) << "Unable to open stack trace file '" << output_file << "'"; return; } - std::unique_ptr<File> file(new File(fd, stack_trace_file, true)); + std::unique_ptr<File> file(new File(fd, output_file, true)); bool success = file->WriteFully(s.data(), s.size()); if (success) { success = file->FlushCloseOrErase() == 0; @@ -155,9 +155,9 @@ void SignalCatcher::Output(const std::string& s) { file->Erase(); } if (success) { - LOG(INFO) << "Wrote stack traces to '" << stack_trace_file << "'"; + LOG(INFO) << "Wrote stack traces to '" << output_file << "'"; } else { - PLOG(ERROR) << "Failed to write stack traces to '" << stack_trace_file << "'"; + PLOG(ERROR) << "Failed to write stack traces to '" << output_file << "'"; } } diff --git a/runtime/thread.cc b/runtime/thread.cc index 62a616b646..653a9bd1d4 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -129,12 +129,12 @@ static void UnimplementedEntryPoint() { } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints); -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking); +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active); void Thread::SetIsGcMarkingAndUpdateEntrypoints(bool is_marking) { CHECK(kUseReadBarrier); tls32_.is_gc_marking = is_marking; - UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, is_marking); + UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, /* is_active */ is_marking); ResetQuickAllocEntryPointsForThread(is_marking); } @@ -3604,4 +3604,9 @@ mirror::Object* Thread::GetPeerFromOtherThread() const { return peer; } +void Thread::SetReadBarrierEntrypoints() { + // Make sure entrypoints aren't null. + UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, /* is_active*/ true); +} + } // namespace art diff --git a/runtime/thread.h b/runtime/thread.h index 5251012cbb..6abde5b450 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -1180,6 +1180,9 @@ class Thread { return false; } + // Set to the read barrier marking entrypoints to be non-null. 
+ void SetReadBarrierEntrypoints(); + static jobject CreateCompileTimePeer(JNIEnv* env, const char* name, bool as_daemon, diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index 2e0d866c21..b63eaa40ef 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -73,12 +73,17 @@ ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns) unregistering_count_(0), suspend_all_historam_("suspend all histogram", 16, 64), long_suspend_(false), + shut_down_(false), thread_suspend_timeout_ns_(thread_suspend_timeout_ns), empty_checkpoint_barrier_(new Barrier(0)) { CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U))); } ThreadList::~ThreadList() { + CHECK(shut_down_); +} + +void ThreadList::ShutDown() { ScopedTrace trace(__PRETTY_FUNCTION__); // Detach the current thread if necessary. If we failed to start, there might not be any threads. // We need to detach the current thread here in case there's another thread waiting to join with @@ -102,6 +107,8 @@ ThreadList::~ThreadList() { // TODO: there's an unaddressed race here where a thread may attach during shutdown, see // Thread::Init. SuspendAllDaemonThreadsForShutdown(); + + shut_down_ = true; } bool ThreadList::Contains(Thread* thread) { @@ -1362,6 +1369,7 @@ void ThreadList::SuspendAllDaemonThreadsForShutdown() { void ThreadList::Register(Thread* self) { DCHECK_EQ(self, Thread::Current()); + CHECK(!shut_down_); if (VLOG_IS_ON(threads)) { std::ostringstream oss; @@ -1387,13 +1395,14 @@ void ThreadList::Register(Thread* self) { CHECK(!Contains(self)); list_.push_back(self); if (kUseReadBarrier) { + gc::collector::ConcurrentCopying* const cc = + Runtime::Current()->GetHeap()->ConcurrentCopyingCollector(); // Initialize according to the state of the CC collector. - bool is_gc_marking = - Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking(); - self->SetIsGcMarkingAndUpdateEntrypoints(is_gc_marking); - bool weak_ref_access_enabled = - Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsWeakRefAccessEnabled(); - self->SetWeakRefAccessEnabled(weak_ref_access_enabled); + self->SetIsGcMarkingAndUpdateEntrypoints(cc->IsMarking()); + if (cc->IsUsingReadBarrierEntrypoints()) { + self->SetReadBarrierEntrypoints(); + } + self->SetWeakRefAccessEnabled(cc->IsWeakRefAccessEnabled()); } } diff --git a/runtime/thread_list.h b/runtime/thread_list.h index 70917eb0f7..14bef5e2b9 100644 --- a/runtime/thread_list.h +++ b/runtime/thread_list.h @@ -50,6 +50,8 @@ class ThreadList { explicit ThreadList(uint64_t thread_suspend_timeout_ns); ~ThreadList(); + void ShutDown(); + void DumpForSigQuit(std::ostream& os) REQUIRES(!Locks::thread_list_lock_, !Locks::mutator_lock_); // For thread suspend timeout dumps. @@ -219,6 +221,10 @@ class ThreadList { // Whether or not the current thread suspension is long. bool long_suspend_; + // Whether the shutdown function has been called. This is checked in the destructor. It is an + // error to destroy a ThreadList instance without first calling ShutDown(). + bool shut_down_; + // Thread suspension timeout in nanoseconds. 
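
Editor's note: the ThreadList change above splits thread teardown out of the destructor into an explicit ShutDown() call, so Runtime can stop all Java threads while the JIT, agents and plugins still exist, and delete the list only at the very end. A minimal sketch of that two-phase pattern follows, with illustrative names and assert() standing in for CHECK().

// Minimal sketch of the explicit-shutdown contract introduced for ThreadList above.
#include <cassert>

class ListWithExplicitShutDown {
 public:
  ~ListWithExplicitShutDown() {
    assert(shut_down_);  // Stands in for the CHECK(shut_down_) added to ~ThreadList().
  }

  void ShutDown() {
    // In ThreadList::ShutDown(): detach the current thread, wait for non-daemon threads
    // to terminate, then suspend daemon threads for shutdown.
    shut_down_ = true;
  }

 private:
  bool shut_down_ = false;
};

// Runtime::~Runtime() now proceeds in order: stop JDWP and the signal catcher, call
// thread_list_->ShutDown(), unload agents and plugins, and only then `delete thread_list_`,
// so unloading happens after all Java threads are gone but before the list itself dies.
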
const uint64_t thread_suspend_timeout_ns_; diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc index e93f04d082..842aa04dfb 100644 --- a/runtime/vdex_file.cc +++ b/runtime/vdex_file.cc @@ -28,6 +28,7 @@ namespace art { +constexpr uint8_t VdexFile::Header::kVdexInvalidMagic[4]; constexpr uint8_t VdexFile::Header::kVdexMagic[4]; constexpr uint8_t VdexFile::Header::kVdexVersion[4]; diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h index 9c0d9dba8f..ece5491472 100644 --- a/runtime/vdex_file.h +++ b/runtime/vdex_file.h @@ -61,6 +61,8 @@ class VdexFile { uint32_t GetQuickeningInfoSize() const { return quickening_info_size_; } uint32_t GetNumberOfDexFiles() const { return number_of_dex_files_; } + static constexpr uint8_t kVdexInvalidMagic[] = { 'w', 'd', 'e', 'x' }; + private: static constexpr uint8_t kVdexMagic[] = { 'v', 'd', 'e', 'x' }; static constexpr uint8_t kVdexVersion[] = { '0', '0', '5', '\0' }; // access flags diff --git a/test/121-modifiers/info.txt b/test/121-modifiers/info.txt index 129aee8ae6..335df53f3d 100644 --- a/test/121-modifiers/info.txt +++ b/test/121-modifiers/info.txt @@ -14,5 +14,5 @@ mv Inf.out classes/Inf.class mv NonInf.out classes/NonInf.class mv Main.class A.class A\$B.class A\$C.class classes/ dx --debug --dex --output=classes.dex classes -baksmali classes.dex +baksmali disassemble classes.dex mv out/*.smali smali/ diff --git a/test/476-checker-ctor-memory-barrier/src/Main.java b/test/476-checker-ctor-memory-barrier/src/Main.java index 330aa7416e..a538f52fa6 100644 --- a/test/476-checker-ctor-memory-barrier/src/Main.java +++ b/test/476-checker-ctor-memory-barrier/src/Main.java @@ -17,8 +17,8 @@ // TODO: Add more tests after we can inline functions with calls. class ClassWithoutFinals { - /// CHECK-START: void ClassWithoutFinals.<init>() register (after) - /// CHECK-NOT: MemoryBarrier kind:StoreStore + /// CHECK-START: void ClassWithoutFinals.<init>() inliner (after) + /// CHECK-NOT: ConstructorFence public ClassWithoutFinals() {} } @@ -33,17 +33,40 @@ class ClassWithFinals { // should not inline this constructor } - /// CHECK-START: void ClassWithFinals.<init>() register (after) - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void ClassWithFinals.<init>() inliner (after) + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid + + /* + * Check that the correct assembly instructions are selected for a Store/Store fence. + * + * - ARM variants: DMB ISHST (store-store fence for inner shareable domain) + * - Intel variants: no-op (store-store does not need a fence). + */ + + /// CHECK-START-ARM64: void ClassWithFinals.<init>() disassembly (after) + /// CHECK: ConstructorFence + /// CHECK-NEXT: dmb ishst + + /// CHECK-START-ARM: void ClassWithFinals.<init>() disassembly (after) + /// CHECK: ConstructorFence + /// CHECK-NEXT: dmb ishst + + /// CHECK-START-X86_64: void ClassWithFinals.<init>() disassembly (after) + /// CHECK: ConstructorFence + /// CHECK-NOT: {{[slm]}}fence + + /// CHECK-START-X86: void ClassWithFinals.<init>() disassembly (after) + /// CHECK: ConstructorFence + /// CHECK-NOT: {{[slm]}}fence public ClassWithFinals() { // Exactly one constructor barrier. 
x = 0; } - /// CHECK-START: void ClassWithFinals.<init>(int) register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void ClassWithFinals.<init>(int) inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid public ClassWithFinals(int x) { // This should have exactly two barriers: @@ -55,11 +78,11 @@ class ClassWithFinals { } class InheritFromClassWithFinals extends ClassWithFinals { - /// CHECK-START: void InheritFromClassWithFinals.<init>() register (after) - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void InheritFromClassWithFinals.<init>() inliner (after) + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void InheritFromClassWithFinals.<init>() register (after) + /// CHECK-START: void InheritFromClassWithFinals.<init>() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public InheritFromClassWithFinals() { // Should inline the super constructor. @@ -67,23 +90,23 @@ class InheritFromClassWithFinals extends ClassWithFinals { // Exactly one constructor barrier here. } - /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) register (after) + /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) inliner (after) /// CHECK: InvokeStaticOrDirect - /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) register (after) - /// CHECK-NOT: MemoryBarrier kind:StoreStore + /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) inliner (after) + /// CHECK-NOT: ConstructorFence public InheritFromClassWithFinals(boolean cond) { super(cond); // should not inline the super constructor } - /// CHECK-START: void InheritFromClassWithFinals.<init>(int) register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK-NOT: MemoryBarrier kind:StoreStore + /// CHECK-START: void InheritFromClassWithFinals.<init>(int) inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK-NOT: ConstructorFence /// CHECK: ReturnVoid - /// CHECK-START: void InheritFromClassWithFinals.<init>(int) register (after) + /// CHECK-START: void InheritFromClassWithFinals.<init>(int) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public InheritFromClassWithFinals(int unused) { // Should inline the super constructor and insert a memory barrier. @@ -96,21 +119,21 @@ class InheritFromClassWithFinals extends ClassWithFinals { class HaveFinalsAndInheritFromClassWithFinals extends ClassWithFinals { final int y; - /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() register (after) + /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public HaveFinalsAndInheritFromClassWithFinals() { // Should inline the super constructor and keep the memory barrier. 
y = 0; } - /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(boolean) register (after) + /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(boolean) inliner (after) /// CHECK: InvokeStaticOrDirect - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid public HaveFinalsAndInheritFromClassWithFinals(boolean cond) { super(cond); @@ -118,15 +141,15 @@ class HaveFinalsAndInheritFromClassWithFinals extends ClassWithFinals { y = 0; } - /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) register (after) + /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public HaveFinalsAndInheritFromClassWithFinals(int unused) { // Should inline the super constructor and keep keep both memory barriers. @@ -141,55 +164,55 @@ class HaveFinalsAndInheritFromClassWithFinals extends ClassWithFinals { public class Main { - /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() register (after) + /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() inliner (after) /// CHECK: InvokeStaticOrDirect - /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() register (after) - /// CHECK-NOT: MemoryBarrier kind:StoreStore + /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() inliner (after) + /// CHECK-NOT: ConstructorFence public static ClassWithFinals noInlineNoConstructorBarrier() { return new ClassWithFinals(false); // should not inline the constructor } - /// CHECK-START: void Main.inlineNew() register (after) - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void Main.inlineNew() inliner (after) + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void Main.inlineNew() register (after) + /// CHECK-START: void Main.inlineNew() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public static void inlineNew() { new ClassWithFinals(); } - /// CHECK-START: void Main.inlineNew1() register (after) - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void Main.inlineNew1() inliner (after) + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void Main.inlineNew1() register (after) + /// CHECK-START: void Main.inlineNew1() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public static void inlineNew1() { new InheritFromClassWithFinals(); } - /// CHECK-START: void Main.inlineNew2() register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void Main.inlineNew2() inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void Main.inlineNew2() register (after) + /// CHECK-START: void Main.inlineNew2() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public static void inlineNew2() { new HaveFinalsAndInheritFromClassWithFinals(); } - 
/// CHECK-START: void Main.inlineNew3() register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void Main.inlineNew3() inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void Main.inlineNew3() register (after) + /// CHECK-START: void Main.inlineNew3() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public static void inlineNew3() { new HaveFinalsAndInheritFromClassWithFinals(); diff --git a/test/530-checker-lse-ctor-fences/expected.txt b/test/530-checker-lse-ctor-fences/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/530-checker-lse-ctor-fences/expected.txt diff --git a/test/530-checker-lse-ctor-fences/info.txt b/test/530-checker-lse-ctor-fences/info.txt new file mode 100644 index 0000000000..ccc7b47de9 --- /dev/null +++ b/test/530-checker-lse-ctor-fences/info.txt @@ -0,0 +1 @@ +Checker test for testing load-store elimination with final fields (constructor fences). diff --git a/test/530-checker-lse-ctor-fences/src/Main.java b/test/530-checker-lse-ctor-fences/src/Main.java new file mode 100644 index 0000000000..7755875b65 --- /dev/null +++ b/test/530-checker-lse-ctor-fences/src/Main.java @@ -0,0 +1,191 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This base class has a single final field; +// the constructor should have one fence. +class Circle { + Circle(double radius) { + this.radius = radius; + } + public double getRadius() { + return radius; + } + public double getArea() { + return radius * radius * Math.PI; + } + + public double getCircumference() { + return 2 * Math.PI * radius; + } + + private final double radius; +} + +// This subclass adds an extra final field; +// there should be an extra constructor fence added +// (for a total of 2 after inlining). 
+class Ellipse extends Circle { + Ellipse(double vertex, double covertex) { + super(vertex); + + this.covertex = covertex; + } + + public double getVertex() { + return getRadius(); + } + + public double getCovertex() { + return covertex; + } + + @Override + public double getArea() { + return getRadius() * covertex * Math.PI; + } + + private final double covertex; +} + +class CalcCircleAreaOrCircumference { + public static final int TYPE_AREA = 0; + public static final int TYPE_CIRCUMFERENCE = 1; + + double value; + + public CalcCircleAreaOrCircumference(int type) { + this.type = type; + } + + final int type; +} + +public class Main { + + /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (before) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: ConstructorFence + /// CHECK: InstanceFieldGet + + /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after) + /// CHECK-NOT: NewInstance + /// CHECK-NOT: InstanceFieldSet + /// CHECK-NOT: ConstructorFence + /// CHECK-NOT: InstanceFieldGet + + // Make sure the constructor fence gets eliminated when the allocation is eliminated. + static double calcCircleArea(double radius) { + return new Circle(radius).getArea(); + } + + /// CHECK-START: double Main.calcEllipseArea(double, double) load_store_elimination (before) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: InstanceFieldSet + /// CHECK: ConstructorFence + /// CHECK: InstanceFieldGet + /// CHECK: InstanceFieldGet + + /// CHECK-START: double Main.calcEllipseArea(double, double) load_store_elimination (after) + /// CHECK-NOT: NewInstance + /// CHECK-NOT: InstanceFieldSet + /// CHECK-NOT: ConstructorFence + /// CHECK-NOT: InstanceFieldGet + + // Multiple constructor fences can accumulate through inheritance, make sure + // they are all eliminated when the allocation is eliminated. + static double calcEllipseArea(double vertex, double covertex) { + return new Ellipse(vertex, covertex).getArea(); + } + + /// CHECK-START: double Main.calcCircleAreaOrCircumference(double, boolean) load_store_elimination (before) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: ConstructorFence + /// CHECK: InstanceFieldGet + + /// CHECK-START: double Main.calcCircleAreaOrCircumference(double, boolean) load_store_elimination (after) + /// CHECK: NewInstance + /// CHECK-NOT: ConstructorFence + + // + // The object allocation will not be eliminated by LSE because of aliased stores. + // However the object is still a singleton, so it never escapes the current thread. + // There should not be a constructor fence here after LSE. + static double calcCircleAreaOrCircumference(double radius, boolean area_or_circumference) { + CalcCircleAreaOrCircumference calc = + new CalcCircleAreaOrCircumference( + area_or_circumference ? CalcCircleAreaOrCircumference.TYPE_AREA : + CalcCircleAreaOrCircumference.TYPE_CIRCUMFERENCE); + + if (area_or_circumference) { + // Area + calc.value = Math.PI * Math.PI * radius; + } else { + // Circumference + calc.value = 2 * Math.PI * radius; + } + + return calc.value; + } + + /// CHECK-START: Circle Main.makeCircle(double) load_store_elimination (after) + /// CHECK: NewInstance + /// CHECK: ConstructorFence + + // The object allocation is considered a singleton by LSE, + // but we cannot eliminate the new because it is returned. + // + // The constructor fence must also not be removed because the object could escape the + // current thread (in the caller). 
+ static Circle makeCircle(double radius) { + return new Circle(radius); + } + + static void assertIntEquals(int result, int expected) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + static void assertFloatEquals(float result, float expected) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + static void assertDoubleEquals(double result, double expected) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + static void assertInstanceOf(Object result, Class<?> expected) { + if (result.getClass() != expected) { + throw new Error("Expected type: " + expected + ", found : " + result.getClass()); + } + } + + public static void main(String[] args) { + assertDoubleEquals(Math.PI * Math.PI * Math.PI, calcCircleArea(Math.PI)); + assertDoubleEquals(Math.PI * Math.PI * Math.PI, calcEllipseArea(Math.PI, Math.PI)); + assertDoubleEquals(2 * Math.PI * Math.PI, calcCircleAreaOrCircumference(Math.PI, false)); + assertInstanceOf(makeCircle(Math.PI), Circle.class); + } + + static boolean sFlag; +} diff --git a/test/530-checker-lse2/src/Main.java b/test/530-checker-lse2/src/Main.java index 0fe3d873ea..491a9a12de 100644 --- a/test/530-checker-lse2/src/Main.java +++ b/test/530-checker-lse2/src/Main.java @@ -76,16 +76,27 @@ public class Main { /// CHECK-DAG: Deoptimize /// CHECK-DAG: Deoptimize /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance /// CHECK-DAG: NewInstance /// CHECK-DAG: NewInstance @@ -95,9 +106,14 @@ public class Main { /// CHECK-DAG: Deoptimize /// CHECK-DAG: Deoptimize /// CHECK-NOT: NewInstance + /// CHECK-NOT: ConstructorFence private float testMethod() { { + // Each of the "new" statements here will initialize an object with final fields, + // which after inlining will also retain a constructor fence. + // + // After LSE we remove the 'new-instance' and the associated constructor fence. int lI0 = (-1456058746 << mI); mD = ((double)(int)(double) mD); for (int i0 = 56 - 1; i0 >= 0; i0--) { diff --git a/test/551-checker-shifter-operand/build b/test/551-checker-shifter-operand/build index a78021f349..027a0ea5cd 100644 --- a/test/551-checker-shifter-operand/build +++ b/test/551-checker-shifter-operand/build @@ -168,7 +168,7 @@ fi if [ "${HAS_SMALI}" = "true" ]; then # Compile Smali classes - ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'` + ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'` # Don't bother with dexmerger if we provide our own main function in a smali file. 
if [ ${SKIP_DX_MERGER} = "false" ]; then diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java index e9673987da..bf09a6aa5e 100644 --- a/test/551-checker-shifter-operand/src/Main.java +++ b/test/551-checker-shifter-operand/src/Main.java @@ -642,6 +642,123 @@ public class Main { // Each test line below should see one merge. + // + /// CHECK-START: void Main.$opt$validateShiftInt(int, int) instruction_simplifier$after_inlining (before) + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK-NOT: Shl + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK-NOT: Shl + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK-NOT: UShr + // + // Note: simplification after inlining removes `b << 32`, `b >> 32` and `b >>> 32`. + // + /// CHECK-START: void Main.$opt$validateShiftInt(int, int) instruction_simplifier$after_inlining (after) + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK-NOT: Shl + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK-NOT: Shl + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK-NOT: UShr + // + // Note: simplification followed by GVN exposes the common subexpressions between shifts with larger distance + // `b << 62`, `b << 63` etc. and the equivalent smaller distances. + // + /// CHECK-START: void Main.$opt$validateShiftInt(int, int) GVN (after) + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK-NOT: Shl + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK-NOT: Shl + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK-NOT: UShr + // /// CHECK-START-ARM: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm (after) /// CHECK: DataProcWithShifterOp /// CHECK: DataProcWithShifterOp @@ -670,14 +787,7 @@ public class Main { /// CHECK: DataProcWithShifterOp /// CHECK: DataProcWithShifterOp /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp /// CHECK-NOT: DataProcWithShifterOp - // Note: `b << 32`, `b >> 32` and `b >>> 32` are optimized away by generic simplifier. 
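
Editor's note: the checker expectations above rely on Java's shift semantics: for int operands only the low five bits of the shift distance are used (JLS 15.19), so once inlining exposes the constants, b << 32 is a no-op that the simplifier removes, and b << 33 is equivalent to b << 1, which GVN can then merge with the existing smaller-distance shift. The small C++ sketch below models the masked-distance semantics with an explicit mask, since native shifts by 32 or more bits are undefined.

// Sketch of the masked shift-distance semantics the simplifier relies on above.
#include <cassert>
#include <cstdint>

inline int32_t JavaShl(int32_t value, int32_t distance) {
  // Java int shifts use only the low five bits of the distance.
  return static_cast<int32_t>(static_cast<uint32_t>(value) << (distance & 31));
}

int main() {
  int32_t b = 0x12345678;
  assert(JavaShl(b, 32) == b);              // Distance masks to 0: the no-op removed after inlining.
  assert(JavaShl(b, 33) == JavaShl(b, 1));  // Distance 33 behaves like 1, exposing a common subexpression.
  return 0;
}
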
/// CHECK-START-ARM: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm (after) /// CHECK-NOT: Shl @@ -712,14 +822,7 @@ public class Main { /// CHECK: DataProcWithShifterOp /// CHECK: DataProcWithShifterOp /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp /// CHECK-NOT: DataProcWithShifterOp - // Note: `b << 32`, `b >> 32` and `b >>> 32` are optimized away by generic simplifier. /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after) /// CHECK-NOT: Shl diff --git a/test/569-checker-pattern-replacement/src/Main.java b/test/569-checker-pattern-replacement/src/Main.java index 345e9fd222..26d87b1f8a 100644 --- a/test/569-checker-pattern-replacement/src/Main.java +++ b/test/569-checker-pattern-replacement/src/Main.java @@ -331,7 +331,7 @@ public class Main { /// CHECK-START: double Main.constructBase() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructBase() { @@ -347,7 +347,7 @@ public class Main { /// CHECK-START: double Main.constructBase(int) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructBase(int) inliner (after) /// CHECK-DAG: <<Value:i\d+>> ParameterValue @@ -371,7 +371,7 @@ public class Main { /// CHECK-START: double Main.constructBaseWith0() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructBaseWith0() { @@ -387,7 +387,7 @@ public class Main { /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after) /// CHECK-DAG: <<Value:l\d+>> ParameterValue @@ -411,7 +411,7 @@ public class Main { /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after) /// CHECK-NOT: InstanceFieldSet @@ -431,7 +431,7 @@ public class Main { /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after) /// CHECK-DAG: <<DValue:d\d+>> ParameterValue @@ -460,7 +460,7 @@ public class Main { /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after) /// CHECK-DAG: <<IValue:i\d+>> ParameterValue @@ -493,7 +493,7 @@ public class Main { /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double 
Main.constructBaseWith0DoubleNull(double) inliner (after) /// CHECK-DAG: <<DValue:d\d+>> ParameterValue @@ -543,7 +543,7 @@ public class Main { /// CHECK-START: double Main.constructBase(double) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructBase(double) inliner (after) /// CHECK-DAG: <<Value:d\d+>> ParameterValue @@ -567,7 +567,7 @@ public class Main { /// CHECK-START: double Main.constructBaseWith0d() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructBaseWith0d() { @@ -605,7 +605,7 @@ public class Main { /// CHECK-START: double Main.constructBase(int, long) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructBase(int, long) inliner (after) /// CHECK-DAG: <<IValue:i\d+>> ParameterValue @@ -628,7 +628,7 @@ public class Main { /// CHECK-START: double Main.constructDerived() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerived() { @@ -644,7 +644,7 @@ public class Main { /// CHECK-START: double Main.constructDerived(int) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructDerived(int) inliner (after) /// CHECK-DAG: <<Value:i\d+>> ParameterValue @@ -668,7 +668,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWith0() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWith0() { @@ -684,7 +684,7 @@ public class Main { /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after) /// CHECK-NOT: InstanceFieldSet @@ -702,7 +702,7 @@ public class Main { /// CHECK-START: double Main.constructDerived(double) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructDerived(double) inliner (after) /// CHECK-DAG: <<Value:d\d+>> ParameterValue @@ -726,7 +726,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWith0d() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWith0d() { @@ -744,7 +744,7 @@ public class Main { /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after) /// CHECK-DAG: <<DValue:d\d+>> ParameterValue @@ -794,7 +794,7 @@ public class Main { /// CHECK-START: double Main.constructDerived(float) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructDerived(float) inliner (after) /// 
CHECK-DAG: <<Value:f\d+>> ParameterValue @@ -821,7 +821,7 @@ public class Main { /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after) /// CHECK-DAG: <<IValue:i\d+>> ParameterValue @@ -852,7 +852,7 @@ public class Main { /// CHECK-START: int Main.constructBaseWithFinalField() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructBaseWithFinalField() { @@ -873,7 +873,7 @@ public class Main { /// CHECK-DAG: <<Value:i\d+>> ParameterValue /// CHECK-DAG: <<Obj:l\d+>> NewInstance /// CHECK-DAG: InstanceFieldSet [<<Obj>>,<<Value>>] - /// CHECK-DAG: MemoryBarrier + /// CHECK-DAG: ConstructorFence /// CHECK-START: int Main.constructBaseWithFinalField(int) inliner (after) /// CHECK-DAG: InstanceFieldSet @@ -892,7 +892,7 @@ public class Main { /// CHECK-START: int Main.constructBaseWithFinalFieldWith0() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructBaseWithFinalFieldWith0() { @@ -907,7 +907,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWithFinalField() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWithFinalField() { @@ -928,7 +928,7 @@ public class Main { /// CHECK-DAG: <<Value:i\d+>> ParameterValue /// CHECK-DAG: <<Obj:l\d+>> NewInstance /// CHECK-DAG: InstanceFieldSet [<<Obj>>,<<Value>>] - /// CHECK-DAG: MemoryBarrier + /// CHECK-DAG: ConstructorFence /// CHECK-START: double Main.constructDerivedWithFinalField(int) inliner (after) /// CHECK-DAG: InstanceFieldSet @@ -947,7 +947,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWithFinalFieldWith0() { @@ -968,7 +968,7 @@ public class Main { /// CHECK-DAG: <<Value:d\d+>> ParameterValue /// CHECK-DAG: <<Obj:l\d+>> NewInstance /// CHECK-DAG: InstanceFieldSet [<<Obj>>,<<Value>>] - /// CHECK-DAG: MemoryBarrier + /// CHECK-DAG: ConstructorFence /// CHECK-START: double Main.constructDerivedWithFinalField(double) inliner (after) /// CHECK-DAG: InstanceFieldSet @@ -987,7 +987,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0d() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWithFinalFieldWith0d() { @@ -1009,7 +1009,7 @@ public class Main { /// CHECK-DAG: <<Value:d\d+>> ParameterValue /// CHECK-DAG: <<Obj:l\d+>> NewInstance /// CHECK-DAG: InstanceFieldSet [<<Obj>>,<<Value>>] - /// CHECK-DAG: MemoryBarrier + /// CHECK-DAG: ConstructorFence /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after) /// CHECK-DAG: InstanceFieldSet @@ -1017,8 +1017,8 @@ public class Main { /// CHECK-NOT: InstanceFieldSet /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner 
(after) - /// CHECK-DAG: MemoryBarrier - /// CHECK-NOT: MemoryBarrier + /// CHECK-DAG: ConstructorFence + /// CHECK-NOT: ConstructorFence public static double constructDerivedWithFinalField(int intValue, double doubleValue) { DerivedWithFinalField d = new DerivedWithFinalField(intValue, doubleValue); @@ -1034,7 +1034,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0And0d() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWithFinalFieldWith0And0d() { @@ -1049,7 +1049,7 @@ public class Main { /// CHECK-START: int Main.constructDerivedInSecondDex() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructDerivedInSecondDex() { @@ -1070,7 +1070,7 @@ public class Main { /// CHECK-DAG: InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init> /// CHECK-START: int Main.constructDerivedInSecondDex(int) inliner (after) - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructDerivedInSecondDex(int intValue) { @@ -1091,7 +1091,7 @@ public class Main { /// CHECK-DAG: InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init> /// CHECK-START: int Main.constructDerivedInSecondDexWith0() inliner (after) - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructDerivedInSecondDexWith0() { @@ -1107,7 +1107,7 @@ public class Main { /// CHECK-START: int Main.constructDerivedInSecondDex(long) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructDerivedInSecondDex(long dummy) { diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java index 2d9daf1d43..0080ffa464 100644 --- a/test/618-checker-induction/src/Main.java +++ b/test/618-checker-induction/src/Main.java @@ -468,6 +468,19 @@ public class Main { return sum; } + // Ensure double induction does not "overshoot" the subscript range. + private static int getIncr2(int[] arr) { + for (int i = 0; i < 12; ) { + arr[i++] = 30; + arr[i++] = 29; + } + int sum = 0; + for (int i = 0; i < 12; i++) { + sum += arr[i]; + } + return sum; + } + // TODO: handle as closed/empty eventually? static int mainIndexReturnedN(int n) { int i; @@ -869,6 +882,7 @@ public class Main { expectEquals(1, periodicReturned9()); expectEquals(0, periodicReturned10()); expectEquals(21, getSum21()); + expectEquals(354, getIncr2(new int[12])); for (int n = -4; n < 4; n++) { int tc = (n <= 0) ? 0 : n; expectEquals(tc, mainIndexReturnedN(n)); diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java index d1f36edb18..520e7c367c 100644 --- a/test/623-checker-loop-regressions/src/Main.java +++ b/test/623-checker-loop-regressions/src/Main.java @@ -341,6 +341,16 @@ public class Main { } } + // Bug b/37768917: potential dynamic BCE vs. loop optimizations + // case should be deal with correctly (used to DCHECK fail). 
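  // Worked example (annotation, not part of the change): main() below calls
  // arrayInTripCount(xx, bt, 20) after setting xx[0] = 10, so the inner loop walks
  // bt[0..9] and each of the 20 outer iterations adds 2 to every element; each byte
  // therefore ends at 20 * 2 == 40, which is exactly what the expectEquals(40, bt[i])
  // loop at the call site verifies.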
+ private static void arrayInTripCount(int[] a, byte[] b, int n) { + for (int k = 0; k < n; k++) { + for (int i = 0, u = a[0]; i < u; i++) { + b[i] += 2; + } + } + } + public static void main(String[] args) { expectEquals(10, earlyExitFirst(-1)); for (int i = 0; i <= 10; i++) { @@ -436,6 +446,13 @@ public class Main { expectEquals(dd[i], 1); } + xx[0] = 10; + byte[] bt = new byte[10]; + arrayInTripCount(xx, bt, 20); + for (int i = 0; i < bt.length; i++) { + expectEquals(40, bt[i]); + } + System.out.println("passed"); } diff --git a/test/640-checker-int-simd/src/Main.java b/test/640-checker-int-simd/src/Main.java index ba1e142668..97048eb951 100644 --- a/test/640-checker-int-simd/src/Main.java +++ b/test/640-checker-int-simd/src/Main.java @@ -76,6 +76,7 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START: void Main.div(int) loop_optimization (after) + /// CHECK-NOT: VecDiv // // Not supported on any architecture. // @@ -159,14 +160,81 @@ public class Main { // Shift sanity. // + // Expose constants to optimizing compiler, but not to front-end. + public static int $opt$inline$IntConstant32() { return 32; } + public static int $opt$inline$IntConstant33() { return 33; } + public static int $opt$inline$IntConstantMinus254() { return -254; } + + /// CHECK-START: void Main.shr32() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 32 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shr32() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr32() loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>> outer_loop:none static void shr32() { + // TODO: remove a[i] = a[i] altogether? 
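    // Annotation on the expectations above (not part of the change): the inlined distance 32
    // is masked to 32 & 31 == 0, so the simplifier reduces the update to a plain a[i] = a[i]
    // copy; hence the post-simplifier checks expect only an ArrayGet feeding an ArraySet, and
    // the ARM64 checks expect a VecLoad feeding a VecStore with no VecUShr in between.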
for (int i = 0; i < 128; i++) - a[i] >>>= 32; // 0, since & 31 + a[i] >>>= $opt$inline$IntConstant32(); // 0, since & 31 } + /// CHECK-START: void Main.shr33() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 33 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shr33() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr33() loop_optimization (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>> outer_loop:none static void shr33() { for (int i = 0; i < 128; i++) - a[i] >>>= 33; // 1, since & 31 + a[i] >>>= $opt$inline$IntConstant33(); // 1, since & 31 + } + + /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant -254 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shrMinus254() loop_optimization (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>> outer_loop:none + static void shrMinus254() { + for (int i = 0; i < 128; i++) + a[i] >>>= $opt$inline$IntConstantMinus254(); // 2, since & 31 } // @@ -240,9 +308,14 @@ public class Main { for (int i = 0; i < 128; i++) { expectEquals(0x1fffffff, a[i], "shr33"); } + shrMinus254(); + for (int i = 0; i < 128; i++) { + expectEquals(0x07ffffff, a[i], "shrMinus254"); + } + // Bit-wise not operator. not(); for (int i = 0; i < 128; i++) { - expectEquals(0xe0000000, a[i], "not"); + expectEquals(0xf8000000, a[i], "not"); } // Done. 
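The updated expectations follow from Java's int shift-distance masking, which uses only the low five bits of the distance: 32 & 31 == 0, 33 & 31 == 1, and -254 & 31 == 2. Starting from 0x1fffffff after shr33(), the new shrMinus254() shifts right by two more bits to 0x07ffffff, so the final bitwise not becomes 0xf8000000 instead of 0xe0000000. A minimal stand-alone sketch of that arithmetic (illustrative only; the class name is chosen here, not taken from the test):

    public class IntShiftMaskDemo {
      public static void main(String[] args) {
        System.out.println(-254 & 31);                                  // 2
        System.out.println(Integer.toHexString(0x1fffffff >>> -254));  // 7ffffff
        System.out.println(Integer.toHexString(~0x07ffffff));          // f8000000
      }
    }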
System.out.println("passed"); diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java index 56411821f1..e42c716d19 100644 --- a/test/640-checker-long-simd/src/Main.java +++ b/test/640-checker-long-simd/src/Main.java @@ -74,6 +74,7 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START: void Main.div(long) loop_optimization (after) + /// CHECK-NOT: VecDiv // // Not supported on any architecture. // @@ -157,14 +158,81 @@ public class Main { // Shift sanity. // + // Expose constants to optimizing compiler, but not to front-end. + public static int $opt$inline$IntConstant64() { return 64; } + public static int $opt$inline$IntConstant65() { return 65; } + public static int $opt$inline$IntConstantMinus254() { return -254; } + + /// CHECK-START: void Main.shr64() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 64 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shr64() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr64() loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>> outer_loop:none static void shr64() { + // TODO: remove a[i] = a[i] altogether? 
for (int i = 0; i < 128; i++) - a[i] >>>= 64; // 0, since & 63 + a[i] >>>= $opt$inline$IntConstant64(); // 0, since & 63 } + /// CHECK-START: void Main.shr65() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 65 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shr65() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr65() loop_optimization (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>> outer_loop:none static void shr65() { for (int i = 0; i < 128; i++) - a[i] >>>= 65; // 1, since & 63 + a[i] >>>= $opt$inline$IntConstant65(); // 1, since & 63 + } + + /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant -254 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shrMinus254() loop_optimization (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>> outer_loop:none + static void shrMinus254() { + for (int i = 0; i < 128; i++) + a[i] >>>= $opt$inline$IntConstantMinus254(); // 2, since & 63 } // @@ -238,9 +306,14 @@ public class Main { for (int i = 0; i < 128; i++) { expectEquals(0x1fffffffffffffffL, a[i], "shr65"); } + shrMinus254(); + for (int i = 0; i < 128; i++) { + expectEquals(0x07ffffffffffffffL, a[i], "shrMinus254"); + } + // Bit-wise not operator. not(); for (int i = 0; i < 128; i++) { - expectEquals(0xe000000000000000L, a[i], "not"); + expectEquals(0xf800000000000000L, a[i], "not"); } // Done. 
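The long variant relies on the same masking, but with the low six bits of the distance: 64 & 63 == 0, 65 & 63 == 1, and -254 & 63 == 2. From 0x1fffffffffffffffL after shr65(), the extra >>> 2 yields 0x07ffffffffffffffL, whose bitwise not is 0xf800000000000000L, matching the updated "not" expectation. A matching stand-alone sketch (illustrative only):

    public class LongShiftMaskDemo {
      public static void main(String[] args) {
        System.out.println(-254 & 63);                                       // 2
        System.out.println(Long.toHexString(0x1fffffffffffffffL >>> -254));  // 7ffffffffffffff
        System.out.println(Long.toHexString(~0x07ffffffffffffffL));          // f800000000000000
      }
    }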
System.out.println("passed"); diff --git a/test/648-inline-caches-unresolved/expected.txt b/test/648-inline-caches-unresolved/expected.txt new file mode 100644 index 0000000000..4e6a4384c5 --- /dev/null +++ b/test/648-inline-caches-unresolved/expected.txt @@ -0,0 +1 @@ +Subclass diff --git a/test/648-inline-caches-unresolved/info.txt b/test/648-inline-caches-unresolved/info.txt new file mode 100644 index 0000000000..8fc604281c --- /dev/null +++ b/test/648-inline-caches-unresolved/info.txt @@ -0,0 +1 @@ +Test for inlining with inline cache into an unresolved method. diff --git a/test/648-inline-caches-unresolved/profile b/test/648-inline-caches-unresolved/profile new file mode 100644 index 0000000000..92c0a41cab --- /dev/null +++ b/test/648-inline-caches-unresolved/profile @@ -0,0 +1 @@ +LMain;->inlineMonomorphicUnresolvedSuper(Ljava/lang/Object;)Ljava/lang/String;+LSubclass; diff --git a/test/648-inline-caches-unresolved/run b/test/648-inline-caches-unresolved/run new file mode 100644 index 0000000000..fb70d22867 --- /dev/null +++ b/test/648-inline-caches-unresolved/run @@ -0,0 +1,17 @@ +#!/bin/bash +# +# Copyright (C) 2017 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +exec ${RUN} $@ --secondary --profile diff --git a/test/648-inline-caches-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java b/test/648-inline-caches-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java new file mode 100644 index 0000000000..dd3be00633 --- /dev/null +++ b/test/648-inline-caches-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class UnresolvedSuperClass { + public void superMethod() { + System.out.println("UnresolvedClass.superMethod()"); + } +} diff --git a/test/648-inline-caches-unresolved/src/Main.java b/test/648-inline-caches-unresolved/src/Main.java new file mode 100644 index 0000000000..4e8aeec171 --- /dev/null +++ b/test/648-inline-caches-unresolved/src/Main.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main extends UnresolvedSuperClass { + public static String inlineMonomorphicUnresolvedSuper(Object o) { + return o.toString(); + } + + public static void main(String[] args) { + System.out.println(inlineMonomorphicUnresolvedSuper(new Subclass())); + } +} + +class Subclass { + public String toString() { + return "Subclass"; + } +} diff --git a/test/650-checker-inline-access-thunks/expected.txt b/test/650-checker-inline-access-thunks/expected.txt new file mode 100644 index 0000000000..d81cc0710e --- /dev/null +++ b/test/650-checker-inline-access-thunks/expected.txt @@ -0,0 +1 @@ +42 diff --git a/test/650-checker-inline-access-thunks/info.txt b/test/650-checker-inline-access-thunks/info.txt new file mode 100644 index 0000000000..e1a1eb275c --- /dev/null +++ b/test/650-checker-inline-access-thunks/info.txt @@ -0,0 +1 @@ +Test that access thunks for nested classes are inlined. diff --git a/test/650-checker-inline-access-thunks/src/Main.java b/test/650-checker-inline-access-thunks/src/Main.java new file mode 100644 index 0000000000..17f581910e --- /dev/null +++ b/test/650-checker-inline-access-thunks/src/Main.java @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + public static void main(String[] args) { + Main m = new Main(); + Nested n = new Nested(); + n.$noinline$setPrivateIntField(m, 42); + System.out.println(n.$noinline$getPrivateIntField(m)); + } + + private int privateIntField; + + private static class Nested { + /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (before) + /// CHECK: InvokeStaticOrDirect + + /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (before) + /// CHECK-NOT: InstanceFieldSet + + /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (after) + /// CHECK-NOT: InvokeStaticOrDirect + + /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (after) + /// CHECK: InstanceFieldSet + + public void $noinline$setPrivateIntField(Main m, int value) { + m.privateIntField = value; + } + + /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (before) + /// CHECK: InvokeStaticOrDirect + + /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (before) + /// CHECK-NOT: InstanceFieldGet + + /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (after) + /// CHECK-NOT: InvokeStaticOrDirect + + /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (after) + /// CHECK: InstanceFieldGet + + public int $noinline$getPrivateIntField(Main m) { + return m.privateIntField; + } + } +} diff --git a/test/901-hello-ti-agent/basics.cc b/test/901-hello-ti-agent/basics.cc index 8695e0c371..21dcf98ba7 100644 --- a/test/901-hello-ti-agent/basics.cc +++ b/test/901-hello-ti-agent/basics.cc @@ -176,5 +176,22 @@ extern "C" JNIEXPORT jboolean JNICALL Java_art_Test901_checkUnattached( return res == JVMTI_ERROR_UNATTACHED_THREAD; } +extern "C" JNIEXPORT jstring JNICALL Java_art_Test901_getErrorName( + JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jint error) { + char* name; + jvmtiError res = jvmti_env->GetErrorName(static_cast<jvmtiError>(error), &name); + if (JvmtiErrorToException(env, jvmti_env, res)) { + return nullptr; + } + + jstring ret_string = env->NewStringUTF(name); + jvmtiError dealloc = jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(name)); + if (JvmtiErrorToException(env, jvmti_env, dealloc)) { + return nullptr; + } + + return ret_string; +} + } // namespace Test901HelloTi } // namespace art diff --git a/test/901-hello-ti-agent/expected.txt b/test/901-hello-ti-agent/expected.txt index eb5b6a2f93..4177ffc4dc 100644 --- a/test/901-hello-ti-agent/expected.txt +++ b/test/901-hello-ti-agent/expected.txt @@ -10,4 +10,67 @@ Received expected error for unattached JVMTI calls 4 8 JVMTI_ERROR_ILLEGAL_ARGUMENT +1 times JVMTI_ERROR_ILLEGAL_ARGUMENT +0 = JVMTI_ERROR_NONE +9 times JVMTI_ERROR_ILLEGAL_ARGUMENT +10 = JVMTI_ERROR_INVALID_THREAD +11 = JVMTI_ERROR_INVALID_THREAD_GROUP +12 = JVMTI_ERROR_INVALID_PRIORITY +13 = JVMTI_ERROR_THREAD_NOT_SUSPENDED +14 = JVMTI_ERROR_THREAD_SUSPENDED +15 = JVMTI_ERROR_THREAD_NOT_ALIVE +4 times JVMTI_ERROR_ILLEGAL_ARGUMENT +20 = JVMTI_ERROR_INVALID_OBJECT +21 = JVMTI_ERROR_INVALID_CLASS +22 = JVMTI_ERROR_CLASS_NOT_PREPARED +23 = JVMTI_ERROR_INVALID_METHODID +24 = JVMTI_ERROR_INVALID_LOCATION +25 = JVMTI_ERROR_INVALID_FIELDID +5 times JVMTI_ERROR_ILLEGAL_ARGUMENT +31 = JVMTI_ERROR_NO_MORE_FRAMES +32 = JVMTI_ERROR_OPAQUE_FRAME +1 times JVMTI_ERROR_ILLEGAL_ARGUMENT +34 = JVMTI_ERROR_TYPE_MISMATCH +35 = JVMTI_ERROR_INVALID_SLOT +4 times JVMTI_ERROR_ILLEGAL_ARGUMENT +40 = 
JVMTI_ERROR_DUPLICATE +41 = JVMTI_ERROR_NOT_FOUND +8 times JVMTI_ERROR_ILLEGAL_ARGUMENT +50 = JVMTI_ERROR_INVALID_MONITOR +51 = JVMTI_ERROR_NOT_MONITOR_OWNER +52 = JVMTI_ERROR_INTERRUPT +7 times JVMTI_ERROR_ILLEGAL_ARGUMENT +60 = JVMTI_ERROR_INVALID_CLASS_FORMAT +61 = JVMTI_ERROR_CIRCULAR_CLASS_DEFINITION +62 = JVMTI_ERROR_FAILS_VERIFICATION +63 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_ADDED +64 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_SCHEMA_CHANGED +65 = JVMTI_ERROR_INVALID_TYPESTATE +66 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED +67 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_DELETED +68 = JVMTI_ERROR_UNSUPPORTED_VERSION +69 = JVMTI_ERROR_NAMES_DONT_MATCH +70 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_CLASS_MODIFIERS_CHANGED +71 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_MODIFIERS_CHANGED +7 times JVMTI_ERROR_ILLEGAL_ARGUMENT +79 = JVMTI_ERROR_UNMODIFIABLE_CLASS +18 times JVMTI_ERROR_ILLEGAL_ARGUMENT +98 = JVMTI_ERROR_NOT_AVAILABLE +99 = JVMTI_ERROR_MUST_POSSESS_CAPABILITY +100 = JVMTI_ERROR_NULL_POINTER +101 = JVMTI_ERROR_ABSENT_INFORMATION +102 = JVMTI_ERROR_INVALID_EVENT_TYPE +103 = JVMTI_ERROR_ILLEGAL_ARGUMENT +104 = JVMTI_ERROR_NATIVE_METHOD +1 times JVMTI_ERROR_ILLEGAL_ARGUMENT +106 = JVMTI_ERROR_CLASS_LOADER_UNSUPPORTED +3 times JVMTI_ERROR_ILLEGAL_ARGUMENT +110 = JVMTI_ERROR_OUT_OF_MEMORY +111 = JVMTI_ERROR_ACCESS_DENIED +112 = JVMTI_ERROR_WRONG_PHASE +113 = JVMTI_ERROR_INTERNAL +1 times JVMTI_ERROR_ILLEGAL_ARGUMENT +115 = JVMTI_ERROR_UNATTACHED_THREAD +116 = JVMTI_ERROR_INVALID_ENVIRONMENT +1 times JVMTI_ERROR_ILLEGAL_ARGUMENT VMDeath diff --git a/test/901-hello-ti-agent/src/art/Test901.java b/test/901-hello-ti-agent/src/art/Test901.java index eef2188612..7d853a7d51 100644 --- a/test/901-hello-ti-agent/src/art/Test901.java +++ b/test/901-hello-ti-agent/src/art/Test901.java @@ -32,6 +32,8 @@ public class Test901 { set(2); // CLASS set(4); // JNI set(8); // Error. + + testErrorNames(); } private static void set(int i) { @@ -44,7 +46,39 @@ public class Test901 { } } + private static void testErrorNames() { + int consecutiveErrors = 0; + String lastError = null; + for (int i = -1; i <= 117; i++) { + String errorName = null; + String error = null; + try { + errorName = getErrorName(i); + } catch (RuntimeException e) { + error = e.getMessage(); + } + + if (lastError != null && + (errorName != null || (error != null && !lastError.equals(error)))) { + System.out.println(consecutiveErrors + " times " + lastError); + lastError = null; + consecutiveErrors = 0; + } + + if (errorName != null) { + System.out.println(i + " = " + errorName); + } else { + lastError = error; + consecutiveErrors++; + } + } + if (consecutiveErrors > 0) { + System.out.println(consecutiveErrors + " times " + lastError); + } + } + private static native boolean checkLivePhase(); private static native void setVerboseFlag(int flag, boolean value); private static native boolean checkUnattached(); + private static native String getErrorName(int error); } diff --git a/test/912-classes/src/art/Test912Art.java b/test/912-classes/src/art/Test912Art.java index 6da3cadefe..a1e7ff2005 100644 --- a/test/912-classes/src/art/Test912Art.java +++ b/test/912-classes/src/art/Test912Art.java @@ -39,7 +39,7 @@ public class Test912Art { // run in configurations where dex2oat didn't verify the class itself. So explicitly // check whether the class has been already loaded, and skip then. // TODO: Add multiple configurations to the run script once that becomes easier to do. 
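The one-line fix below switches the argument of isLoadedClass() from a dotted binary name to a JVM type descriptor ("L" + slash-separated name + ";"), which appears to be the form the native lookup compares against. A hypothetical helper showing the conversion (for illustration only, not part of the test):

    static String toDescriptor(String binaryName) {
      // "art.Test912Art$ClassD" -> "Lart/Test912Art$ClassD;"
      return "L" + binaryName.replace('.', '/') + ";";
    }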
- if (hasJit() && !isLoadedClass("art.Test912Art$ClassD")) { + if (hasJit() && !isLoadedClass("Lart/Test912Art$ClassD;")) { testClassEventsJit(); } } diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt index 702b247819..b128d1cb70 100644 --- a/test/913-heaps/expected.txt +++ b/test/913-heaps/expected.txt @@ -385,3 +385,10 @@ root@root --(thread)--> 1@1000 [size=16, length=-1] 5@1002 --(field@10)--> 1@1000 [size=16, length=-1] 5@1002 --(field@9)--> 6@1000 [size=16, length=-1] --- + +default +image +zygote +app + +3 diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc index e319f7d98c..ec36cebd43 100644 --- a/test/913-heaps/heaps.cc +++ b/test/913-heaps/heaps.cc @@ -817,5 +817,266 @@ extern "C" JNIEXPORT jint JNICALL Java_art_Test913_getGcFinishes(JNIEnv* env ATT return result; } +using GetObjectHeapId = jvmtiError(*)(jvmtiEnv*, jlong, jint*, ...); +static GetObjectHeapId gGetObjectHeapIdFn = nullptr; + +using GetHeapName = jvmtiError(*)(jvmtiEnv*, jint, char**, ...); +static GetHeapName gGetHeapNameFn = nullptr; + +using IterateThroughHeapExt = jvmtiError(*)(jvmtiEnv*, + jint, + jclass, + const jvmtiHeapCallbacks*, + const void*); +static IterateThroughHeapExt gIterateThroughHeapExt = nullptr; + + +static void FreeExtensionFunctionInfo(jvmtiExtensionFunctionInfo* extensions, jint count) { + for (size_t i = 0; i != static_cast<size_t>(count); ++i) { + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].id)); + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].short_description)); + for (size_t j = 0; j != static_cast<size_t>(extensions[i].param_count); ++j) { + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params[j].name)); + } + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params)); + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].errors)); + } +} + +extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkForExtensionApis( + JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) { + jint extension_count; + jvmtiExtensionFunctionInfo* extensions; + jvmtiError result = jvmti_env->GetExtensionFunctions(&extension_count, &extensions); + if (JvmtiErrorToException(env, jvmti_env, result)) { + return; + } + + for (size_t i = 0; i != static_cast<size_t>(extension_count); ++i) { + if (strcmp("com.android.art.heap.get_object_heap_id", extensions[i].id) == 0) { + CHECK(gGetObjectHeapIdFn == nullptr); + gGetObjectHeapIdFn = reinterpret_cast<GetObjectHeapId>(extensions[i].func); + + CHECK_EQ(extensions[i].param_count, 2); + + CHECK_EQ(strcmp("tag", extensions[i].params[0].name), 0); + CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JLONG); + CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN); + + CHECK_EQ(strcmp("heap_id", extensions[i].params[1].name), 0); + CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_JINT); + CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_OUT); + CHECK_EQ(extensions[i].params[1].null_ok, false); + + CHECK_EQ(extensions[i].error_count, 1); + CHECK(extensions[i].errors != nullptr); + CHECK(extensions[i].errors[0] == JVMTI_ERROR_NOT_FOUND); + + continue; + } + + if (strcmp("com.android.art.heap.get_heap_name", extensions[i].id) == 0) { + CHECK(gGetHeapNameFn == nullptr); + gGetHeapNameFn = reinterpret_cast<GetHeapName>(extensions[i].func); + + CHECK_EQ(extensions[i].param_count, 2); + + CHECK_EQ(strcmp("heap_id", extensions[i].params[0].name), 0); + CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JINT); + 
CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN); + + CHECK_EQ(strcmp("heap_name", extensions[i].params[1].name), 0); + CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_CCHAR); + CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_ALLOC_BUF); + CHECK_EQ(extensions[i].params[1].null_ok, false); + + CHECK_EQ(extensions[i].error_count, 1); + CHECK(extensions[i].errors != nullptr); + CHECK(extensions[i].errors[0] == JVMTI_ERROR_ILLEGAL_ARGUMENT); + } + + if (strcmp("com.android.art.heap.iterate_through_heap_ext", extensions[i].id) == 0) { + CHECK(gIterateThroughHeapExt == nullptr); + gIterateThroughHeapExt = reinterpret_cast<IterateThroughHeapExt>(extensions[i].func); + + CHECK_EQ(extensions[i].param_count, 4); + + CHECK_EQ(strcmp("heap_filter", extensions[i].params[0].name), 0); + CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JINT); + CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN); + + CHECK_EQ(strcmp("klass", extensions[i].params[1].name), 0); + CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_JCLASS); + CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_IN); + CHECK_EQ(extensions[i].params[1].null_ok, true); + + CHECK_EQ(strcmp("callbacks", extensions[i].params[2].name), 0); + CHECK_EQ(extensions[i].params[2].base_type, JVMTI_TYPE_CVOID); + CHECK_EQ(extensions[i].params[2].kind, JVMTI_KIND_IN_PTR); + CHECK_EQ(extensions[i].params[2].null_ok, false); + + CHECK_EQ(strcmp("user_data", extensions[i].params[3].name), 0); + CHECK_EQ(extensions[i].params[3].base_type, JVMTI_TYPE_CVOID); + CHECK_EQ(extensions[i].params[3].kind, JVMTI_KIND_IN_PTR); + CHECK_EQ(extensions[i].params[3].null_ok, true); + + CHECK_EQ(extensions[i].error_count, 3); + CHECK(extensions[i].errors != nullptr); + CHECK(extensions[i].errors[0] == JVMTI_ERROR_MUST_POSSESS_CAPABILITY); + CHECK(extensions[i].errors[1] == JVMTI_ERROR_INVALID_CLASS); + CHECK(extensions[i].errors[2] == JVMTI_ERROR_NULL_POINTER); + } + } + + CHECK(gGetObjectHeapIdFn != nullptr); + CHECK(gGetHeapNameFn != nullptr); + + FreeExtensionFunctionInfo(extensions, extension_count); +} + +extern "C" JNIEXPORT jint JNICALL Java_art_Test913_getObjectHeapId( + JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag) { + CHECK(gGetObjectHeapIdFn != nullptr); + jint heap_id; + jvmtiError result = gGetObjectHeapIdFn(jvmti_env, tag, &heap_id); + JvmtiErrorToException(env, jvmti_env, result); + return heap_id; +} + +extern "C" JNIEXPORT jstring JNICALL Java_art_Test913_getHeapName( + JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jint heap_id) { + CHECK(gGetHeapNameFn != nullptr); + char* heap_name; + jvmtiError result = gGetHeapNameFn(jvmti_env, heap_id, &heap_name); + if (JvmtiErrorToException(env, jvmti_env, result)) { + return nullptr; + } + jstring ret = env->NewStringUTF(heap_name); + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(heap_name)); + return ret; +} + +extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkGetObjectHeapIdInCallback( + JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag, jint heap_id) { + CHECK(gGetObjectHeapIdFn != nullptr); + + { + struct GetObjectHeapIdCallbacks { + static jint JNICALL FollowReferencesCallback( + jvmtiHeapReferenceKind reference_kind ATTRIBUTE_UNUSED, + const jvmtiHeapReferenceInfo* reference_info ATTRIBUTE_UNUSED, + jlong class_tag ATTRIBUTE_UNUSED, + jlong referrer_class_tag ATTRIBUTE_UNUSED, + jlong size ATTRIBUTE_UNUSED, + jlong* tag_ptr, + jlong* referrer_tag_ptr ATTRIBUTE_UNUSED, + jint length ATTRIBUTE_UNUSED, + void* user_data) { + if (*tag_ptr != 0) { + 
GetObjectHeapIdCallbacks* p = reinterpret_cast<GetObjectHeapIdCallbacks*>(user_data); + if (*tag_ptr == p->check_callback_tag) { + jint tag_heap_id; + jvmtiError result = gGetObjectHeapIdFn(jvmti_env, *tag_ptr, &tag_heap_id); + CHECK_EQ(result, JVMTI_ERROR_NONE); + CHECK_EQ(tag_heap_id, p->check_callback_id); + return JVMTI_VISIT_ABORT; + } + } + + return JVMTI_VISIT_OBJECTS; // Continue visiting. + } + + jlong check_callback_tag; + jint check_callback_id; + }; + + jvmtiHeapCallbacks callbacks; + memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks)); + callbacks.heap_reference_callback = GetObjectHeapIdCallbacks::FollowReferencesCallback; + + GetObjectHeapIdCallbacks ffc; + ffc.check_callback_tag = tag; + ffc.check_callback_id = heap_id; + + jvmtiError ret = jvmti_env->FollowReferences(0, nullptr, nullptr, &callbacks, &ffc); + if (JvmtiErrorToException(env, jvmti_env, ret)) { + return; + } + } + + { + struct GetObjectHeapIdCallbacks { + static jint JNICALL HeapIterationCallback(jlong class_tag ATTRIBUTE_UNUSED, + jlong size ATTRIBUTE_UNUSED, + jlong* tag_ptr, + jint length ATTRIBUTE_UNUSED, + void* user_data) { + if (*tag_ptr != 0) { + GetObjectHeapIdCallbacks* p = reinterpret_cast<GetObjectHeapIdCallbacks*>(user_data); + if (*tag_ptr == p->check_callback_tag) { + jint tag_heap_id; + jvmtiError result = gGetObjectHeapIdFn(jvmti_env, *tag_ptr, &tag_heap_id); + CHECK_EQ(result, JVMTI_ERROR_NONE); + CHECK_EQ(tag_heap_id, p->check_callback_id); + return JVMTI_VISIT_ABORT; + } + } + + return 0; // Continue visiting. + } + + jlong check_callback_tag; + jint check_callback_id; + }; + + jvmtiHeapCallbacks callbacks; + memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks)); + callbacks.heap_iteration_callback = GetObjectHeapIdCallbacks::HeapIterationCallback; + + GetObjectHeapIdCallbacks ffc; + ffc.check_callback_tag = tag; + ffc.check_callback_id = heap_id; + + jvmtiError ret = jvmti_env->IterateThroughHeap(0, nullptr, &callbacks, &ffc); + if (JvmtiErrorToException(env, jvmti_env, ret)) { + return; + } + } +} + +static bool gFoundExt = false; + +static jint JNICALL HeapIterationExtCallback(jlong class_tag ATTRIBUTE_UNUSED, + jlong size ATTRIBUTE_UNUSED, + jlong* tag_ptr, + jint length ATTRIBUTE_UNUSED, + void* user_data ATTRIBUTE_UNUSED, + jint heap_id) { + // We expect some tagged objects at or above the threshold, where the expected heap id is + // encoded into lowest byte. 
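  // (For context, annotation only: later in this patch Test913.java uses baseTag = 30000000 and
  // calls setTag(obj, baseTag + expectedHeapId), so subtracting kThreshold below recovers the
  // heap id the Java side predicted. Per the expected output added above, id 1 names the
  // "image" heap and id 3 the "app" heap; Object.class is expected in heap 1 when a boot image
  // is mapped and in heap 3 otherwise.)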
+ constexpr jlong kThreshold = 30000000; + jlong tag = *tag_ptr; + if (tag >= kThreshold) { + jint expected_heap_id = static_cast<jint>(tag - kThreshold); + CHECK_EQ(expected_heap_id, heap_id); + gFoundExt = true; + } + return 0; +} + +extern "C" JNIEXPORT void JNICALL Java_art_Test913_iterateThroughHeapExt( + JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) { + CHECK(gIterateThroughHeapExt != nullptr); + + jvmtiHeapCallbacks callbacks; + memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks)); + callbacks.heap_iteration_callback = + reinterpret_cast<decltype(callbacks.heap_iteration_callback)>(HeapIterationExtCallback); + + jvmtiError ret = gIterateThroughHeapExt(jvmti_env, 0, nullptr, &callbacks, nullptr); + JvmtiErrorToException(env, jvmti_env, ret); + CHECK(gFoundExt); +} + } // namespace Test913Heaps } // namespace art diff --git a/test/913-heaps/src/art/Test913.java b/test/913-heaps/src/art/Test913.java index 8800b1a4d7..97f48eea03 100644 --- a/test/913-heaps/src/art/Test913.java +++ b/test/913-heaps/src/art/Test913.java @@ -16,6 +16,9 @@ package art; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -44,6 +47,8 @@ public class Test913 { }; t.start(); cdl1.await(); + + doExtensionTests(); } public static void runFollowReferences() throws Exception { @@ -215,6 +220,59 @@ public class Test913 { System.out.println(getTag(floatObject)); } + static ArrayList<Object> extensionTestHolder; + + private static void doExtensionTests() { + checkForExtensionApis(); + + extensionTestHolder = new ArrayList<>(); + System.out.println(); + + try { + getHeapName(-1); + System.out.println("Expected failure for -1"); + } catch (Exception e) { + } + System.out.println(getHeapName(0)); + System.out.println(getHeapName(1)); + System.out.println(getHeapName(2)); + System.out.println(getHeapName(3)); + try { + getHeapName(4); + System.out.println("Expected failure for -1"); + } catch (Exception e) { + } + + System.out.println(); + + setTag(Object.class, 100000); + int objectClassHeapId = getObjectHeapId(100000); + int objClassExpectedHeapId = hasImage() ? 
1 : 3; + if (objectClassHeapId != objClassExpectedHeapId) { + throw new RuntimeException("Expected object class in heap " + objClassExpectedHeapId + + " but received " + objectClassHeapId); + } + + A a = new A(); + extensionTestHolder.add(a); + setTag(a, 100001); + System.out.println(getObjectHeapId(100001)); + + checkGetObjectHeapIdInCallback(100000, objClassExpectedHeapId); + checkGetObjectHeapIdInCallback(100001, 3); + + long baseTag = 30000000; + setTag(Object.class, baseTag + objClassExpectedHeapId); + setTag(Class.class, baseTag + objClassExpectedHeapId); + Object o = new Object(); + extensionTestHolder.add(o); + setTag(o, baseTag + 3); + + iterateThroughHeapExt(); + + extensionTestHolder = null; + } + private static void runGc() { clearStats(); forceGarbageCollection(); @@ -233,6 +291,24 @@ public class Test913 { System.out.println((s > 0) + " " + (f > 0)); } + private static boolean hasImage() { + try { + int pid = Integer.parseInt(new File("/proc/self").getCanonicalFile().getName()); + BufferedReader reader = new BufferedReader(new FileReader("/proc/" + pid + "/maps")); + String line; + while ((line = reader.readLine()) != null) { + if (line.endsWith(".art")) { + reader.close(); + return true; + } + } + reader.close(); + return false; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + private static class TestConfig { private Class<?> klass = null; private int heapFilter = 0; @@ -642,9 +718,16 @@ public class Test913 { private static native int getGcFinishes(); private static native void forceGarbageCollection(); + private static native void checkForExtensionApis(); + private static native int getObjectHeapId(long tag); + private static native String getHeapName(int heapId); + private static native void checkGetObjectHeapIdInCallback(long tag, int heapId); + public static native String[] followReferences(int heapFilter, Class<?> klassFilter, Object initialObject, int stopAfter, int followSet, Object jniRef); public static native String[] followReferencesString(Object initialObject); public static native String followReferencesPrimitiveArray(Object initialObject); public static native String followReferencesPrimitiveFields(Object initialObject); + + private static native void iterateThroughHeapExt(); } diff --git a/test/etc/default-build b/test/etc/default-build index 744c38bb6d..0508b85529 100755 --- a/test/etc/default-build +++ b/test/etc/default-build @@ -82,9 +82,9 @@ JACK_EXPERIMENTAL_ARGS["method-handles"]="-D jack.java.source.version=1.7 -D jac JACK_EXPERIMENTAL_ARGS[${DEFAULT_EXPERIMENT}]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24" declare -A SMALI_EXPERIMENTAL_ARGS -SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api-level 24" -SMALI_EXPERIMENTAL_ARGS["method-handles"]="--api-level 26" -SMALI_EXPERIMENTAL_ARGS["agents"]="--api-level 26" +SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api 24" +SMALI_EXPERIMENTAL_ARGS["method-handles"]="--api 26" +SMALI_EXPERIMENTAL_ARGS["agents"]="--api 26" declare -A JAVAC_EXPERIMENTAL_ARGS JAVAC_EXPERIMENTAL_ARGS["default-methods"]="-source 1.8 -target 1.8" @@ -275,7 +275,7 @@ fi if [ "${HAS_SMALI}" = "true" -a ${NEED_DEX} = "true" ]; then # Compile Smali classes - ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'` + ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'` # Don't bother with dexmerger if we provide our own main function in a smali file. 
if [ ${SKIP_DX_MERGER} = "false" ]; then @@ -287,7 +287,7 @@ fi if [ "${HAS_SMALI_MULTIDEX}" = "true" -a ${NEED_DEX} = "true" ]; then # Compile Smali classes - ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes2.dex `find smali-multidex -name '*.smali'` + ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes2.dex `find smali-multidex -name '*.smali'` # Don't bother with dexmerger if we provide our own main function in a smali file. if [ ${HAS_SRC_MULTIDEX} = "true" ]; then diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar index bb99e1cb50..f75055674e 100755 --- a/test/etc/run-test-jar +++ b/test/etc/run-test-jar @@ -564,6 +564,11 @@ if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then profman_cmdline="${ANDROID_ROOT}/bin/profman \ --apk=$DEX_LOCATION/$TEST_NAME.jar \ --dex-location=$DEX_LOCATION/$TEST_NAME.jar" + if [ -f $DEX_LOCATION/$TEST_NAME-ex.jar ]; then + profman_cmdline="${profman_cmdline} \ + --apk=$DEX_LOCATION/$TEST_NAME-ex.jar \ + --dex-location=$DEX_LOCATION/$TEST_NAME-ex.jar" + fi COMPILE_FLAGS="${COMPILE_FLAGS} --profile-file=$DEX_LOCATION/$TEST_NAME.prof" FLAGS="${FLAGS} -Xcompiler-option --profile-file=$DEX_LOCATION/$TEST_NAME.prof" if [ "$PROFILE" = "y" ]; then diff --git a/test/knownfailures.json b/test/knownfailures.json index ea810db1ac..659b814561 100644 --- a/test/knownfailures.json +++ b/test/knownfailures.json @@ -328,11 +328,6 @@ "variant": "interpreter | optimizing | regalloc_gc | jit" }, { - "tests": ["912-classes"], - "bug": "http://b/36344364", - "variant": "no-dex2oat | relocate-npatchoat" - }, - { "tests": ["476-clinit-inline-static-invoke", "496-checker-inlining-class-loader", "508-referrer-method", @@ -682,5 +677,12 @@ "variant": "debug", "description": "Test disabled in debug mode because of dex2oatd timeouts.", "bug": "b/33650497" + }, + { + "tests": "640-checker-integer-valueof", + "description": [ + "The java.lang.Integer.valueOf intrinsic is not supported in PIC mode." + ], + "variant": "optimizing & pictest | speed-profile & pictest" } ] diff --git a/test/run-test b/test/run-test index f60f766751..933a7febac 100755 --- a/test/run-test +++ b/test/run-test @@ -46,7 +46,7 @@ export RUN="${progdir}/etc/run-test-jar" export DEX_LOCATION=/data/run-test/${test_dir} export NEED_DEX="true" export USE_JACK="true" -export SMALI_ARGS="--experimental" +export SMALI_ARGS="" # If dx was not set by the environment variable, assume it is in the path. if [ -z "$DX" ]; then diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py index a80924639b..9a437cc822 100755 --- a/test/testrunner/testrunner.py +++ b/test/testrunner/testrunner.py @@ -497,7 +497,11 @@ def run_test(command, test, test_variant, test_name): test_skipped = True else: test_skipped = False - proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, stdout=subprocess.PIPE, universal_newlines=True) + if gdb: + proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, universal_newlines=True) + else: + proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, stdout = subprocess.PIPE, + universal_newlines=True) script_output = proc.communicate(timeout=timeout)[0] test_passed = not proc.wait() |
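For reference, the smali command-line style changes consistently across the build scripts above: assembly now goes through the "assemble" subcommand, the per-experiment flags move from "--api-level N" to "--api N", and run-test no longer seeds SMALI_ARGS with "--experimental". The before/after invocation in test/etc/default-build, copied from the hunks above:

    # old: ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
    # new: ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`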