69 files changed, 2867 insertions, 1691 deletions
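The Thumb2RelativePatcher changes below (and their tests) hinge on the 16-bit LDR (immediate), encoding T1, that the new narrow Baker read barrier path has to recognize and rewrite: base opcode 0x6800, imm5 in bits 10-6 scaled by 4, Rn in bits 5-3, Rt in bits 2-0. A minimal standalone sketch of that layout follows; the helper names are made up for illustration and are not ART code.

#include <cstdint>

// Illustration only (not ART code): LDR (immediate), 16-bit Thumb encoding T1,
// i.e. the kLdrInsn pattern used by the narrow thunks and the patcher tests.
constexpr uint32_t kLdrT1Base = 0x6800u;  // 0110 1 imm5 Rn Rt

constexpr uint32_t EncodeLdrT1(uint32_t rt, uint32_t rn, uint32_t byte_offset) {
  // Requires rt < 8, rn < 8 and a word-aligned byte_offset that fits in imm5 * 4.
  return kLdrT1Base | ((byte_offset / 4u) << 6) | (rn << 3) | rt;
}

constexpr uint32_t DecodeLdrT1Offset(uint32_t insn) {
  // imm5 lives in bits 10-6 and is scaled by 4, matching the thunk's
  // UBFX ip, ip, #6, #5 followed by LDR ip, [base_reg, ip, LSL #2].
  return ((insn >> 6) & 0x1fu) * 4u;
}

static_assert(EncodeLdrT1(/* rt */ 0u, /* rn */ 3u, /* byte_offset */ 8u) == 0x6898u, "sanity");
static_assert(DecodeLdrT1Offset(0x6898u) == 8u, "sanity");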
diff --git a/compiler/Android.bp b/compiler/Android.bp index a2b07af810..df896dc73c 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -115,6 +115,7 @@ art_cc_defaults { "optimizing/intrinsics_arm.cc", "optimizing/intrinsics_arm_vixl.cc", "optimizing/nodes_shared.cc", + "optimizing/scheduler_arm.cc", "utils/arm/assembler_arm.cc", "utils/arm/assembler_arm_vixl.cc", "utils/arm/assembler_thumb2.cc", diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index ced52ff07a..a98aedfc69 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -18,6 +18,7 @@ #include "arch/arm/asm_support_arm.h" #include "art_method.h" +#include "base/bit_utils.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "lock_word.h" @@ -112,12 +113,22 @@ void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* co // Check that the next instruction matches the expected LDR. switch (kind) { case BakerReadBarrierKind::kField: { - DCHECK_GE(code->size() - literal_offset, 8u); - uint32_t next_insn = GetInsn32(code, literal_offset + 4u); - // LDR (immediate) with correct base_reg. - CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. - const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(code, literal_offset + 4u); + // LDR (immediate), encoding T3, with correct base_reg. + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); + } else { + DCHECK_GE(code->size() - literal_offset, 6u); + uint32_t next_insn = GetInsn16(code, literal_offset + 4u); + // LDR (immediate), encoding T1, with correct base_reg. + CheckValidReg(next_insn & 0x7u); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3)); + } break; } case BakerReadBarrierKind::kArray: { @@ -131,11 +142,20 @@ void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* co break; } case BakerReadBarrierKind::kGcRoot: { - DCHECK_GE(literal_offset, 4u); - uint32_t prev_insn = GetInsn32(code, literal_offset - 4u); - // LDR (immediate) with correct root_reg. - const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn32(code, literal_offset - 4u); + // LDR (immediate), encoding T3, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); + } else { + DCHECK_GE(literal_offset, 2u); + uint32_t prev_insn = GetInsn16(code, literal_offset - 2u); + // LDR (immediate), encoding T1, with correct root_reg. 
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg); + } break; } default: @@ -160,7 +180,8 @@ void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* co static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, vixl::aarch32::Register base_reg, vixl::aarch32::MemOperand& lock_word, - vixl::aarch32::Label* slow_path) { + vixl::aarch32::Label* slow_path, + int32_t raw_ldr_offset) { using namespace vixl::aarch32; // NOLINT(build/namespaces) // Load the lock word containing the rb_state. __ Ldr(ip, lock_word); @@ -169,14 +190,7 @@ static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); __ B(ne, slow_path, /* is_far_target */ false); - static_assert( - BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, - "Field and array LDR offsets must be the same to reuse the same code."); - // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + __ Add(lr, lr, raw_ldr_offset); // Introduce a dependency on the lock_word including rb_state, // to prevent load-load reordering, and without using // a memory barrier (which would be more expensive). @@ -199,6 +213,7 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& CheckValidReg(base_reg.GetCode()); Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); CheckValidReg(holder_reg.GetCode()); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip); // If base_reg differs from holder_reg, the offset was too large and we must have @@ -210,16 +225,30 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& } vixl::aarch32::Label slow_path; MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide) + ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); __ Bind(&slow_path); const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + - BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET; - MemOperand ldr_half_address(lr, ldr_offset + 2); - __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". - __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. - __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. + raw_ldr_offset; + Register ep_reg(kBakerCcEntrypointRegister); + if (width == BakerReadBarrierWidth::kWide) { + MemOperand ldr_half_address(lr, ldr_offset + 2); + __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". + __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. + __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. 
+ } else { + MemOperand ldr_address(lr, ldr_offset); + __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1. + __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint + ep_reg, // for narrow LDR. + Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)); + __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4. + __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference. + } // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. - __ Bx(Register(kBakerCcEntrypointRegister)); // Jump to the entrypoint. + __ Bx(ep_reg); // Jump to the entrypoint. if (holder_reg.Is(base_reg)) { // Add null check slow path. The stack map is at the address pointed to by LR. __ Bind(&throw_npe); @@ -233,6 +262,7 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); CheckValidReg(base_reg.GetCode()); DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidth::kWide == BakerReadBarrierWidthField::Decode(encoded_data)); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip); vixl::aarch32::Label slow_path; @@ -240,10 +270,11 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); DCHECK_LT(lock_word.GetOffsetImmediate(), 0); - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); __ Bind(&slow_path); const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + - BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; + raw_ldr_offset; MemOperand ldr_address(lr, ldr_offset + 2); __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", // i.e. Rm+32 because the scale in imm2 is 2. @@ -261,6 +292,7 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); CheckValidReg(root_reg.GetCode()); DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip); vixl::aarch32::Label return_label, not_marked, forwarding_address; @@ -280,7 +312,10 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister // to art_quick_read_barrier_mark_introspection_gc_roots. Register ep_reg(kBakerCcEntrypointRegister); - __ Add(ep_reg, ep_reg, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET)); + int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide) + ? 
BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET; + __ Add(ep_reg, ep_reg, Operand(entrypoint_offset)); __ Mov(ip, root_reg); __ Bx(ep_reg); __ Bind(&forwarding_address); @@ -344,7 +379,7 @@ uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { DCHECK_LE(offset + 4u, code->size()); - DCHECK_EQ(offset & 1u, 0u); + DCHECK_ALIGNED(offset, 2u); uint8_t* addr = &(*code)[offset]; addr[0] = (value >> 16) & 0xff; addr[1] = (value >> 24) & 0xff; @@ -354,7 +389,7 @@ void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offse uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) { DCHECK_LE(offset + 4u, code.size()); - DCHECK_EQ(offset & 1u, 0u); + DCHECK_ALIGNED(offset, 2u); const uint8_t* addr = &code[offset]; return (static_cast<uint32_t>(addr[0]) << 16) + @@ -369,5 +404,18 @@ uint32_t Thumb2RelativePatcher::GetInsn32(Vector* code, uint32_t offset) { return GetInsn32(ArrayRef<const uint8_t>(*code), offset); } +uint32_t Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset) { + DCHECK_LE(offset + 2u, code.size()); + DCHECK_ALIGNED(offset, 2u); + const uint8_t* addr = &code[offset]; + return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8); +} + +template <typename Vector> +uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) { + static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); + return GetInsn16(ArrayRef<const uint8_t>(*code), offset); +} + } // namespace linker } // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h index 7fad245856..7e787d2916 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -35,26 +35,37 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { public: static constexpr uint32_t kBakerCcEntrypointRegister = 4u; - static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { + static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { CheckValidReg(base_reg); CheckValidReg(holder_reg); + DCHECK(!narrow || base_reg < 8u) << base_reg; + BakerReadBarrierWidth width = + narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(holder_reg); + BakerReadBarrierSecondRegField::Encode(holder_reg) | + BakerReadBarrierWidthField::Encode(width); } static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { CheckValidReg(base_reg); return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); } - static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) { CheckValidReg(root_reg); + DCHECK(!narrow || root_reg < 8u) << root_reg; + BakerReadBarrierWidth width = + narrow ? 
BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | BakerReadBarrierFirstRegField::Encode(root_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(width); } explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); @@ -86,6 +97,12 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { kLast }; + enum class BakerReadBarrierWidth : uint8_t { + kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled). + kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled). + kLast + }; + static constexpr size_t kBitsForBakerReadBarrierKind = MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); static constexpr size_t kBitsForRegister = 4u; @@ -95,9 +112,14 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; using BakerReadBarrierSecondRegField = BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; + static constexpr size_t kBitsForBakerReadBarrierWidth = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast)); + using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth, + kBitsForBakerReadBarrierKind + 2 * kBitsForRegister, + kBitsForBakerReadBarrierWidth>; static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister); + DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg; } void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data); @@ -108,6 +130,11 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { template <typename Vector> static uint32_t GetInsn32(Vector* code, uint32_t offset); + static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset); + + template <typename Vector> + static uint32_t GetInsn16(Vector* code, uint32_t offset); + friend class Thumb2RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher); diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index 2e28349231..af5fa40dc1 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -52,6 +52,9 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset. static constexpr uint32_t kBneWPlus0 = 0xf0408000u; + // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn. + static constexpr uint32_t kLdrInsn = 0x6800u; + // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn. 
static constexpr uint32_t kLdrWInsn = 0xf8d00000u; @@ -223,9 +226,11 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { void TestStringReference(uint32_t string_offset); void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) { + std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg)); + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow)); ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); } @@ -237,9 +242,9 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); } - std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { + std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) { LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow)); ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); } @@ -260,7 +265,8 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { (static_cast<uint32_t>(output_[offset + 1]) << 8); } - void TestBakerField(uint32_t offset, uint32_t ref_reg); + void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg); + void TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg); }; const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { @@ -568,7 +574,7 @@ TEST_F(Thumb2RelativePatcherTest, StringReference4) { ASSERT_LT(GetMethodOffset(1u), 0xfcu); } -void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) { +void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) { uint32_t valid_regs[] = { 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. 8, 9, 10, 11, // IP, SP, LR and PC are reserved. 
@@ -584,8 +590,8 @@ void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); ASSERT_EQ(kMethodCodeSize, raw_code.size()); ArrayRef<const uint8_t> code(raw_code); - uint32_t encoded_data = - Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + base_reg, holder_reg, /* narrow */ false); const LinkerPatch patches[] = { LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), }; @@ -608,7 +614,113 @@ void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg ASSERT_TRUE( CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg); + std::vector<uint8_t> expected_thunk = + CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + size_t gray_check_offset = thunk_offset; + if (holder_reg == base_reg) { + // Verify that the null-check uses the correct register, i.e. holder_reg. + if (holder_reg < 8) { + ASSERT_GE(output_.size() - gray_check_offset, 2u); + ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + gray_check_offset +=2u; + } else { + ASSERT_GE(output_.size() - gray_check_offset, 6u); + ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); + ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ + gray_check_offset += 6u; + } + } + // Verify that the lock word for gray bit check is loaded from the holder address. + ASSERT_GE(output_.size() - gray_check_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + const uint32_t load_lock_word = + kLdrWInsn | + (holder_reg << 16) | + (/* IP */ 12 << 12) | + mirror::Object::MonitorOffset().Uint32Value(); + ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). + EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. 
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } + } +} + +void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + DCHECK_ALIGNED(offset, 4u); + DCHECK_LT(offset, 32u); + constexpr size_t kMethodCodeSize = 6u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + if (base_reg >= 8u) { + continue; + } + for (uint32_t holder_reg : valid_regs) { + uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + base_reg, holder_reg, /* narrow */ true); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + if (base_reg >= 8u) { + continue; + } + for (uint32_t holder_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; + const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; + ASSERT_TRUE( + CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = + CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true); ASSERT_GT(output_.size(), thunk_offset); ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, @@ -666,15 +778,26 @@ void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg } } -#define TEST_BAKER_FIELD(offset, ref_reg) \ - TEST_F(Thumb2RelativePatcherTest, \ - BakerOffset##offset##_##ref_reg) { \ - TestBakerField(offset, ref_reg); \ +#define TEST_BAKER_FIELD_WIDE(offset, ref_reg) \ + TEST_F(Thumb2RelativePatcherTest, \ + BakerOffsetWide##offset##_##ref_reg) { \ + TestBakerFieldWide(offset, ref_reg); \ } -TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0) -TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 7) -TEST_BAKER_FIELD(/* offset */ 0xffc, /* ref_reg */ 11) +TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3) +TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7) +TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11) + +#define TEST_BAKER_FIELD_NARROW(offset, ref_reg) \ + TEST_F(Thumb2RelativePatcherTest, \ + BakerOffsetNarrow##offset##_##ref_reg) { \ + TestBakerFieldNarrow(offset, ref_reg); \ + } + +TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3) +TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7) TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { // One thunk in the middle with maximum distance branches to it from both sides. 
@@ -682,8 +805,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { constexpr uint32_t kLiteralOffset1 = 6u; const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); const LinkerPatch patches1[] = { LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), }; @@ -710,7 +833,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { // - thunk size and method 3 pre-header, rounded up (padding in between if needed) // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). - size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); + size_t thunk_size = + CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); size_t filler2_size = 1 * MB - (kLiteralOffset2 + kPcAdjustment) - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) @@ -749,8 +873,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) { constexpr uint32_t kLiteralOffset1 = 4u; const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn}); ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); const LinkerPatch patches1[] = { LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), }; @@ -779,8 +903,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast constexpr uint32_t kLiteralOffset1 = 6u; const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); const LinkerPatch patches1[] = { LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), }; @@ -809,7 +933,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast // - thunk size and method 3 pre-header, rounded up (padding in between if needed) // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
- size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); + size_t thunk_size = + CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); size_t filler2_size = 1 * MB - (kReachableFromOffset2 + kPcAdjustment) - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) @@ -929,7 +1054,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerArray) { } } -TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { +TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) { uint32_t valid_regs[] = { 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. 8, 9, 10, 11, // IP, SP, LR and PC are reserved. @@ -945,7 +1070,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { ArrayRef<const uint8_t> code(raw_code); const LinkerPatch patches[] = { LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)), + kLiteralOffset, + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)), }; AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); } @@ -962,7 +1088,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { ASSERT_EQ(kMethodCodeSize, expected_code.size()); EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg); + std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false); ASSERT_GT(output_.size(), thunk_offset); ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, @@ -972,7 +1098,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { ASSERT_TRUE(false); } - // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. + // Verify that the fast-path null-check uses the correct register, i.e. root_reg. if (root_reg < 8) { ASSERT_GE(output_.size() - thunk_offset, 2u); ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); @@ -988,6 +1114,60 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { } } +TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + // Not appplicable to high registers. + }; + constexpr size_t kMethodCodeSize = 6u; + constexpr size_t kLiteralOffset = 2u; + uint32_t method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg; + const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. 
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg; + const std::vector<uint8_t> expected_code = RawCode({ldr, bne}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. + ASSERT_GE(output_.size() - thunk_offset, 2u); + ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } +} + TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) { // Test 1MiB of patches to the same thunk to stress-test different large offsets. // (The low bits are not that important but the location of the high bits is easy to get wrong.) @@ -998,7 +1178,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) { patches.reserve(num_patches); const uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false); for (size_t i = 0; i != num_patches; ++i) { PushBackInsn(&code, ldr); PushBackInsn(&code, kBneWPlus0); @@ -1067,7 +1248,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) { // this pushes the first GC root thunk's pending MaxNextOffset() before the method call // thunk's pending MaxNextOffset() which needs to be adjusted. ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment, - CompileBakerGcRootThunk(/* root_reg */ 0).size()); + CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size()); static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8"); constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment; constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment; @@ -1080,9 +1261,9 @@ TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) { ldr2, kBneWPlus0, // Second GC root LDR with read barrier. 
}); uint32_t encoded_data1 = - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1); + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false); uint32_t encoded_data2 = - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2); + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false); const LinkerPatch last_method_patches[] = { LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1), LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2), diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h index d1ab410a7e..02a5b1ef8f 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.h +++ b/compiler/linker/arm64/relative_patcher_arm64.h @@ -100,7 +100,7 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 30u && reg != 16u && reg != 17u); + DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg; } void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data); diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc index 5e70a8284d..1e75f10ebe 100644 --- a/compiler/optimizing/block_builder.cc +++ b/compiler/optimizing/block_builder.cc @@ -310,16 +310,18 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // least one predecessor is not covered by the same TryItem as the try block. // We do not split each edge separately, but rather create one boundary block // that all predecessors are relinked to. This preserves loop headers (b/23895756). - for (auto entry : try_block_info) { - HBasicBlock* try_block = graph_->GetBlocks()[entry.first]; + for (const auto& entry : try_block_info) { + uint32_t block_id = entry.first; + const DexFile::TryItem* try_item = entry.second; + HBasicBlock* try_block = graph_->GetBlocks()[block_id]; for (HBasicBlock* predecessor : try_block->GetPredecessors()) { - if (GetTryItem(predecessor, try_block_info) != entry.second) { + if (GetTryItem(predecessor, try_block_info) != try_item) { // Found a predecessor not covered by the same TryItem. Insert entering // boundary block. HTryBoundary* try_entry = new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc()); try_block->CreateImmediateDominator()->AddInstruction(try_entry); - LinkToCatchBlocks(try_entry, code_item_, entry.second, catch_blocks); + LinkToCatchBlocks(try_entry, code_item_, try_item, catch_blocks); break; } } @@ -327,8 +329,10 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // Do a second pass over the try blocks and insert exit TryBoundaries where // the successor is not in the same TryItem. - for (auto entry : try_block_info) { - HBasicBlock* try_block = graph_->GetBlocks()[entry.first]; + for (const auto& entry : try_block_info) { + uint32_t block_id = entry.first; + const DexFile::TryItem* try_item = entry.second; + HBasicBlock* try_block = graph_->GetBlocks()[block_id]; // NOTE: Do not use iterators because SplitEdge would invalidate them. for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) { HBasicBlock* successor = try_block->GetSuccessors()[i]; @@ -337,7 +341,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // covered by the same TryItem. 
Otherwise the previous pass would have // created a non-throwing boundary block. if (GetTryItem(successor, try_block_info) != nullptr) { - DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info)); + DCHECK_EQ(try_item, GetTryItem(successor, try_block_info)); continue; } @@ -345,7 +349,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { HTryBoundary* try_exit = new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc()); graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit); - LinkToCatchBlocks(try_exit, code_item_, entry.second, catch_blocks); + LinkToCatchBlocks(try_exit, code_item_, try_item, catch_blocks); } } } diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index ed630cda91..f3ecdf036a 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1734,8 +1734,8 @@ class BCEVisitor : public HGraphVisitor { */ void InsertPhiNodes() { // Scan all new deoptimization blocks. - for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) { - HBasicBlock* true_block = it1->second; + for (const auto& entry : taken_test_loop_) { + HBasicBlock* true_block = entry.second; HBasicBlock* new_preheader = true_block->GetSingleSuccessor(); // Scan all instructions in a new deoptimization block. for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) { diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 5136d7d2b8..65f3c72e99 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -145,7 +145,7 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) { } size_t CodeGenerator::GetCachePointerOffset(uint32_t index) { - auto pointer_size = InstructionSetPointerSize(GetInstructionSet()); + PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet()); return static_cast<size_t>(pointer_size) * index; } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index ea463eeb62..9ef692aaf0 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -842,7 +842,7 @@ class SlowPathGenerator { const uint32_t dex_pc = instruction->GetDexPc(); auto iter = slow_path_map_.find(dex_pc); if (iter != slow_path_map_.end()) { - auto candidates = iter->second; + const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second; for (const auto& it : candidates) { InstructionType* other_instruction = it.first; SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 1990e8f67d..ab3d499235 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -90,13 +90,17 @@ static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instru } static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) { - DCHECK(down_cast<Thumb2Assembler*>(codegen->GetAssembler())->IsForced32Bit()); + ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler())); __ BindTrackedLabel(bne_label); Label placeholder_label; __ b(&placeholder_label, NE); // Placeholder, patched at link-time. 
__ Bind(&placeholder_label); } +static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) { + return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u; +} + static constexpr int kRegListThreshold = 4; // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, @@ -1652,34 +1656,6 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) { } } -static int64_t AdjustConstantForCondition(int64_t value, - IfCondition* condition, - IfCondition* opposite) { - if (value == 1) { - if (*condition == kCondB) { - value = 0; - *condition = kCondEQ; - *opposite = kCondNE; - } else if (*condition == kCondAE) { - value = 0; - *condition = kCondNE; - *opposite = kCondEQ; - } - } else if (value == -1) { - if (*condition == kCondGT) { - value = 0; - *condition = kCondGE; - *opposite = kCondLT; - } else if (*condition == kCondLE) { - value = 0; - *condition = kCondLT; - *opposite = kCondGE; - } - } - - return value; -} - static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition, bool invert, CodeGeneratorARM* codegen) { @@ -1693,7 +1669,7 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond std::swap(cond, opposite); } - std::pair<Condition, Condition> ret(EQ, NE); + std::pair<Condition, Condition> ret; const Location left = locations->InAt(0); const Location right = locations->InAt(1); @@ -1701,38 +1677,7 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond const Register left_high = left.AsRegisterPairHigh<Register>(); const Register left_low = left.AsRegisterPairLow<Register>(); - int64_t value = AdjustConstantForCondition(right.GetConstant()->AsLongConstant()->GetValue(), - &cond, - &opposite); - - // Comparisons against 0 are common enough to deserve special attention. - if (value == 0) { - switch (cond) { - case kCondNE: - // x > 0 iff x != 0 when the comparison is unsigned. - case kCondA: - ret = std::make_pair(NE, EQ); - FALLTHROUGH_INTENDED; - case kCondEQ: - // x <= 0 iff x == 0 when the comparison is unsigned. - case kCondBE: - __ orrs(IP, left_low, ShifterOperand(left_high)); - return ret; - case kCondLT: - case kCondGE: - __ cmp(left_high, ShifterOperand(0)); - return std::make_pair(ARMCondition(cond), ARMCondition(opposite)); - // Trivially true or false. 
- case kCondB: - ret = std::make_pair(NE, EQ); - FALLTHROUGH_INTENDED; - case kCondAE: - __ cmp(left_low, ShifterOperand(left_low)); - return ret; - default: - break; - } - } + int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); switch (cond) { case kCondEQ: @@ -1892,14 +1837,10 @@ static std::pair<Condition, Condition> GenerateTest(HCondition* condition, static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { const LocationSummary* const locations = condition->GetLocations(); + const IfCondition c = condition->GetCondition(); if (locations->InAt(1).IsConstant()) { - IfCondition c = condition->GetCondition(); - IfCondition opposite = condition->GetOppositeCondition(); - const int64_t value = AdjustConstantForCondition( - Int64FromConstant(locations->InAt(1).GetConstant()), - &c, - &opposite); + const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); ShifterOperand so; if (c < kCondLT || c > kCondGE) { @@ -1907,11 +1848,9 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { // we check that the least significant half of the first input to be compared // is in a low register (the other half is read outside an IT block), and // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used; 0 is always handled, no matter what registers are - // used by the first input. - if (value != 0 && - (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || - !IsUint<8>(Low32Bits(value)))) { + // encoding can be used. + if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) || + !IsUint<8>(Low32Bits(value))) { return false; } } else if (c == kCondLE || c == kCondGT) { @@ -1938,329 +1877,6 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) { return true; } -static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARM* codegen) { - DCHECK(CanGenerateTest(cond, codegen->GetAssembler())); - - const Register out = cond->GetLocations()->Out().AsRegister<Register>(); - const auto condition = GenerateTest(cond, false, codegen); - - __ mov(out, ShifterOperand(0), AL, kCcKeep); - - if (ArmAssembler::IsLowRegister(out)) { - __ it(condition.first); - __ mov(out, ShifterOperand(1), condition.first); - } else { - Label done_label; - Label* const final_label = codegen->GetFinalLabel(cond, &done_label); - - __ b(final_label, condition.second); - __ LoadImmediate(out, 1); - - if (done_label.IsLinked()) { - __ Bind(&done_label); - } - } -} - -static void GenerateEqualLong(HCondition* cond, CodeGeneratorARM* codegen) { - DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); - - const LocationSummary* const locations = cond->GetLocations(); - IfCondition condition = cond->GetCondition(); - const Register out = locations->Out().AsRegister<Register>(); - const Location left = locations->InAt(0); - const Location right = locations->InAt(1); - Register left_high = left.AsRegisterPairHigh<Register>(); - Register left_low = left.AsRegisterPairLow<Register>(); - - if (right.IsConstant()) { - IfCondition opposite = cond->GetOppositeCondition(); - const int64_t value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), - &condition, - &opposite); - int32_t value_high = -High32Bits(value); - int32_t value_low = -Low32Bits(value); - - // The output uses Location::kNoOutputOverlap. 
- if (out == left_high) { - std::swap(left_low, left_high); - std::swap(value_low, value_high); - } - - __ AddConstant(out, left_low, value_low); - __ AddConstant(IP, left_high, value_high); - } else { - DCHECK(right.IsRegisterPair()); - __ sub(IP, left_high, ShifterOperand(right.AsRegisterPairHigh<Register>())); - __ sub(out, left_low, ShifterOperand(right.AsRegisterPairLow<Register>())); - } - - // Need to check after calling AdjustConstantForCondition(). - DCHECK(condition == kCondEQ || condition == kCondNE) << condition; - - if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) { - __ orrs(out, out, ShifterOperand(IP)); - __ it(NE); - __ mov(out, ShifterOperand(1), NE); - } else { - __ orr(out, out, ShifterOperand(IP)); - codegen->GenerateConditionWithZero(condition, out, out, IP); - } -} - -static void GenerateLongComparesAndJumps(HCondition* cond, - Label* true_label, - Label* false_label, - CodeGeneratorARM* codegen) { - LocationSummary* locations = cond->GetLocations(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - IfCondition if_cond = cond->GetCondition(); - - Register left_high = left.AsRegisterPairHigh<Register>(); - Register left_low = left.AsRegisterPairLow<Register>(); - IfCondition true_high_cond = if_cond; - IfCondition false_high_cond = cond->GetOppositeCondition(); - Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part - - // Set the conditions for the test, remembering that == needs to be - // decided using the low words. - switch (if_cond) { - case kCondEQ: - case kCondNE: - // Nothing to do. - break; - case kCondLT: - false_high_cond = kCondGT; - break; - case kCondLE: - true_high_cond = kCondLT; - break; - case kCondGT: - false_high_cond = kCondLT; - break; - case kCondGE: - true_high_cond = kCondGT; - break; - case kCondB: - false_high_cond = kCondA; - break; - case kCondBE: - true_high_cond = kCondB; - break; - case kCondA: - false_high_cond = kCondB; - break; - case kCondAE: - true_high_cond = kCondA; - break; - } - if (right.IsConstant()) { - int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); - int32_t val_low = Low32Bits(value); - int32_t val_high = High32Bits(value); - - __ CmpConstant(left_high, val_high); - if (if_cond == kCondNE) { - __ b(true_label, ARMCondition(true_high_cond)); - } else if (if_cond == kCondEQ) { - __ b(false_label, ARMCondition(false_high_cond)); - } else { - __ b(true_label, ARMCondition(true_high_cond)); - __ b(false_label, ARMCondition(false_high_cond)); - } - // Must be equal high, so compare the lows. - __ CmpConstant(left_low, val_low); - } else { - Register right_high = right.AsRegisterPairHigh<Register>(); - Register right_low = right.AsRegisterPairLow<Register>(); - - __ cmp(left_high, ShifterOperand(right_high)); - if (if_cond == kCondNE) { - __ b(true_label, ARMCondition(true_high_cond)); - } else if (if_cond == kCondEQ) { - __ b(false_label, ARMCondition(false_high_cond)); - } else { - __ b(true_label, ARMCondition(true_high_cond)); - __ b(false_label, ARMCondition(false_high_cond)); - } - // Must be equal high, so compare the lows. - __ cmp(left_low, ShifterOperand(right_low)); - } - // The last comparison might be unsigned. 
- // TODO: optimize cases where this is always true/false - __ b(true_label, final_condition); -} - -static void GenerateConditionLong(HCondition* cond, CodeGeneratorARM* codegen) { - DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); - - const LocationSummary* const locations = cond->GetLocations(); - IfCondition condition = cond->GetCondition(); - const Register out = locations->Out().AsRegister<Register>(); - const Location left = locations->InAt(0); - const Location right = locations->InAt(1); - - if (right.IsConstant()) { - IfCondition opposite = cond->GetOppositeCondition(); - - // Comparisons against 0 are common enough to deserve special attention. - if (AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), - &condition, - &opposite) == 0) { - switch (condition) { - case kCondNE: - case kCondA: - if (ArmAssembler::IsLowRegister(out)) { - // We only care if both input registers are 0 or not. - __ orrs(out, - left.AsRegisterPairLow<Register>(), - ShifterOperand(left.AsRegisterPairHigh<Register>())); - __ it(NE); - __ mov(out, ShifterOperand(1), NE); - return; - } - - FALLTHROUGH_INTENDED; - case kCondEQ: - case kCondBE: - // We only care if both input registers are 0 or not. - __ orr(out, - left.AsRegisterPairLow<Register>(), - ShifterOperand(left.AsRegisterPairHigh<Register>())); - codegen->GenerateConditionWithZero(condition, out, out); - return; - case kCondLT: - case kCondGE: - // We only care about the sign bit. - FALLTHROUGH_INTENDED; - case kCondAE: - case kCondB: - codegen->GenerateConditionWithZero(condition, out, left.AsRegisterPairHigh<Register>()); - return; - case kCondLE: - case kCondGT: - default: - break; - } - } - } - - if ((condition == kCondEQ || condition == kCondNE) && - // If `out` is a low register, then the GenerateConditionGeneric() - // function generates a shorter code sequence that is still branchless. - (!ArmAssembler::IsLowRegister(out) || !CanGenerateTest(cond, codegen->GetAssembler()))) { - GenerateEqualLong(cond, codegen); - return; - } - - if (CanGenerateTest(cond, codegen->GetAssembler())) { - GenerateConditionGeneric(cond, codegen); - return; - } - - // Convert the jumps into the result. - Label done_label; - Label* const final_label = codegen->GetFinalLabel(cond, &done_label); - Label true_label, false_label; - - GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen); - - // False case: result = 0. - __ Bind(&false_label); - __ mov(out, ShifterOperand(0)); - __ b(final_label); - - // True case: result = 1. 
- __ Bind(&true_label); - __ mov(out, ShifterOperand(1)); - - if (done_label.IsLinked()) { - __ Bind(&done_label); - } -} - -static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARM* codegen) { - const Primitive::Type type = cond->GetLeft()->GetType(); - - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - - if (type == Primitive::kPrimLong) { - GenerateConditionLong(cond, codegen); - return; - } - - const LocationSummary* const locations = cond->GetLocations(); - IfCondition condition = cond->GetCondition(); - Register in = locations->InAt(0).AsRegister<Register>(); - const Register out = locations->Out().AsRegister<Register>(); - const Location right = cond->GetLocations()->InAt(1); - int64_t value; - - if (right.IsConstant()) { - IfCondition opposite = cond->GetOppositeCondition(); - - value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()), - &condition, - &opposite); - - // Comparisons against 0 are common enough to deserve special attention. - if (value == 0) { - switch (condition) { - case kCondNE: - case kCondA: - if (ArmAssembler::IsLowRegister(out) && out == in) { - __ cmp(out, ShifterOperand(0)); - __ it(NE); - __ mov(out, ShifterOperand(1), NE); - return; - } - - FALLTHROUGH_INTENDED; - case kCondEQ: - case kCondBE: - case kCondLT: - case kCondGE: - case kCondAE: - case kCondB: - codegen->GenerateConditionWithZero(condition, out, in); - return; - case kCondLE: - case kCondGT: - default: - break; - } - } - } - - if (condition == kCondEQ || condition == kCondNE) { - ShifterOperand operand; - - if (right.IsConstant()) { - operand = ShifterOperand(value); - } else if (out == right.AsRegister<Register>()) { - // Avoid 32-bit instructions if possible. - operand = ShifterOperand(in); - in = right.AsRegister<Register>(); - } else { - operand = ShifterOperand(right.AsRegister<Register>()); - } - - if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) { - __ subs(out, in, operand); - __ it(NE); - __ mov(out, ShifterOperand(1), NE); - } else { - __ sub(out, in, operand); - codegen->GenerateConditionWithZero(condition, out, out); - } - - return; - } - - GenerateConditionGeneric(cond, codegen); -} - static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) { const Primitive::Type type = constant->GetType(); bool ret = false; @@ -2867,6 +2483,89 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } +void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, + Label* true_label, + Label* false_label) { + LocationSummary* locations = cond->GetLocations(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + IfCondition if_cond = cond->GetCondition(); + + Register left_high = left.AsRegisterPairHigh<Register>(); + Register left_low = left.AsRegisterPairLow<Register>(); + IfCondition true_high_cond = if_cond; + IfCondition false_high_cond = cond->GetOppositeCondition(); + Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part + + // Set the conditions for the test, remembering that == needs to be + // decided using the low words. + switch (if_cond) { + case kCondEQ: + case kCondNE: + // Nothing to do. 
+ break; + case kCondLT: + false_high_cond = kCondGT; + break; + case kCondLE: + true_high_cond = kCondLT; + break; + case kCondGT: + false_high_cond = kCondLT; + break; + case kCondGE: + true_high_cond = kCondGT; + break; + case kCondB: + false_high_cond = kCondA; + break; + case kCondBE: + true_high_cond = kCondB; + break; + case kCondA: + false_high_cond = kCondB; + break; + case kCondAE: + true_high_cond = kCondA; + break; + } + if (right.IsConstant()) { + int64_t value = right.GetConstant()->AsLongConstant()->GetValue(); + int32_t val_low = Low32Bits(value); + int32_t val_high = High32Bits(value); + + __ CmpConstant(left_high, val_high); + if (if_cond == kCondNE) { + __ b(true_label, ARMCondition(true_high_cond)); + } else if (if_cond == kCondEQ) { + __ b(false_label, ARMCondition(false_high_cond)); + } else { + __ b(true_label, ARMCondition(true_high_cond)); + __ b(false_label, ARMCondition(false_high_cond)); + } + // Must be equal high, so compare the lows. + __ CmpConstant(left_low, val_low); + } else { + Register right_high = right.AsRegisterPairHigh<Register>(); + Register right_low = right.AsRegisterPairLow<Register>(); + + __ cmp(left_high, ShifterOperand(right_high)); + if (if_cond == kCondNE) { + __ b(true_label, ARMCondition(true_high_cond)); + } else if (if_cond == kCondEQ) { + __ b(false_label, ARMCondition(false_high_cond)); + } else { + __ b(true_label, ARMCondition(true_high_cond)); + __ b(false_label, ARMCondition(false_high_cond)); + } + // Must be equal high, so compare the lows. + __ cmp(left_low, ShifterOperand(right_low)); + } + // The last comparison might be unsigned. + // TODO: optimize cases where this is always true/false + __ b(true_label, final_condition); +} + void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition, Label* true_target_in, Label* false_target_in) { @@ -2901,7 +2600,7 @@ void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condi Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); - GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_); + GenerateLongComparesAndJumps(condition, true_target, false_target); if (false_target != &fallthrough_target) { __ b(false_target); @@ -3216,80 +2915,6 @@ void CodeGeneratorARM::GenerateNop() { __ nop(); } -// `temp` is an extra temporary register that is used for some conditions; -// callers may not specify it, in which case the method will use a scratch -// register instead. -void CodeGeneratorARM::GenerateConditionWithZero(IfCondition condition, - Register out, - Register in, - Register temp) { - switch (condition) { - case kCondEQ: - // x <= 0 iff x == 0 when the comparison is unsigned. - case kCondBE: - if (temp == kNoRegister || (ArmAssembler::IsLowRegister(out) && out != in)) { - temp = out; - } - - // Avoid 32-bit instructions if possible; note that `in` and `temp` must be - // different as well. - if (ArmAssembler::IsLowRegister(in) && ArmAssembler::IsLowRegister(temp) && in != temp) { - // temp = - in; only 0 sets the carry flag. - __ rsbs(temp, in, ShifterOperand(0)); - - if (out == in) { - std::swap(in, temp); - } - - // out = - in + in + carry = carry - __ adc(out, temp, ShifterOperand(in)); - } else { - // If `in` is 0, then it has 32 leading zeros, and less than that otherwise. - __ clz(out, in); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. 
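  // A minimal standalone sketch of the same CLZ check, assuming 32-bit unsigned inputs;
  // std::countl_zero and the IsZero helper are illustrative only, not part of this code:
  //
  //   #include <bit>
  //   #include <cstdint>
  //
  //   // Returns 1 iff in == 0: countl_zero(0u) == 32 and 32 >> 5 == 1, while any
  //   // non-zero input has fewer than 32 leading zeros, so the shift yields 0.
  //   inline uint32_t IsZero(uint32_t in) {
  //     return static_cast<uint32_t>(std::countl_zero(in)) >> 5;
  //   }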
- __ Lsr(out, out, 5); - } - - break; - case kCondNE: - // x > 0 iff x != 0 when the comparison is unsigned. - case kCondA: - if (out == in) { - if (temp == kNoRegister || in == temp) { - temp = IP; - } - } else if (temp == kNoRegister || !ArmAssembler::IsLowRegister(temp)) { - temp = out; - } - - // temp = in - 1; only 0 does not set the carry flag. - __ subs(temp, in, ShifterOperand(1)); - // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry - __ sbc(out, in, ShifterOperand(temp)); - break; - case kCondGE: - __ mvn(out, ShifterOperand(in)); - in = out; - FALLTHROUGH_INTENDED; - case kCondLT: - // We only care about the sign bit. - __ Lsr(out, in, 31); - break; - case kCondAE: - // Trivially true. - __ mov(out, ShifterOperand(1)); - break; - case kCondB: - // Trivially false. - __ mov(out, ShifterOperand(0)); - break; - default: - LOG(FATAL) << "Unexpected condition " << condition; - UNREACHABLE(); - } -} - void LocationsBuilderARM::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); @@ -3326,42 +2951,48 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { return; } - const Primitive::Type type = cond->GetLeft()->GetType(); + const Register out = cond->GetLocations()->Out().AsRegister<Register>(); + + if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) { + const auto condition = GenerateTest(cond, false, codegen_); - if (Primitive::IsFloatingPointType(type)) { - GenerateConditionGeneric(cond, codegen_); + __ it(condition.first); + __ mov(out, ShifterOperand(1), condition.first); + __ it(condition.second); + __ mov(out, ShifterOperand(0), condition.second); return; } - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - - if (type == Primitive::kPrimBoolean) { - const LocationSummary* const locations = cond->GetLocations(); - const IfCondition c = cond->GetCondition(); - Register left = locations->InAt(0).AsRegister<Register>(); - const Register out = locations->Out().AsRegister<Register>(); - const Location right_loc = locations->InAt(1); + // Convert the jumps into the result. + Label done_label; + Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); - // All other cases are handled by the instruction simplifier. - DCHECK((c == kCondEQ || c == kCondNE) && !right_loc.IsConstant()); + if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { + Label true_label, false_label; - Register right = right_loc.AsRegister<Register>(); + GenerateLongComparesAndJumps(cond, &true_label, &false_label); - // Avoid 32-bit instructions if possible. - if (out == right) { - std::swap(left, right); - } + // False case: result = 0. + __ Bind(&false_label); + __ LoadImmediate(out, 0); + __ b(final_label); - __ eor(out, left, ShifterOperand(right)); + // True case: result = 1. 
+ __ Bind(&true_label); + __ LoadImmediate(out, 1); + } else { + DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); - if (c == kCondEQ) { - __ eor(out, out, ShifterOperand(1)); - } + const auto condition = GenerateTest(cond, false, codegen_); - return; + __ mov(out, ShifterOperand(0), AL, kCcKeep); + __ b(final_label, condition.second); + __ LoadImmediate(out, 1); } - GenerateConditionIntegralOrNonPrimitive(cond, codegen_); + if (done_label.IsLinked()) { + __ Bind(&done_label); + } } void LocationsBuilderARM::VisitEqual(HEqual* comp) { @@ -6743,6 +6374,15 @@ void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* } } +void LocationsBuilderARM::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) { + LOG(FATAL) << "Unreachable " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* instruction) { + LOG(FATAL) << "Unreachable " << instruction->GetId(); +} + void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) { RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConvention calling_convention; @@ -8430,8 +8070,9 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct // return_address: CheckLastTempIsBakerCcEntrypointRegister(instruction); + bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); uint32_t custom_data = - linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg); + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow); Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); // entrypoint_reg = @@ -8444,16 +8085,18 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct Label return_address; __ AdrCode(LR, &return_address); __ CmpConstant(kBakerCcEntrypointRegister, 0); - static_assert( - BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, - "GC root LDR must be 2 32-bit instructions (8B) before the return address label."); // Currently the offset is always within range. If that changes, // we shall have to split the load the same way as for fields. DCHECK_LT(offset, kReferenceLoadMinFarOffset); - ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); + DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit()); + ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow); + int old_position = GetAssembler()->GetBuffer()->GetPosition(); __ LoadFromOffset(kLoadWord, root_reg, obj, offset); EmitPlaceholderBne(codegen_, bne_label); __ Bind(&return_address); + DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(), + narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); } else { // Note that we do not actually check the value of // `GetIsGcMarking()` to decide whether to mark the loaded GC @@ -8553,10 +8196,12 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. 
- // GcRoot<mirror::Object> reference = *(obj+offset); + // HeapReference<mirror::Object> reference = *(obj+offset); // gray_return_address: DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + Register ref_reg = ref.AsRegister<Register>(); + bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); Register base = obj; if (offset >= kReferenceLoadMinFarOffset) { base = temp.AsRegister<Register>(); @@ -8564,10 +8209,14 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u)); offset &= (kReferenceLoadMinFarOffset - 1u); + // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large + // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely + // increase the overall code size when taking the generated thunks into account. + DCHECK(!narrow); } CheckLastTempIsBakerCcEntrypointRegister(instruction); uint32_t custom_data = - linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj); + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow); Label* bne_label = NewBakerReadBarrierPatch(custom_data); // entrypoint_reg = @@ -8580,19 +8229,20 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr Label return_address; __ AdrCode(LR, &return_address); __ CmpConstant(kBakerCcEntrypointRegister, 0); - ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); EmitPlaceholderBne(this, bne_label); - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 32-bit instruction (4B) before the return address label; " - " 2 32-bit instructions (8B) for heap poisoning."); - Register ref_reg = ref.AsRegister<Register>(); DCHECK_LT(offset, kReferenceLoadMinFarOffset); + DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit()); + ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow); + int old_position = GetAssembler()->GetBuffer()->GetPosition(); __ LoadFromOffset(kLoadWord, ref_reg, base, offset); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); __ Bind(&return_address); + DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(), + narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); return; } @@ -8638,7 +8288,7 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> reference = data[index]; + // HeapReference<mirror::Object> reference = data[index]; // gray_return_address: DCHECK(index.IsValid()); @@ -8663,15 +8313,15 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr Label return_address; __ AdrCode(LR, &return_address); __ CmpConstant(kBakerCcEntrypointRegister, 0); - ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); EmitPlaceholderBne(this, bne_label); - static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? 
-8 : -4), - "Array LDR must be 1 32-bit instruction (4B) before the return address label; " - " 2 32-bit instructions (8B) for heap poisoning."); + ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); + int old_position = GetAssembler()->GetBuffer()->GetPosition(); __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor)); DCHECK(!needs_null_check); // The thunk cannot handle the null check. GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); __ Bind(&return_address); + DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(), + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); return; } @@ -9426,14 +9076,20 @@ static void PatchJitRootUse(uint8_t* code, void CodeGeneratorARM::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const auto& entry : jit_string_patches_) { - const auto& it = jit_string_roots_.find(entry.first); + const StringReference& string_reference = entry.first; + Literal* table_entry_literal = entry.second; + const auto it = jit_string_roots_.find(string_reference); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } for (const auto& entry : jit_class_patches_) { - const auto& it = jit_class_roots_.find(entry.first); + const TypeReference& type_reference = entry.first; + Literal* table_entry_literal = entry.second; + const auto it = jit_class_roots_.find(type_reference); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index ac9d57aa0a..b94ee20d9d 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -299,6 +299,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, Label* false_target); + void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -625,14 +626,6 @@ class CodeGeneratorARM : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; - // `temp` is an extra temporary register that is used for some conditions; - // callers may not specify it, in which case the method will use a scratch - // register instead. 
- void GenerateConditionWithZero(IfCondition condition, - Register out, - Register in, - Register temp = kNoRegister); - private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7d9778a4e7..fa39b79e39 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1515,7 +1515,7 @@ Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind ki if (kind == Location::kRegister) { scratch = LocationFrom(vixl_temps_.AcquireX()); } else { - DCHECK(kind == Location::kFpuRegister); + DCHECK_EQ(kind, Location::kFpuRegister); scratch = LocationFrom(codegen_->GetGraph()->HasSIMD() ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize) : vixl_temps_.AcquireD()); @@ -1743,9 +1743,9 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) { (cst->IsDoubleConstant() && type == Primitive::kPrimDouble); } -// Allocate a scratch register from the VIXL pool, querying first into -// the floating-point register pool, and then the the core register -// pool. This is essentially a reimplementation of +// Allocate a scratch register from the VIXL pool, querying first +// the floating-point register pool, and then the core register +// pool. This is essentially a reimplementation of // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize // using a different allocation strategy. static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm, @@ -1893,7 +1893,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination, // ask for a scratch register of any type (core or FP). // // Also, we start by asking for a FP scratch register first, as the - // demand of scratch core registers is higher. This is why we + // demand of scratch core registers is higher. This is why we // use AcquireFPOrCoreCPURegisterOfSize instead of // UseScratchRegisterScope::AcquireCPURegisterOfSize, which // allocates core scratch registers first. @@ -2661,6 +2661,38 @@ void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddres Operand(InputOperandAt(instruction, 1))); } +void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + + HIntConstant* shift = instruction->GetShift()->AsIntConstant(); + + locations->SetInAt(0, Location::RequiresRegister()); + // For byte case we don't need to shift the index variable so we can encode the data offset into + // ADD instruction. For other cases we prefer the data_offset to be in register; that will hoist + // data offset constant generation out of the loop and reduce the critical path length in the + // loop. + locations->SetInAt(1, shift->GetValue() == 0 + ? 
Location::ConstantLocation(instruction->GetOffset()->AsIntConstant()) + : Location::RequiresRegister()); + locations->SetInAt(2, Location::ConstantLocation(shift)); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* instruction) { + Register index_reg = InputRegisterAt(instruction, 0); + uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2)); + uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue(); + + if (shift == 0) { + __ Add(OutputRegister(instruction), index_reg, offset); + } else { + Register offset_reg = InputRegisterAt(instruction, 1); + __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift)); + } +} + void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); @@ -6102,7 +6134,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> reference = *(obj+offset); + // HeapReference<mirror::Object> reference = *(obj+offset); // gray_return_address: DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); @@ -6197,7 +6229,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> reference = data[index]; + // HeapReference<mirror::Object> reference = data[index]; // gray_return_address: DCHECK(index.IsValid()); @@ -6571,14 +6603,20 @@ static void PatchJitRootUse(uint8_t* code, void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const auto& entry : jit_string_patches_) { - const auto& it = jit_string_roots_.find(entry.first); + const StringReference& string_reference = entry.first; + vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; + const auto it = jit_string_roots_.find(string_reference); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } for (const auto& entry : jit_class_patches_) { - const auto& it = jit_class_roots_.find(entry.first); + const TypeReference& type_reference = entry.first; + vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; + const auto it = jit_class_roots_.find(type_reference); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } } diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 502b298163..1759c68125 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -124,6 +124,10 @@ static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Lab __ bind(&placeholder_label); } +static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) { + return rt.IsLow() && 
rn.IsLow() && offset < 32u; +} + class EmitAdrCode { public: EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label) @@ -1771,34 +1775,6 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codege } } -static int64_t AdjustConstantForCondition(int64_t value, - IfCondition* condition, - IfCondition* opposite) { - if (value == 1) { - if (*condition == kCondB) { - value = 0; - *condition = kCondEQ; - *opposite = kCondNE; - } else if (*condition == kCondAE) { - value = 0; - *condition = kCondNE; - *opposite = kCondEQ; - } - } else if (value == -1) { - if (*condition == kCondGT) { - value = 0; - *condition = kCondGE; - *opposite = kCondLT; - } else if (*condition == kCondLE) { - value = 0; - *condition = kCondLT; - *opposite = kCondGE; - } - } - - return value; -} - static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( HCondition* condition, bool invert, @@ -1821,37 +1797,7 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( const vixl32::Register left_high = HighRegisterFrom(left); const vixl32::Register left_low = LowRegisterFrom(left); - int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite); - UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); - - // Comparisons against 0 are common enough to deserve special attention. - if (value == 0) { - switch (cond) { - case kCondNE: - // x > 0 iff x != 0 when the comparison is unsigned. - case kCondA: - ret = std::make_pair(ne, eq); - FALLTHROUGH_INTENDED; - case kCondEQ: - // x <= 0 iff x == 0 when the comparison is unsigned. - case kCondBE: - __ Orrs(temps.Acquire(), left_low, left_high); - return ret; - case kCondLT: - case kCondGE: - __ Cmp(left_high, 0); - return std::make_pair(ARMCondition(cond), ARMCondition(opposite)); - // Trivially true or false. 
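    // Concretely: for an unsigned comparison against zero, x < 0 can never hold and
    // x >= 0 always holds, so the condition pair returned below encodes a fixed outcome;
    // the self-compare of left_low merely sets the Z flag so that the eq/ne pair reads
    // as that constant result.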
- case kCondB: - ret = std::make_pair(ne, eq); - FALLTHROUGH_INTENDED; - case kCondAE: - __ Cmp(left_low, left_low); - return ret; - default: - break; - } - } + int64_t value = Int64ConstantFrom(right); switch (cond) { case kCondEQ: @@ -1896,6 +1842,8 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( FALLTHROUGH_INTENDED; case kCondGE: case kCondLT: { + UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); + __ Cmp(left_low, Low32Bits(value)); __ Sbcs(temps.Acquire(), left_high, High32Bits(value)); ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); @@ -2013,22 +1961,18 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) { if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { const LocationSummary* const locations = condition->GetLocations(); + const IfCondition c = condition->GetCondition(); if (locations->InAt(1).IsConstant()) { - IfCondition c = condition->GetCondition(); - IfCondition opposite = condition->GetOppositeCondition(); - const int64_t value = - AdjustConstantForCondition(Int64ConstantFrom(locations->InAt(1)), &c, &opposite); + const int64_t value = Int64ConstantFrom(locations->InAt(1)); if (c < kCondLT || c > kCondGE) { // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, // we check that the least significant half of the first input to be compared // is in a low register (the other half is read outside an IT block), and // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used; 0 is always handled, no matter what registers are - // used by the first input. - if (value != 0 && - (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value)))) { + // encoding can be used. + if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) { return false; } // TODO(VIXL): The rest of the checks are there to keep the backend in sync with @@ -2047,353 +1991,6 @@ static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) return true; } -static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) { - DCHECK(CanGenerateTest(cond, codegen->GetAssembler())); - - const vixl32::Register out = OutputRegister(cond); - const auto condition = GenerateTest(cond, false, codegen); - - __ Mov(LeaveFlags, out, 0); - - if (out.IsLow()) { - // We use the scope because of the IT block that follows. 
- ExactAssemblyScope guard(codegen->GetVIXLAssembler(), - 2 * vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - - __ it(condition.first); - __ mov(condition.first, out, 1); - } else { - vixl32::Label done_label; - vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); - - __ B(condition.second, final_label, /* far_target */ false); - __ Mov(out, 1); - - if (done_label.IsReferenced()) { - __ Bind(&done_label); - } - } -} - -static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { - DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); - - const LocationSummary* const locations = cond->GetLocations(); - IfCondition condition = cond->GetCondition(); - const vixl32::Register out = OutputRegister(cond); - const Location left = locations->InAt(0); - const Location right = locations->InAt(1); - vixl32::Register left_high = HighRegisterFrom(left); - vixl32::Register left_low = LowRegisterFrom(left); - vixl32::Register temp; - UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); - - if (right.IsConstant()) { - IfCondition opposite = cond->GetOppositeCondition(); - const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), - &condition, - &opposite); - Operand right_high = High32Bits(value); - Operand right_low = Low32Bits(value); - - // The output uses Location::kNoOutputOverlap. - if (out.Is(left_high)) { - std::swap(left_low, left_high); - std::swap(right_low, right_high); - } - - __ Sub(out, left_low, right_low); - temp = temps.Acquire(); - __ Sub(temp, left_high, right_high); - } else { - DCHECK(right.IsRegisterPair()); - temp = temps.Acquire(); - __ Sub(temp, left_high, HighRegisterFrom(right)); - __ Sub(out, left_low, LowRegisterFrom(right)); - } - - // Need to check after calling AdjustConstantForCondition(). - DCHECK(condition == kCondEQ || condition == kCondNE) << condition; - - if (condition == kCondNE && out.IsLow()) { - __ Orrs(out, out, temp); - - // We use the scope because of the IT block that follows. - ExactAssemblyScope guard(codegen->GetVIXLAssembler(), - 2 * vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - - __ it(ne); - __ mov(ne, out, 1); - } else { - __ Orr(out, out, temp); - codegen->GenerateConditionWithZero(condition, out, out, temp); - } -} - -static void GenerateLongComparesAndJumps(HCondition* cond, - vixl32::Label* true_label, - vixl32::Label* false_label, - CodeGeneratorARMVIXL* codegen) { - LocationSummary* locations = cond->GetLocations(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - IfCondition if_cond = cond->GetCondition(); - - vixl32::Register left_high = HighRegisterFrom(left); - vixl32::Register left_low = LowRegisterFrom(left); - IfCondition true_high_cond = if_cond; - IfCondition false_high_cond = cond->GetOppositeCondition(); - vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part - - // Set the conditions for the test, remembering that == needs to be - // decided using the low words. - switch (if_cond) { - case kCondEQ: - case kCondNE: - // Nothing to do. 
- break; - case kCondLT: - false_high_cond = kCondGT; - break; - case kCondLE: - true_high_cond = kCondLT; - break; - case kCondGT: - false_high_cond = kCondLT; - break; - case kCondGE: - true_high_cond = kCondGT; - break; - case kCondB: - false_high_cond = kCondA; - break; - case kCondBE: - true_high_cond = kCondB; - break; - case kCondA: - false_high_cond = kCondB; - break; - case kCondAE: - true_high_cond = kCondA; - break; - } - if (right.IsConstant()) { - int64_t value = Int64ConstantFrom(right); - int32_t val_low = Low32Bits(value); - int32_t val_high = High32Bits(value); - - __ Cmp(left_high, val_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label); - } else { - __ B(ARMCondition(true_high_cond), true_label); - __ B(ARMCondition(false_high_cond), false_label); - } - // Must be equal high, so compare the lows. - __ Cmp(left_low, val_low); - } else { - vixl32::Register right_high = HighRegisterFrom(right); - vixl32::Register right_low = LowRegisterFrom(right); - - __ Cmp(left_high, right_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label); - } else { - __ B(ARMCondition(true_high_cond), true_label); - __ B(ARMCondition(false_high_cond), false_label); - } - // Must be equal high, so compare the lows. - __ Cmp(left_low, right_low); - } - // The last comparison might be unsigned. - // TODO: optimize cases where this is always true/false - __ B(final_condition, true_label); -} - -static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { - DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); - - const LocationSummary* const locations = cond->GetLocations(); - IfCondition condition = cond->GetCondition(); - const vixl32::Register out = OutputRegister(cond); - const Location left = locations->InAt(0); - const Location right = locations->InAt(1); - - if (right.IsConstant()) { - IfCondition opposite = cond->GetOppositeCondition(); - - // Comparisons against 0 are common enough to deserve special attention. - if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) { - switch (condition) { - case kCondNE: - case kCondA: - if (out.IsLow()) { - // We only care if both input registers are 0 or not. - __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left)); - - // We use the scope because of the IT block that follows. - ExactAssemblyScope guard(codegen->GetVIXLAssembler(), - 2 * vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - - __ it(ne); - __ mov(ne, out, 1); - return; - } - - FALLTHROUGH_INTENDED; - case kCondEQ: - case kCondBE: - // We only care if both input registers are 0 or not. - __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left)); - codegen->GenerateConditionWithZero(condition, out, out); - return; - case kCondLT: - case kCondGE: - // We only care about the sign bit. - FALLTHROUGH_INTENDED; - case kCondAE: - case kCondB: - codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left)); - return; - case kCondLE: - case kCondGT: - default: - break; - } - } - } - - if ((condition == kCondEQ || condition == kCondNE) && - // If `out` is a low register, then the GenerateConditionGeneric() - // function generates a shorter code sequence that is still branchless. 
- (!out.IsLow() || !CanGenerateTest(cond, codegen->GetAssembler()))) { - GenerateEqualLong(cond, codegen); - return; - } - - if (CanGenerateTest(cond, codegen->GetAssembler())) { - GenerateConditionGeneric(cond, codegen); - return; - } - - // Convert the jumps into the result. - vixl32::Label done_label; - vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); - vixl32::Label true_label, false_label; - - GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen); - - // False case: result = 0. - __ Bind(&false_label); - __ Mov(out, 0); - __ B(final_label); - - // True case: result = 1. - __ Bind(&true_label); - __ Mov(out, 1); - - if (done_label.IsReferenced()) { - __ Bind(&done_label); - } -} - -static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARMVIXL* codegen) { - const Primitive::Type type = cond->GetLeft()->GetType(); - - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - - if (type == Primitive::kPrimLong) { - GenerateConditionLong(cond, codegen); - return; - } - - IfCondition condition = cond->GetCondition(); - vixl32::Register in = InputRegisterAt(cond, 0); - const vixl32::Register out = OutputRegister(cond); - const Location right = cond->GetLocations()->InAt(1); - int64_t value; - - if (right.IsConstant()) { - IfCondition opposite = cond->GetOppositeCondition(); - - value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite); - - // Comparisons against 0 are common enough to deserve special attention. - if (value == 0) { - switch (condition) { - case kCondNE: - case kCondA: - if (out.IsLow() && out.Is(in)) { - __ Cmp(out, 0); - - // We use the scope because of the IT block that follows. - ExactAssemblyScope guard(codegen->GetVIXLAssembler(), - 2 * vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - - __ it(ne); - __ mov(ne, out, 1); - return; - } - - FALLTHROUGH_INTENDED; - case kCondEQ: - case kCondBE: - case kCondLT: - case kCondGE: - case kCondAE: - case kCondB: - codegen->GenerateConditionWithZero(condition, out, in); - return; - case kCondLE: - case kCondGT: - default: - break; - } - } - } - - if (condition == kCondEQ || condition == kCondNE) { - Operand operand(0); - - if (right.IsConstant()) { - operand = Operand::From(value); - } else if (out.Is(RegisterFrom(right))) { - // Avoid 32-bit instructions if possible. - operand = InputOperandAt(cond, 0); - in = RegisterFrom(right); - } else { - operand = InputOperandAt(cond, 1); - } - - if (condition == kCondNE && out.IsLow()) { - __ Subs(out, in, operand); - - // We use the scope because of the IT block that follows. 
- ExactAssemblyScope guard(codegen->GetVIXLAssembler(), - 2 * vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - - __ it(ne); - __ mov(ne, out, 1); - } else { - __ Sub(out, in, operand); - codegen->GenerateConditionWithZero(condition, out, out); - } - - return; - } - - GenerateConditionGeneric(cond, codegen); -} - static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) { const Primitive::Type type = constant->GetType(); bool ret = false; @@ -2954,6 +2551,89 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } +void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond, + vixl32::Label* true_label, + vixl32::Label* false_label) { + LocationSummary* locations = cond->GetLocations(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + IfCondition if_cond = cond->GetCondition(); + + vixl32::Register left_high = HighRegisterFrom(left); + vixl32::Register left_low = LowRegisterFrom(left); + IfCondition true_high_cond = if_cond; + IfCondition false_high_cond = cond->GetOppositeCondition(); + vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part + + // Set the conditions for the test, remembering that == needs to be + // decided using the low words. + switch (if_cond) { + case kCondEQ: + case kCondNE: + // Nothing to do. + break; + case kCondLT: + false_high_cond = kCondGT; + break; + case kCondLE: + true_high_cond = kCondLT; + break; + case kCondGT: + false_high_cond = kCondLT; + break; + case kCondGE: + true_high_cond = kCondGT; + break; + case kCondB: + false_high_cond = kCondA; + break; + case kCondBE: + true_high_cond = kCondB; + break; + case kCondA: + false_high_cond = kCondB; + break; + case kCondAE: + true_high_cond = kCondA; + break; + } + if (right.IsConstant()) { + int64_t value = Int64ConstantFrom(right); + int32_t val_low = Low32Bits(value); + int32_t val_high = High32Bits(value); + + __ Cmp(left_high, val_high); + if (if_cond == kCondNE) { + __ B(ARMCondition(true_high_cond), true_label); + } else if (if_cond == kCondEQ) { + __ B(ARMCondition(false_high_cond), false_label); + } else { + __ B(ARMCondition(true_high_cond), true_label); + __ B(ARMCondition(false_high_cond), false_label); + } + // Must be equal high, so compare the lows. + __ Cmp(left_low, val_low); + } else { + vixl32::Register right_high = HighRegisterFrom(right); + vixl32::Register right_low = LowRegisterFrom(right); + + __ Cmp(left_high, right_high); + if (if_cond == kCondNE) { + __ B(ARMCondition(true_high_cond), true_label); + } else if (if_cond == kCondEQ) { + __ B(ARMCondition(false_high_cond), false_label); + } else { + __ B(ARMCondition(true_high_cond), true_label); + __ B(ARMCondition(false_high_cond), false_label); + } + // Must be equal high, so compare the lows. + __ Cmp(left_low, right_low); + } + // The last comparison might be unsigned. + // TODO: optimize cases where this is always true/false + __ B(final_condition, true_label); +} + void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, vixl32::Label* true_target_in, vixl32::Label* false_target_in) { @@ -2988,7 +2668,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* c vixl32::Label* false_target = (false_target_in == nullptr) ? 
&fallthrough : false_target_in; DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); - GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_); + GenerateLongComparesAndJumps(condition, true_target, false_target); if (false_target != &fallthrough) { __ B(false_target); @@ -3299,83 +2979,6 @@ void CodeGeneratorARMVIXL::GenerateNop() { __ Nop(); } -// `temp` is an extra temporary register that is used for some conditions; -// callers may not specify it, in which case the method will use a scratch -// register instead. -void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition, - vixl32::Register out, - vixl32::Register in, - vixl32::Register temp) { - switch (condition) { - case kCondEQ: - // x <= 0 iff x == 0 when the comparison is unsigned. - case kCondBE: - if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) { - temp = out; - } - - // Avoid 32-bit instructions if possible; note that `in` and `temp` must be - // different as well. - if (in.IsLow() && temp.IsLow() && !in.Is(temp)) { - // temp = - in; only 0 sets the carry flag. - __ Rsbs(temp, in, 0); - - if (out.Is(in)) { - std::swap(in, temp); - } - - // out = - in + in + carry = carry - __ Adc(out, temp, in); - } else { - // If `in` is 0, then it has 32 leading zeros, and less than that otherwise. - __ Clz(out, in); - // Any number less than 32 logically shifted right by 5 bits results in 0; - // the same operation on 32 yields 1. - __ Lsr(out, out, 5); - } - - break; - case kCondNE: - // x > 0 iff x != 0 when the comparison is unsigned. - case kCondA: { - UseScratchRegisterScope temps(GetVIXLAssembler()); - - if (out.Is(in)) { - if (!temp.IsValid() || in.Is(temp)) { - temp = temps.Acquire(); - } - } else if (!temp.IsValid() || !temp.IsLow()) { - temp = out; - } - - // temp = in - 1; only 0 does not set the carry flag. - __ Subs(temp, in, 1); - // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry - __ Sbc(out, in, temp); - break; - } - case kCondGE: - __ Mvn(out, in); - in = out; - FALLTHROUGH_INTENDED; - case kCondLT: - // We only care about the sign bit. - __ Lsr(out, in, 31); - break; - case kCondAE: - // Trivially true. - __ Mov(out, 1); - break; - case kCondB: - // Trivially false. - __ Mov(out, 0); - break; - default: - LOG(FATAL) << "Unexpected condition " << condition; - UNREACHABLE(); - } -} - void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); @@ -3412,41 +3015,52 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { return; } - const Primitive::Type type = cond->GetLeft()->GetType(); + const vixl32::Register out = OutputRegister(cond); + + if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) { + const auto condition = GenerateTest(cond, false, codegen_); + // We use the scope because of the IT block that follows. 
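    // The scope below reserves exactly four 16-bit T32 instructions (it / mov / it / mov,
    // 8 bytes in total); with CodeBufferCheckScope::kExactSize VIXL checks that exactly
    // that amount of code is emitted, so each conditional mov stays immediately after
    // its it instruction.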
+ ExactAssemblyScope guard(GetVIXLAssembler(), + 4 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); - if (Primitive::IsFloatingPointType(type)) { - GenerateConditionGeneric(cond, codegen_); + __ it(condition.first); + __ mov(condition.first, out, 1); + __ it(condition.second); + __ mov(condition.second, out, 0); return; } - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; - - if (type == Primitive::kPrimBoolean) { - const IfCondition c = cond->GetCondition(); - vixl32::Register left = InputRegisterAt(cond, 0); - const vixl32::Register out = OutputRegister(cond); - const Location right_loc = cond->GetLocations()->InAt(1); + // Convert the jumps into the result. + vixl32::Label done_label; + vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label); - // All other cases are handled by the instruction simplifier. - DCHECK((c == kCondEQ || c == kCondNE) && !right_loc.IsConstant()); + if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) { + vixl32::Label true_label, false_label; - vixl32::Register right = RegisterFrom(right_loc); + GenerateLongComparesAndJumps(cond, &true_label, &false_label); - // Avoid 32-bit instructions if possible. - if (out.Is(right)) { - std::swap(left, right); - } + // False case: result = 0. + __ Bind(&false_label); + __ Mov(out, 0); + __ B(final_label); - __ Eor(out, left, right); + // True case: result = 1. + __ Bind(&true_label); + __ Mov(out, 1); + } else { + DCHECK(CanGenerateTest(cond, codegen_->GetAssembler())); - if (c == kCondEQ) { - __ Eor(out, out, 1); - } + const auto condition = GenerateTest(cond, false, codegen_); - return; + __ Mov(LeaveFlags, out, 0); + __ B(condition.second, final_label, /* far_target */ false); + __ Mov(out, 1); } - GenerateConditionIntegralOrNonPrimitive(cond, codegen_); + if (done_label.IsReferenced()) { + __ Bind(&done_label); + } } void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) { @@ -6833,6 +6447,16 @@ void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddr } } +void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* instruction) { + LOG(FATAL) << "Unreachable " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* instruction) { + LOG(FATAL) << "Unreachable " << instruction->GetId(); +} + void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) { RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConventionARMVIXL calling_convention; @@ -8557,8 +8181,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( UseScratchRegisterScope temps(GetVIXLAssembler()); ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - uint32_t custom_data = - linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); + bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); + uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData( + root_reg.GetCode(), narrow); vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); // entrypoint_reg = @@ -8573,15 +8198,16 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( vixl32::Label return_address; EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); __ cmp(kBakerCcEntrypointRegister, Operand(0)); - static_assert( - BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, - "GC root LDR must be 2 32-bit instructions (8B) before the return address 
label."); // Currently the offset is always within range. If that changes, // we shall have to split the load the same way as for fields. DCHECK_LT(offset, kReferenceLoadMinFarOffset); - __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset)); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset)); EmitPlaceholderBne(codegen_, bne_label); __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); } else { // Note that we do not actually check the value of // `GetIsGcMarking()` to decide whether to mark the loaded GC @@ -8682,10 +8308,12 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> reference = *(obj+offset); + // HeapReference<mirror::Object> reference = *(obj+offset); // gray_return_address: DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); vixl32::Register base = obj; if (offset >= kReferenceLoadMinFarOffset) { base = RegisterFrom(temp); @@ -8693,12 +8321,15 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); offset &= (kReferenceLoadMinFarOffset - 1u); + // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large + // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely + // increase the overall code size when taking the generated thunks into account. + DCHECK(!narrow); } UseScratchRegisterScope temps(GetVIXLAssembler()); ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base.GetCode(), - obj.GetCode()); + base.GetCode(), obj.GetCode(), narrow); vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); // entrypoint_reg = @@ -8715,19 +8346,24 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); __ cmp(kBakerCcEntrypointRegister, Operand(0)); EmitPlaceholderBne(this, bne_label); - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 32-bit instruction (4B) before the return address label; " - " 2 32-bit instructions (8B) for heap poisoning."); - vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); - __ ldr(EncodingSize(Wide), ref_reg, MemOperand(base, offset)); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } - // Note: We need a Wide NEG for the unpoisoning. + // Note: We need a specific width for the unpoisoning NEG. 
if (kPoisonHeapReferences) { - __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + if (narrow) { + // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). + __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); + } else { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } } __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); return; } @@ -8773,7 +8409,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> reference = data[index]; + // HeapReference<mirror::Object> reference = data[index]; // gray_return_address: DCHECK(index.IsValid()); @@ -8803,9 +8439,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); __ cmp(kBakerCcEntrypointRegister, Operand(0)); EmitPlaceholderBne(this, bne_label); - static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Array LDR must be 1 32-bit instruction (4B) before the return address label; " - " 2 32-bit instructions (8B) for heap poisoning."); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); DCHECK(!needs_null_check); // The thunk cannot handle the null check. // Note: We need a Wide NEG for the unpoisoning. @@ -8813,6 +8447,8 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); } __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); return; } @@ -9625,14 +9261,20 @@ static void PatchJitRootUse(uint8_t* code, void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const auto& entry : jit_string_patches_) { - const auto& it = jit_string_roots_.find(entry.first); + const StringReference& string_reference = entry.first; + VIXLUInt32Literal* table_entry_literal = entry.second; + const auto it = jit_string_roots_.find(string_reference); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } for (const auto& entry : jit_class_patches_) { - const auto& it = jit_class_roots_.find(entry.first); + const TypeReference& type_reference = entry.first; + VIXLUInt32Literal* table_entry_literal = entry.second; + const auto it = jit_class_roots_.find(type_reference); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index afff72fb52..657d3c134f 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -401,6 +401,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void 
GenerateCompareTestAndBranch(HCondition* condition, vixl::aarch32::Label* true_target, vixl::aarch32::Label* false_target); + void GenerateLongComparesAndJumps(HCondition* cond, + vixl::aarch32::Label* true_label, + vixl::aarch32::Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -717,14 +720,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo* labels, vixl::aarch32::Register out); - // `temp` is an extra temporary register that is used for some conditions; - // callers may not specify it, in which case the method will use a scratch - // register instead. - void GenerateConditionWithZero(IfCondition condition, - vixl::aarch32::Register out, - vixl::aarch32::Register in, - vixl::aarch32::Register temp = vixl32::Register()); - private: vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, vixl::aarch32::Register temp); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index e9870acff4..fdfa4eedf8 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1780,16 +1780,18 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code, void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const JitPatchInfo& info : jit_string_patches_) { - const auto& it = jit_string_roots_.find(StringReference(&info.target_dex_file, - dex::StringIndex(info.index))); + const auto it = jit_string_roots_.find(StringReference(&info.target_dex_file, + dex::StringIndex(info.index))); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } for (const JitPatchInfo& info : jit_class_patches_) { - const auto& it = jit_class_roots_.find(TypeReference(&info.target_dex_file, - dex::TypeIndex(info.index))); + const auto it = jit_class_roots_.find(TypeReference(&info.target_dex_file, + dex::TypeIndex(info.index))); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } } diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index f04e3841f5..d3ae3a729b 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1586,14 +1586,20 @@ void CodeGeneratorMIPS64::PatchJitRootUse(uint8_t* code, void CodeGeneratorMIPS64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const auto& entry : jit_string_patches_) { - const auto& it = jit_string_roots_.find(entry.first); + const StringReference& string_reference = entry.first; + Literal* table_entry_literal = entry.second; + const auto it = jit_string_roots_.find(string_reference); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } for (const auto& entry : jit_class_patches_) { - const auto& it = jit_class_roots_.find(entry.first); + const TypeReference& type_reference = entry.first; + Literal* table_entry_literal = entry.second; + 
const auto it = jit_class_roots_.find(type_reference); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, entry.second, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } } diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 57f7e6b25c..478bd24388 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -783,6 +783,12 @@ MemOperand InstructionCodeGeneratorARM64::VecAddress( /*out*/ Register* scratch) { LocationSummary* locations = instruction->GetLocations(); Register base = InputRegisterAt(instruction, 0); + + if (instruction->InputAt(1)->IsIntermediateAddressIndex()) { + DCHECK(!is_string_char_at); + return MemOperand(base.X(), InputRegisterAt(instruction, 1).X()); + } + Location index = locations->InAt(1); uint32_t offset = is_string_char_at ? mirror::String::ValueOffset().Uint32Value() diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index cf2d5cbee3..bd9a5d2564 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -7703,7 +7703,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { constant_area_start_ = assembler->CodeSize(); // Populate any jump tables. - for (auto jump_table : fixups_to_jump_tables_) { + for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) { jump_table->CreateJumpTable(); } @@ -7842,17 +7842,19 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const PatchInfo<Label>& info : jit_string_patches_) { - const auto& it = jit_string_roots_.find( + const auto it = jit_string_roots_.find( StringReference(&info.dex_file, dex::StringIndex(info.index))); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } for (const PatchInfo<Label>& info : jit_class_patches_) { - const auto& it = jit_class_roots_.find( + const auto it = jit_class_roots_.find( TypeReference(&info.dex_file, dex::TypeIndex(info.index))); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index f2ed52b5a5..6b0e001ad8 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -7055,7 +7055,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { constant_area_start_ = assembler->CodeSize(); // Populate any jump tables. 
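  // (Presumably the same motivation as the x86 hunk above: spelling out JumpTableRIPFixup*
  // instead of auto makes the element type of fixups_to_jump_tables_ visible right where
  // the tables are materialized, just after constant_area_start_ is recorded.)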
- for (auto jump_table : fixups_to_jump_tables_) { + for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) { jump_table->CreateJumpTable(); } @@ -7149,17 +7149,19 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const PatchInfo<Label>& info : jit_string_patches_) { - const auto& it = jit_string_roots_.find( + const auto it = jit_string_roots_.find( StringReference(&info.dex_file, dex::StringIndex(info.index))); DCHECK(it != jit_string_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } for (const PatchInfo<Label>& info : jit_class_patches_) { - const auto& it = jit_class_roots_.find( + const auto it = jit_class_roots_.find( TypeReference(&info.dex_file, dex::TypeIndex(info.index))); DCHECK(it != jit_class_roots_.end()); - PatchJitRootUse(code, roots_data, info, it->second); + uint64_t index_in_table = it->second; + PatchJitRootUse(code, roots_data, info, index_in_table); } } diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 4ba5c5580f..fe25b7690d 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -64,7 +64,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { #endif }; - for (auto test_config : test_config_candidates) { + for (const CodegenTargetConfig& test_config : test_config_candidates) { if (CanExecute(test_config.GetInstructionSet())) { v.push_back(test_config); } @@ -76,7 +76,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { - for (CodegenTargetConfig target_config : GetTargetConfigs()) { + for (const CodegenTargetConfig& target_config : GetTargetConfigs()) { ArenaPool pool; ArenaAllocator arena(&pool); HGraph* graph = CreateCFG(&arena, data); @@ -89,7 +89,7 @@ static void TestCode(const uint16_t* data, static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { - for (CodegenTargetConfig target_config : GetTargetConfigs()) { + for (const CodegenTargetConfig& target_config : GetTargetConfigs()) { ArenaPool pool; ArenaAllocator arena(&pool); HGraph* graph = CreateCFG(&arena, data, Primitive::kPrimLong); @@ -754,7 +754,28 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { // // Assertion failed (!available->IsEmpty()) // - // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable. + // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable, + // because of the following situation: + // + // 1. a temp register (IP0) is allocated as a scratch register by + // the parallel move resolver to solve a cycle (swap): + // + // [ source=DS0 destination=DS257 type=PrimDouble instruction=null ] + // [ source=DS257 destination=DS0 type=PrimDouble instruction=null ] + // + // 2. within CodeGeneratorARM64::MoveLocation, another temp + // register (IP1) is allocated to generate the swap between two + // double stack slots; + // + // 3. VIXL requires a third temp register to emit the `Ldr` or + // `Str` operation from CodeGeneratorARM64::MoveLocation (as + // one of the stack slots' offsets cannot be encoded as an + // immediate), but the pool of (core) temp registers is now + // empty. 
+ // + // The solution used so far is to use a floating-point temp register + // (D31) in step #2, so that IP1 is available for step #3. + HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena()); move->AddMove(Location::DoubleStackSlot(0), Location::DoubleStackSlot(257), @@ -807,7 +828,6 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { InternalCodeAllocator code_allocator; codegen.Finalize(&code_allocator); } - #endif #ifdef ART_ENABLE_CODEGEN_mips diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index 31cd204c9f..00a16fe849 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -243,7 +243,7 @@ static void ValidateGraph(HGraph* graph) { GraphChecker graph_checker(graph); graph_checker.Run(); if (!graph_checker.IsValid()) { - for (const auto& error : graph_checker.GetErrors()) { + for (const std::string& error : graph_checker.GetErrors()) { std::cout << error << std::endl; } } diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index c93bc210be..8ea312d0ea 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -516,13 +516,13 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const { DCHECK(visited_blocks_.IsBitSet(block->GetBlockId())); - for (auto dominated_block : block->GetDominatedBlocks()) { + for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) { if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) { return true; } } - for (auto successor : block->GetSuccessors()) { + for (const HBasicBlock* successor : block->GetSuccessors()) { if (!visited_blocks_.IsBitSet(successor->GetBlockId())) { return true; } diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index f16e3727c8..311be1fb49 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -216,5 +216,18 @@ void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) { } } +void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) { + if (!instruction->IsStringCharAt() + && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) { + RecordSimplification(); + } +} + +void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) { + if (TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) { + RecordSimplification(); + } +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index eec4e49792..8596f6ad40 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -75,6 +75,8 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { void VisitUShr(HUShr* instruction) OVERRIDE; void VisitXor(HXor* instruction) OVERRIDE; void VisitVecMul(HVecMul* instruction) OVERRIDE; + void VisitVecLoad(HVecLoad* instruction) OVERRIDE; + void VisitVecStore(HVecStore* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index c39e5f4d3b..e5a8499ff4 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ 
-16,6 +16,8 @@ #include "instruction_simplifier_shared.h" +#include "mirror/array-inl.h" + namespace art { namespace { @@ -346,4 +348,59 @@ bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) { return false; } +bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) { + if (index->IsConstant()) { + // If the index is constant, the whole address calculation can often be done by the LDR/STR instructions themselves. + // TODO: Handle constants that cannot be embedded directly in the addressing mode. + return false; + } + + HGraph* graph = access->GetBlock()->GetGraph(); + ArenaAllocator* arena = graph->GetArena(); + Primitive::Type packed_type = access->GetPackedType(); + uint32_t data_offset = mirror::Array::DataOffset( + Primitive::ComponentSize(packed_type)).Uint32Value(); + size_t component_shift = Primitive::ComponentSizeShift(packed_type); + + bool is_extracting_beneficial = false; + // Extracting the intermediate address is beneficial only if the index has at least 2 users of this kind. + for (const HUseListNode<HInstruction*>& use : index->GetUses()) { + HInstruction* user = use.GetUser(); + if (user->IsVecMemoryOperation() && user != access) { + HVecMemoryOperation* another_access = user->AsVecMemoryOperation(); + Primitive::Type another_packed_type = another_access->GetPackedType(); + uint32_t another_data_offset = mirror::Array::DataOffset( + Primitive::ComponentSize(another_packed_type)).Uint32Value(); + size_t another_component_shift = Primitive::ComponentSizeShift(another_packed_type); + if (another_data_offset == data_offset && another_component_shift == component_shift) { + is_extracting_beneficial = true; + break; + } + } else if (user->IsIntermediateAddressIndex()) { + HIntermediateAddressIndex* another_access = user->AsIntermediateAddressIndex(); + uint32_t another_data_offset = another_access->GetOffset()->AsIntConstant()->GetValue(); + size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue(); + if (another_data_offset == data_offset && another_component_shift == component_shift) { + is_extracting_beneficial = true; + break; + } + } + } + + if (!is_extracting_beneficial) { + return false; + } + + // Proceed to extract the index + data_offset address computation. + HIntConstant* offset = graph->GetIntConstant(data_offset); + HIntConstant* shift = graph->GetIntConstant(component_shift); + HIntermediateAddressIndex* address = + new (arena) HIntermediateAddressIndex(index, offset, shift, kNoDexPc); + + access->GetBlock()->InsertInstructionBefore(address, access); + access->ReplaceInput(address, 1); + + return true; +} + } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index 2ea103a518..371619fa2e 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -59,6 +59,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access, size_t data_offset); bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa); +bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index); } // namespace art diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index fc7d20c793..69cf9a126f 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -2598,7 +2598,11 @@ void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) { // We don't care about the sign bit, so shift left.
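// Worked example of the bit trick implemented by the Lsl/Eor/Clz/Lsr sequence below,
// assuming `out` starts as the raw float bits and `infinity` holds the positive-infinity
// pattern 0x7f800000 already shifted left by one (0xff000000):
//   +Inf: 0x7f800000 << 1 = 0xff000000, XOR 0xff000000 = 0,          CLZ = 32, LSR #5 = 1
//   -Inf: 0xff800000 << 1 = 0xff000000, XOR 0xff000000 = 0,          CLZ = 32, LSR #5 = 1
//   NaN:  0x7fc00000 << 1 = 0xff800000, XOR 0xff000000 = 0x00800000, CLZ = 8,  LSR #5 = 0
// Only +/-Inf leave exactly 32 leading zeros, so the final logical shift right by 5 yields the boolean result.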
__ Lsl(out, out, 1); __ eor(out, out, ShifterOperand(infinity)); - codegen_->GenerateConditionWithZero(kCondEQ, out, out); + // If the result is 0, then it has 32 leading zeros, and less than that otherwise. + __ clz(out, out); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); } void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -2621,7 +2625,11 @@ void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) { __ eor(out, out, ShifterOperand(infinity_high2)); // We don't care about the sign bit, so shift left. __ orr(out, IP, ShifterOperand(out, LSL, 1)); - codegen_->GenerateConditionWithZero(kCondEQ, out, out); + // If the result is 0, then it has 32 leading zeros, and less than that otherwise. + __ clz(out, out); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); } void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 56d06eb666..356d5bcb0c 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -2971,7 +2971,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { // We don't care about the sign bit, so shift left. __ Lsl(out, out, 1); __ Eor(out, out, infinity); - codegen_->GenerateConditionWithZero(kCondEQ, out, out); + // If the result is 0, then it has 32 leading zeros, and less than that otherwise. + __ Clz(out, out); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); } void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -2997,7 +3001,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { __ Eor(out, out, infinity_high2); // We don't care about the sign bit, so shift left. __ Orr(out, temp, Operand(out, vixl32::LSL, 1)); - codegen_->GenerateConditionWithZero(kCondEQ, out, out); + // If the result is 0, then it has 32 leading zeros, and less than that otherwise. + __ Clz(out, out); + // Any number less than 32 logically shifted right by 5 bits results in 0; + // the same operation on 32 yields 1. + __ Lsr(out, out, 5); } void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 8ed2ad86bf..af0b193b03 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -759,7 +759,7 @@ static void CreateFPToFPCallLocations(ArenaAllocator* arena, // We have to ensure that the native code doesn't clobber the XMM registers which are // non-volatile for ART, but volatile for Native calls. This will ensure that they are // saved in the prologue and properly restored. - for (auto fp_reg : non_volatile_xmm_regs) { + for (FloatRegister fp_reg : non_volatile_xmm_regs) { locations->AddTemp(Location::FpuRegisterLocation(fp_reg)); } } @@ -898,7 +898,7 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, // We have to ensure that the native code doesn't clobber the XMM registers which are // non-volatile for ART, but volatile for Native calls. This will ensure that they are // saved in the prologue and properly restored. 
- for (auto fp_reg : non_volatile_xmm_regs) { + for (FloatRegister fp_reg : non_volatile_xmm_regs) { locations->AddTemp(Location::FpuRegisterLocation(fp_reg)); } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index b4da20b558..522962485b 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1406,7 +1406,8 @@ class HLoopInformationOutwardIterator : public ValueObject { M(BitwiseNegatedRight, Instruction) \ M(DataProcWithShifterOp, Instruction) \ M(MultiplyAccumulate, Instruction) \ - M(IntermediateAddress, Instruction) + M(IntermediateAddress, Instruction) \ + M(IntermediateAddressIndex, Instruction) #endif #ifndef ART_ENABLE_CODEGEN_arm diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index c6bfbcc7fb..075a816f3f 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -150,6 +150,49 @@ class HIntermediateAddress FINAL : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress); }; +// This instruction computes part of the array access offset (data offset and scaled index). +// +// For array accesses, the element address has the following structure: +// Address = CONST_OFFSET + base_addr + index << ELEM_SHIFT. Taking LDR/STR addressing +// modes into account, the (CONST_OFFSET + index << ELEM_SHIFT) part of the address can be shared +// across array accesses with the same data type and index. For example, in the following loop +// 5 accesses can share the address computation: +// +// void foo(int[] a, int[] b, int[] c) { +// for (i...) { +// a[i] = a[i] + 5; +// b[i] = b[i] + c[i]; +// } +// } +// +// Note: as the instruction does not include the base array address in its computation, it has +// no side effects (in contrast to HIntermediateAddress).
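A minimal sketch of the arithmetic being shared, with assumed values (a data offset of 12 and a component shift of 2 for an int[]); the helper name and constants are illustrative, not taken from this change:

#include <cstdint>

// Shared part: data_offset + (index << component_shift) depends only on the index,
// so a[i], b[i] and c[i] above can all reuse it once it has been computed.
static inline uintptr_t ElementAddress(uintptr_t base_addr, uint32_t index) {
  const uintptr_t data_offset = 12;    // assumed payload offset of an int[] object
  const unsigned component_shift = 2;  // log2(sizeof(int32_t))
  uintptr_t intermediate = data_offset + (static_cast<uintptr_t>(index) << component_shift);
  // Per-array part: a single add that an LDR/STR register-offset addressing mode can absorb.
  return base_addr + intermediate;
}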
+class HIntermediateAddressIndex FINAL : public HExpression<3> { + public: + HIntermediateAddressIndex( + HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc) + : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) { + SetRawInputAt(0, index); + SetRawInputAt(1, offset); + SetRawInputAt(2, shift); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } + bool IsActualObject() const OVERRIDE { return false; } + + HInstruction* GetIndex() const { return InputAt(0); } + HInstruction* GetOffset() const { return InputAt(1); } + HInstruction* GetShift() const { return InputAt(2); } + + DECLARE_INSTRUCTION(IntermediateAddressIndex); + + private: + DISALLOW_COPY_AND_ASSIGN(HIntermediateAddressIndex); +}; + class HDataProcWithShifterOp FINAL : public HExpression<2> { public: enum OpKind { diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 52c247b52f..92fe9bfa7d 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -178,12 +178,17 @@ class HVecMemoryOperation : public HVecOperation { size_t vector_length, uint32_t dex_pc) : HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc), - alignment_(Primitive::ComponentSize(packed_type), 0) { } + alignment_(Primitive::ComponentSize(packed_type), 0) { + DCHECK_GE(number_of_inputs, 2u); + } void SetAlignment(Alignment alignment) { alignment_ = alignment; } Alignment GetAlignment() const { return alignment_; } + HInstruction* GetArray() const { return InputAt(0); } + HInstruction* GetIndex() const { return InputAt(1); } + DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation); private: diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 065c11eddb..f928f71209 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -638,11 +638,14 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, new (arena) arm::InstructionSimplifierArm(graph, stats); SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch"); + HInstructionScheduling* scheduling = + new (arena) HInstructionScheduling(graph, instruction_set, codegen); HOptimization* arm_optimizations[] = { simplifier, side_effects, gvn, - fixups + fixups, + scheduling, }; RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); break; diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 87f709f63d..300f4c6239 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -1968,8 +1968,7 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints( allocator_->Adapter(kArenaAllocRegisterAllocator)); - for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) { - LiveInterval* parent_interval = *it; + for (LiveInterval* parent_interval : *intervals) { DCHECK(parent_interval->IsParent()); DCHECK(!parent_interval->HasSpillSlot()); size_t start = parent_interval->GetStart(); diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 
d65d20cf43..320f01a727 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -23,6 +23,10 @@ #include "scheduler_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_arm +#include "scheduler_arm.h" +#endif + namespace art { void SchedulingGraph::AddDependency(SchedulingNode* node, @@ -264,10 +268,11 @@ void SchedulingGraph::DumpAsDotGraph(const std::string& description, // Start the dot graph. Use an increasing index for easier differentiation. output << "digraph G {\n"; for (const auto& entry : nodes_map_) { - DumpAsDotNode(output, entry.second); + SchedulingNode* node = entry.second; + DumpAsDotNode(output, node); } // Create a fake 'end_of_scheduling' node to help visualization of critical_paths. - for (auto node : initial_candidates) { + for (SchedulingNode* node : initial_candidates) { const HInstruction* instruction = node->GetInstruction(); output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n " << "[label=\"" << node->GetLatency() << "\",dir=back]\n"; @@ -580,28 +585,39 @@ bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const { void HInstructionScheduling::Run(bool only_optimize_loop_blocks, bool schedule_randomly) { +#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm) + // Phase-local allocator that allocates scheduler internal data structures like + // scheduling nodes, internal nodes map, dependencies, etc. + ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool()); + CriticalPathSchedulingNodeSelector critical_path_selector; + RandomSchedulingNodeSelector random_selector; + SchedulingNodeSelector* selector = schedule_randomly + ? static_cast<SchedulingNodeSelector*>(&random_selector) + : static_cast<SchedulingNodeSelector*>(&critical_path_selector); +#else // Avoid compilation error when compiling for unsupported instruction set. UNUSED(only_optimize_loop_blocks); UNUSED(schedule_randomly); +#endif switch (instruction_set_) { #ifdef ART_ENABLE_CODEGEN_arm64 case kArm64: { - // Phase-local allocator that allocates scheduler internal data structures like - // scheduling nodes, internel nodes map, dependencies, etc. - ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool()); - - CriticalPathSchedulingNodeSelector critical_path_selector; - RandomSchedulingNodeSelector random_selector; - SchedulingNodeSelector* selector = schedule_randomly - ?
static_cast<SchedulingNodeSelector*>(&random_selector) - : static_cast<SchedulingNodeSelector*>(&critical_path_selector); - arm64::HSchedulerARM64 scheduler(&arena_allocator, selector); scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); scheduler.Schedule(graph_); break; } #endif +#if defined(ART_ENABLE_CODEGEN_arm) + case kThumb2: + case kArm: { + arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_); + arm::HSchedulerARM scheduler(&arena_allocator, selector, &arm_latency_visitor); + scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); + scheduler.Schedule(graph_); + break; + } +#endif default: break; } diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index 9236a0e4fa..73e8087cd0 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -23,6 +23,7 @@ #include "driver/compiler_driver.h" #include "nodes.h" #include "optimization.h" +#include "code_generator.h" namespace art { @@ -469,8 +470,9 @@ inline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction class HInstructionScheduling : public HOptimization { public: - HInstructionScheduling(HGraph* graph, InstructionSet instruction_set) + HInstructionScheduling(HGraph* graph, InstructionSet instruction_set, CodeGenerator* cg = nullptr) : HOptimization(graph, kInstructionScheduling), + codegen_(cg), instruction_set_(instruction_set) {} void Run() { @@ -480,6 +482,7 @@ class HInstructionScheduling : public HOptimization { static constexpr const char* kInstructionScheduling = "scheduler"; + CodeGenerator* const codegen_; const InstructionSet instruction_set_; private: diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc new file mode 100644 index 0000000000..1a89567991 --- /dev/null +++ b/compiler/optimizing/scheduler_arm.cc @@ -0,0 +1,822 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arch/arm/instruction_set_features_arm.h" +#include "code_generator_utils.h" +#include "common_arm.h" +#include "mirror/array-inl.h" +#include "scheduler_arm.h" + +namespace art { +namespace arm { + +using helpers::Int32ConstantFrom; +using helpers::Uint64ConstantFrom; + +void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) { + switch (instr->GetResultType()) { + case Primitive::kPrimLong: + // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs, + // so a bubble (kArmNopLatency) is added to represent the internal carry flag + // dependency inside these pairs. 
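+      // For illustration, such a pair looks roughly like (register choice is arbitrary):
+      //   adds r0, r0, r2   @ low word, sets the carry flag
+      //   adc  r1, r1, r3   @ high word, consumes the carry flag
+      // The second instruction cannot issue until the flags of the first are available,
+      // which is what the extra kArmNopLatency bubble below accounts for.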
+ last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency; + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + default: + last_visited_latency_ = kArmIntegerOpLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) { + HandleBinaryOperationLantencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) { + HandleBinaryOperationLantencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) { + switch (instr->GetResultType()) { + case Primitive::kPrimLong: + last_visited_internal_latency_ = 3 * kArmMulIntegerLatency; + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_latency_ = kArmMulFloatingPointLatency; + break; + default: + last_visited_latency_ = kArmMulIntegerLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) { + switch (instr->GetResultType()) { + case Primitive::kPrimLong: + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + default: + last_visited_latency_ = kArmIntegerOpLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) { + HandleBitwiseOperationLantencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) { + HandleBitwiseOperationLantencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) { + HandleBitwiseOperationLantencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) { + switch (instr->GetResultType()) { + case Primitive::kPrimInt: + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimLong: { + // HandleLongRotate + HInstruction* rhs = instr->GetRight(); + if (rhs->IsConstant()) { + uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance; + if (rot != 0u) { + last_visited_internal_latency_ = 3 * kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } + } else { + last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency; + last_visited_latency_ = kArmBranchLatency; + } + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << instr->GetResultType(); + UNREACHABLE(); + } +} + +void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) { + Primitive::Type type = instr->GetResultType(); + HInstruction* rhs = instr->GetRight(); + switch (type) { + case Primitive::kPrimInt: + if (!rhs->IsConstant()) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + } + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimLong: + if (!rhs->IsConstant()) { + last_visited_internal_latency_ = 8 * kArmIntegerOpLatency; + } else { + uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance; + if (shift_value == 1 || shift_value >= 32) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + } else { + last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; + } + } + last_visited_latency_ = kArmIntegerOpLatency; + break; + default: + LOG(FATAL) << "Unexpected operation type " 
<< type; + UNREACHABLE(); + } +} + +void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) { + HandleShiftLatencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) { + HandleShiftLatencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) { + HandleShiftLatencies(instr); +} + +void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) { + switch (instr->GetLeft()->GetType()) { + case Primitive::kPrimLong: + last_visited_internal_latency_ = 4 * kArmIntegerOpLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_internal_latency_ = 2 * kArmFloatingPointOpLatency; + break; + default: + last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; + break; + } + last_visited_latency_ = kArmIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) { + Primitive::Type type = instr->InputAt(0)->GetType(); + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: + last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; + break; + case Primitive::kPrimLong: + last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency; + break; + default: + last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; + break; + } + last_visited_latency_ = kArmIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) { + if (instruction->GetResultType() == Primitive::kPrimInt) { + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } +} + +void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) { + if (internal_latency) { + last_visited_internal_latency_ += kArmIntegerOpLatency; + } else { + last_visited_latency_ = kArmDataProcWithShifterOpLatency; + } +} + +void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) { + const HInstruction::InstructionKind kind = instruction->GetInstrKind(); + if (kind == HInstruction::kAdd) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else if (kind == HInstruction::kSub) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + HandleGenerateDataProcInstruction(/* internal_latency */ true); + HandleGenerateDataProcInstruction(); + } +} + +void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) { + DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); + DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())); + + const uint32_t shift_value = instruction->GetShiftAmount(); + const HInstruction::InstructionKind kind = instruction->GetInstrKind(); + + if (shift_value >= 32) { + // Different shift types actually generate similar code here, + // no need to differentiate shift types like the codegen pass does, + // which also avoids handling shift types from different ARM backends. 
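+    // For illustration: a long shift left by 40 reduces to roughly "high = low << 8; low = 0",
+    // and logical/arithmetic right shifts have a similarly small shape, so the latency model
+    // simply falls through to HandleGenerateDataProc below instead of distinguishing them.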
+ HandleGenerateDataProc(instruction); + } else { + DCHECK_GT(shift_value, 1U); + DCHECK_LT(shift_value, 32U); + + if (kind == HInstruction::kOr || kind == HInstruction::kXor) { + HandleGenerateDataProcInstruction(/* internal_latency */ true); + HandleGenerateDataProcInstruction(/* internal_latency */ true); + HandleGenerateDataProcInstruction(); + } else { + last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; + HandleGenerateDataProc(instruction); + } + } +} + +void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) { + const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); + + if (instruction->GetType() == Primitive::kPrimInt) { + DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind)); + HandleGenerateDataProcInstruction(); + } else { + DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); + if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { + HandleGenerateDataProc(instruction); + } else { + HandleGenerateLongDataProc(instruction); + } + } +} + +void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) { + // Although the code generated is a simple `add` instruction, we found through empirical results + // that spacing it from its use in memory accesses was beneficial. + last_visited_internal_latency_ = kArmNopLatency; + last_visited_latency_ = kArmIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArmMulIntegerLatency; +} + +void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { + Primitive::Type type = instruction->GetType(); + const bool maybe_compressed_char_at = + mirror::kUseStringCompression && instruction->IsStringCharAt(); + HInstruction* array_instr = instruction->GetArray(); + bool has_intermediate_address = array_instr->IsIntermediateAddress(); + HInstruction* index = instruction->InputAt(1); + + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: { + if (maybe_compressed_char_at) { + last_visited_internal_latency_ += kArmMemoryLoadLatency; + } + if (index->IsConstant()) { + if (maybe_compressed_char_at) { + last_visited_internal_latency_ += + kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency; + last_visited_latency_ = kArmBranchLatency; + } else { + last_visited_latency_ += kArmMemoryLoadLatency; + } + } else { + if (has_intermediate_address) { + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + } + if (maybe_compressed_char_at) { + last_visited_internal_latency_ += + kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency; + last_visited_latency_ = kArmBranchLatency; + } else { + last_visited_latency_ += kArmMemoryLoadLatency; + } + } + break; + } + + case Primitive::kPrimNot: { + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency; + } else { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + if (has_intermediate_address) { + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + } + last_visited_internal_latency_ = kArmMemoryLoadLatency; + } + } + break; + } + + case Primitive::kPrimLong: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + last_visited_latency_ = 
kArmMemoryLoadLatency; + } + break; + } + + case Primitive::kPrimFloat: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + } + + case Primitive::kPrimDouble: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + } + + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) { + last_visited_latency_ = kArmMemoryLoadLatency; + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + last_visited_internal_latency_ = kArmMemoryLoadLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } +} + +void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) { + HInstruction* index = instruction->InputAt(1); + Primitive::Type value_type = instruction->GetComponentType(); + HInstruction* array_instr = instruction->GetArray(); + bool has_intermediate_address = array_instr->IsIntermediateAddress(); + + switch (value_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryStoreLatency; + } else { + if (has_intermediate_address) { + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + } + last_visited_latency_ = kArmMemoryStoreLatency; + } + break; + } + + case Primitive::kPrimNot: { + if (instruction->InputAt(2)->IsNullConstant()) { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryStoreLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryStoreLatency; + } + } else { + // Following the exact instructions of runtime type checks is too complicated, + // just giving it a simple slow latency. + last_visited_latency_ = kArmRuntimeTypeCheckLatency; + } + break; + } + + case Primitive::kPrimLong: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + } + + case Primitive::kPrimFloat: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + } + + case Primitive::kPrimDouble: { + if (index->IsConstant()) { + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + } + + default: + LOG(FATAL) << "Unreachable type " << value_type; + UNREACHABLE(); + } +} + +void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + // Users do not use any data results. 
+ last_visited_latency_ = 0; +} + +void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) { + if (imm == 0) { + last_visited_internal_latency_ = 0; + last_visited_latency_ = 0; + } else if (imm == 1 || imm == -1) { + last_visited_latency_ = kArmIntegerOpLatency; + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + last_visited_internal_latency_ = 3 * kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } +} + +void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) { + Primitive::Type type = instruction->GetResultType(); + switch (type) { + case Primitive::kPrimInt: { + HInstruction* rhs = instruction->GetRight(); + if (rhs->IsConstant()) { + int32_t imm = Int32ConstantFrom(rhs->AsConstant()); + HandleDivRemConstantIntegralLatencies(imm); + } else { + last_visited_latency_ = kArmDivIntegerLatency; + } + break; + } + case Primitive::kPrimFloat: + last_visited_latency_ = kArmDivFloatLatency; + break; + case Primitive::kPrimDouble: + last_visited_latency_ = kArmDivDoubleLatency; + break; + default: + last_visited_internal_latency_ = kArmCallInternalLatency; + last_visited_latency_ = kArmCallLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGetLatencies(instruction, instruction->GetFieldInfo()); +} + +void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSetLatencies(instruction, instruction->GetFieldInfo()); +} + +void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) { + last_visited_internal_latency_ = kArmCallInternalLatency; + last_visited_latency_ = kArmIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) { + last_visited_internal_latency_ = kArmCallInternalLatency; + last_visited_latency_ = kArmCallLatency; +} + +void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) { + last_visited_internal_latency_ = kArmLoadStringInternalLatency; + last_visited_latency_ = kArmMemoryLoadLatency; +} + +void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) { + last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency; + last_visited_latency_ = kArmCallLatency; +} + +void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) { + if (instruction->IsStringAlloc()) { + last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency; + } else { + last_visited_internal_latency_ = kArmCallInternalLatency; + } + last_visited_latency_ = kArmCallLatency; +} + +void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) { + Primitive::Type type = instruction->GetResultType(); + switch (type) { + case Primitive::kPrimInt: { + HInstruction* rhs = instruction->GetRight(); + if (rhs->IsConstant()) { + int32_t imm = Int32ConstantFrom(rhs->AsConstant()); + HandleDivRemConstantIntegralLatencies(imm); + } else { + last_visited_internal_latency_ = kArmDivIntegerLatency; + last_visited_latency_ = kArmMulIntegerLatency; + } + break; + } + default: + last_visited_internal_latency_ = kArmCallInternalLatency; + last_visited_latency_ = kArmCallLatency; + break; + } +} + +void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || 
instruction->IsStaticFieldGet()); + DCHECK(codegen_ != nullptr); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + + switch (field_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: + last_visited_latency_ = kArmMemoryLoadLatency; + break; + + case Primitive::kPrimNot: + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + + case Primitive::kPrimLong: + if (is_volatile && !atomic_ldrd_strd) { + last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency; + last_visited_latency_ = kArmMemoryLoadLatency; + } else { + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + + case Primitive::kPrimFloat: + last_visited_latency_ = kArmMemoryLoadLatency; + break; + + case Primitive::kPrimDouble: + if (is_volatile && !atomic_ldrd_strd) { + last_visited_internal_latency_ = + kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_latency_ = kArmMemoryLoadLatency; + } + break; + + default: + last_visited_latency_ = kArmMemoryLoadLatency; + break; + } + + if (is_volatile) { + last_visited_internal_latency_ += kArmMemoryBarrierLatency; + } +} + +void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + DCHECK(codegen_ != nullptr); + bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + + switch (field_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + if (is_volatile) { + last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency; + last_visited_latency_ = kArmMemoryBarrierLatency; + } else { + last_visited_latency_ = kArmMemoryStoreLatency; + } + break; + + case Primitive::kPrimInt: + case Primitive::kPrimNot: + if (kPoisonHeapReferences && needs_write_barrier) { + last_visited_internal_latency_ += kArmIntegerOpLatency * 2; + } + last_visited_latency_ = kArmMemoryStoreLatency; + break; + + case Primitive::kPrimLong: + if (is_volatile && !atomic_ldrd_strd) { + last_visited_internal_latency_ = + kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_latency_ = kArmMemoryStoreLatency; + } + break; + + case Primitive::kPrimFloat: + last_visited_latency_ = kArmMemoryStoreLatency; + break; + + case Primitive::kPrimDouble: + if (is_volatile && !atomic_ldrd_strd) { + last_visited_internal_latency_ = kArmIntegerOpLatency + + kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_latency_ = kArmMemoryStoreLatency; + } + break; + + default: + last_visited_latency_ = kArmMemoryStoreLatency; + 
break; + } +} + +void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGetLatencies(instruction, instruction->GetFieldInfo()); +} + +void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSetLatencies(instruction, instruction->GetFieldInfo()); +} + +void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + DCHECK((block->GetLoopInformation() != nullptr) || + (block->IsEntryBlock() && instruction->GetNext()->IsGoto())); + // Users do not use any data results. + last_visited_latency_ = 0; +} + +void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) { + Primitive::Type result_type = instr->GetResultType(); + Primitive::Type input_type = instr->GetInputType(); + + switch (result_type) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + last_visited_latency_ = kArmIntegerOpLatency; // SBFX or UBFX + break; + + case Primitive::kPrimInt: + switch (input_type) { + case Primitive::kPrimLong: + last_visited_latency_ = kArmIntegerOpLatency; // MOV + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + default: + last_visited_latency_ = kArmIntegerOpLatency; + break; + } + break; + + case Primitive::kPrimLong: + switch (input_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + // MOV and extension + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + // invokes runtime + last_visited_internal_latency_ = kArmCallInternalLatency; + break; + default: + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + break; + } + break; + + case Primitive::kPrimFloat: + switch (input_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + case Primitive::kPrimLong: + // invokes runtime + last_visited_internal_latency_ = kArmCallInternalLatency; + break; + case Primitive::kPrimDouble: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + default: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + } + break; + + case Primitive::kPrimDouble: + switch (input_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + case Primitive::kPrimLong: + last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency; + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + case Primitive::kPrimFloat: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + default: + last_visited_latency_ = kArmFloatingPointOpLatency; + break; + } + break; + + default: + last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency; + break; + } +} + +void 
SchedulingLatencyVisitorARM::VisitArmDexCacheArraysBase(art::HArmDexCacheArraysBase*) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; +} + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h new file mode 100644 index 0000000000..8d5e4f375b --- /dev/null +++ b/compiler/optimizing/scheduler_arm.h @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ +#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ + +#include "code_generator_arm_vixl.h" +#include "scheduler.h" + +namespace art { +namespace arm { +#ifdef ART_USE_OLD_ARM_BACKEND +typedef CodeGeneratorARM CodeGeneratorARMType; +#else +typedef CodeGeneratorARMVIXL CodeGeneratorARMType; +#endif + +// AArch32 instruction latencies. +// We currently assume that all ARM CPUs share the same instruction latency list. +// The following latencies were tuned based on performance experiments and +// automatic tuning using differential evolution approach on various benchmarks. +static constexpr uint32_t kArmIntegerOpLatency = 2; +static constexpr uint32_t kArmFloatingPointOpLatency = 11; +static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4; +static constexpr uint32_t kArmMulIntegerLatency = 6; +static constexpr uint32_t kArmMulFloatingPointLatency = 11; +static constexpr uint32_t kArmDivIntegerLatency = 10; +static constexpr uint32_t kArmDivFloatLatency = 20; +static constexpr uint32_t kArmDivDoubleLatency = 25; +static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11; +static constexpr uint32_t kArmMemoryLoadLatency = 9; +static constexpr uint32_t kArmMemoryStoreLatency = 9; +static constexpr uint32_t kArmMemoryBarrierLatency = 6; +static constexpr uint32_t kArmBranchLatency = 4; +static constexpr uint32_t kArmCallLatency = 5; +static constexpr uint32_t kArmCallInternalLatency = 29; +static constexpr uint32_t kArmLoadStringInternalLatency = 10; +static constexpr uint32_t kArmNopLatency = 2; +static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18; +static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46; + +class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { + public: + explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) + : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} + + // Default visitor for instructions not handled specifically below. + void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArmIntegerOpLatency; + } + +// We add a second unused parameter to be able to use this macro like the others +// defined in `nodes.h`. 
+#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \ + M(ArrayGet , unused) \ + M(ArrayLength , unused) \ + M(ArraySet , unused) \ + M(Add , unused) \ + M(Sub , unused) \ + M(And , unused) \ + M(Or , unused) \ + M(Ror , unused) \ + M(Xor , unused) \ + M(Shl , unused) \ + M(Shr , unused) \ + M(UShr , unused) \ + M(Mul , unused) \ + M(Div , unused) \ + M(Condition , unused) \ + M(Compare , unused) \ + M(BoundsCheck , unused) \ + M(InstanceFieldGet , unused) \ + M(InstanceFieldSet , unused) \ + M(InstanceOf , unused) \ + M(Invoke , unused) \ + M(LoadString , unused) \ + M(NewArray , unused) \ + M(NewInstance , unused) \ + M(Rem , unused) \ + M(StaticFieldGet , unused) \ + M(StaticFieldSet , unused) \ + M(SuspendCheck , unused) \ + M(TypeConversion , unused) + +#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ + M(BitwiseNegatedRight, unused) \ + M(MultiplyAccumulate, unused) \ + M(IntermediateAddress, unused) \ + M(DataProcWithShifterOp, unused) + +#define DECLARE_VISIT_INSTRUCTION(type, unused) \ + void Visit##type(H##type* instruction) OVERRIDE; + + FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + private: + void HandleBinaryOperationLantencies(HBinaryOperation* instr); + void HandleBitwiseOperationLantencies(HBinaryOperation* instr); + void HandleShiftLatencies(HBinaryOperation* instr); + void HandleDivRemConstantIntegralLatencies(int32_t imm); + void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info); + void HandleGenerateDataProcInstruction(bool internal_latency = false); + void HandleGenerateDataProc(HDataProcWithShifterOp* instruction); + void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction); + + // The latency setting for each HInstruction depends on how CodeGenerator may generate code, + // latency visitors may query CodeGenerator for such information for accurate latency settings. 
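+  // For example, HandleFieldGetLatencies() above queries
+  // codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd() to decide whether a volatile
+  // Primitive::kPrimLong load can be a single load or needs a longer synthesized sequence,
+  // and charges extra internal latency in the latter case.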
+ CodeGeneratorARMType* codegen_; +}; + +class HSchedulerARM : public HScheduler { + public: + HSchedulerARM(ArenaAllocator* arena, + SchedulingNodeSelector* selector, + SchedulingLatencyVisitorARM* arm_latency_visitor) + : HScheduler(arena, arm_latency_visitor, selector) {} + ~HSchedulerARM() OVERRIDE {} + + bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { +#define CASE_INSTRUCTION_KIND(type, unused) case \ + HInstruction::InstructionKind::k##type: + switch (instruction->GetKind()) { + FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) + return true; + FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND) + return true; + default: + return HScheduler::IsSchedulable(instruction); + } +#undef CASE_INSTRUCTION_KIND + } + + private: + DISALLOW_COPY_AND_ASSIGN(HSchedulerARM); +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index 31d13e2a26..d87600aa5e 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -28,6 +28,10 @@ #include "scheduler_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_arm +#include "scheduler_arm.h" +#endif + namespace art { // Return all combinations of ISA and code generator that are executable on @@ -56,7 +60,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { #endif }; - for (auto test_config : test_config_candidates) { + for (const CodegenTargetConfig& test_config : test_config_candidates) { if (CanExecute(test_config.GetInstructionSet())) { v.push_back(test_config); } @@ -65,133 +69,151 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { return v; } -class SchedulerTest : public CommonCompilerTest {}; - -#ifdef ART_ENABLE_CODEGEN_arm64 -TEST_F(SchedulerTest, DependencyGraph) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); - HBasicBlock* block1 = new (&allocator) HBasicBlock(graph); - graph->AddBlock(entry); - graph->AddBlock(block1); - graph->SetEntryBlock(entry); - - // entry: - // array ParameterValue - // c1 IntConstant - // c2 IntConstant - // block1: - // add1 Add [c1, c2] - // add2 Add [add1, c2] - // mul Mul [add1, add2] - // div_check DivZeroCheck [add2] (env: add2, mul) - // div Div [add1, div_check] - // array_get1 ArrayGet [array, add1] - // array_set1 ArraySet [array, add1, add2] - // array_get2 ArrayGet [array, add1] - // array_set2 ArraySet [array, add1, add2] - - HInstruction* array = new (&allocator) HParameterValue(graph->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimNot); - HInstruction* c1 = graph->GetIntConstant(1); - HInstruction* c2 = graph->GetIntConstant(10); - HInstruction* add1 = new (&allocator) HAdd(Primitive::kPrimInt, c1, c2); - HInstruction* add2 = new (&allocator) HAdd(Primitive::kPrimInt, add1, c2); - HInstruction* mul = new (&allocator) HMul(Primitive::kPrimInt, add1, add2); - HInstruction* div_check = new (&allocator) HDivZeroCheck(add2, 0); - HInstruction* div = new (&allocator) HDiv(Primitive::kPrimInt, add1, div_check, 0); - HInstruction* array_get1 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0); - HInstruction* array_set1 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0); - HInstruction* array_get2 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0); - HInstruction* array_set2 = new (&allocator) HArraySet(array, 
add1, add2, Primitive::kPrimInt, 0); - - DCHECK(div_check->CanThrow()); - - entry->AddInstruction(array); - - HInstruction* block_instructions[] = {add1, - add2, - mul, - div_check, - div, - array_get1, - array_set1, - array_get2, - array_set2}; - for (auto instr : block_instructions) { - block1->AddInstruction(instr); +class SchedulerTest : public CommonCompilerTest { + public: + SchedulerTest() : pool_(), allocator_(&pool_) { + graph_ = CreateGraph(&allocator_); } - HEnvironment* environment = new (&allocator) HEnvironment(&allocator, - 2, - graph->GetArtMethod(), + // Build scheduling graph, and run target specific scheduling on it. + void TestBuildDependencyGraphAndSchedule(HScheduler* scheduler) { + HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->AddBlock(block1); + graph_->SetEntryBlock(entry); + + // entry: + // array ParameterValue + // c1 IntConstant + // c2 IntConstant + // block1: + // add1 Add [c1, c2] + // add2 Add [add1, c2] + // mul Mul [add1, add2] + // div_check DivZeroCheck [add2] (env: add2, mul) + // div Div [add1, div_check] + // array_get1 ArrayGet [array, add1] + // array_set1 ArraySet [array, add1, add2] + // array_get2 ArrayGet [array, add1] + // array_set2 ArraySet [array, add1, add2] + + HInstruction* array = new (&allocator_) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), 0, - div_check); - div_check->SetRawEnvironment(environment); - environment->SetRawEnvAt(0, add2); - add2->AddEnvUseAt(div_check->GetEnvironment(), 0); - environment->SetRawEnvAt(1, mul); - mul->AddEnvUseAt(div_check->GetEnvironment(), 1); - - ArenaAllocator* arena = graph->GetArena(); - CriticalPathSchedulingNodeSelector critical_path_selector; - arm64::HSchedulerARM64 scheduler(arena, &critical_path_selector); - SchedulingGraph scheduling_graph(&scheduler, arena); - // Instructions must be inserted in reverse order into the scheduling graph. 
- for (auto instr : ReverseRange(block_instructions)) { - scheduling_graph.AddNode(instr); + Primitive::kPrimNot); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(10); + HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, c1, c2); + HInstruction* add2 = new (&allocator_) HAdd(Primitive::kPrimInt, add1, c2); + HInstruction* mul = new (&allocator_) HMul(Primitive::kPrimInt, add1, add2); + HInstruction* div_check = new (&allocator_) HDivZeroCheck(add2, 0); + HInstruction* div = new (&allocator_) HDiv(Primitive::kPrimInt, add1, div_check, 0); + HInstruction* array_get1 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0); + HInstruction* array_set1 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0); + HInstruction* array_get2 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0); + HInstruction* array_set2 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0); + + DCHECK(div_check->CanThrow()); + + entry->AddInstruction(array); + + HInstruction* block_instructions[] = {add1, + add2, + mul, + div_check, + div, + array_get1, + array_set1, + array_get2, + array_set2}; + for (HInstruction* instr : block_instructions) { + block1->AddInstruction(instr); + } + + HEnvironment* environment = new (&allocator_) HEnvironment(&allocator_, + 2, + graph_->GetArtMethod(), + 0, + div_check); + div_check->SetRawEnvironment(environment); + environment->SetRawEnvAt(0, add2); + add2->AddEnvUseAt(div_check->GetEnvironment(), 0); + environment->SetRawEnvAt(1, mul); + mul->AddEnvUseAt(div_check->GetEnvironment(), 1); + + SchedulingGraph scheduling_graph(scheduler, graph_->GetArena()); + // Instructions must be inserted in reverse order into the scheduling graph. + for (HInstruction* instr : ReverseRange(block_instructions)) { + scheduling_graph.AddNode(instr); + } + + // Should not have dependencies cross basic blocks. + ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1)); + ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2)); + + // Define-use dependency. + ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1)); + ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2)); + ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2)); + ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1)); + ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check)); + ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1)); + ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2)); + + // Read and write dependencies + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1)); + + // Env dependency. + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul)); + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check)); + + // CanThrow. + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check)); + + // Exercise the code path of target specific scheduler and SchedulingLatencyVisitor. + scheduler->Schedule(graph_); } - // Should not have dependencies cross basic blocks. 
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1)); - ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2)); - - // Define-use dependency. - ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1)); - ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2)); - ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2)); - ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1)); - ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check)); - ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1)); - ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2)); - - // Read and write dependencies - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1)); - - // Env dependency. - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul)); - ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check)); - - // CanThrow. - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check)); + void CompileWithRandomSchedulerAndRun(const uint16_t* data, bool has_result, int expected) { + for (CodegenTargetConfig target_config : GetTargetConfigs()) { + HGraph* graph = CreateCFG(&allocator_, data); + + // Schedule the graph randomly. + HInstructionScheduling scheduling(graph, target_config.GetInstructionSet()); + scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true); + + RunCode(target_config, + graph, + [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); }, + has_result, expected); + } + } + + ArenaPool pool_; + ArenaAllocator allocator_; + HGraph* graph_; +}; + +#if defined(ART_ENABLE_CODEGEN_arm64) +TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) { + CriticalPathSchedulingNodeSelector critical_path_selector; + arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector); + TestBuildDependencyGraphAndSchedule(&scheduler); } #endif -static void CompileWithRandomSchedulerAndRun(const uint16_t* data, - bool has_result, - int expected) { - for (CodegenTargetConfig target_config : GetTargetConfigs()) { - ArenaPool pool; - ArenaAllocator arena(&pool); - HGraph* graph = CreateCFG(&arena, data); - - // Schedule the graph randomly. 
- HInstructionScheduling scheduling(graph, target_config.GetInstructionSet()); - scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true); - - RunCode(target_config, - graph, - [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); }, - has_result, expected); - } +#if defined(ART_ENABLE_CODEGEN_arm) +TEST_F(SchedulerTest, DependencyGrapAndSchedulerARM) { + CriticalPathSchedulingNodeSelector critical_path_selector; + arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); + arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor); + TestBuildDependencyGraphAndSchedule(&scheduler); } +#endif TEST_F(SchedulerTest, RandomScheduling) { // diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index eedaf6e67e..98ded24257 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -56,7 +56,7 @@ static bool IsInBootImage(ArtMethod* method) { const std::vector<gc::space::ImageSpace*>& image_spaces = Runtime::Current()->GetHeap()->GetBootImageSpaces(); for (gc::space::ImageSpace* image_space : image_spaces) { - const auto& method_section = image_space->GetImageHeader().GetMethodsSection(); + const ImageSection& method_section = image_space->GetImageHeader().GetMethodsSection(); if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) { return true; } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 5c36110cf6..2ff9018510 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -924,9 +924,11 @@ class Thumb2Assembler FINAL : public ArmAssembler { class ScopedForce32Bit { public: - explicit ScopedForce32Bit(Thumb2Assembler* assembler) + explicit ScopedForce32Bit(Thumb2Assembler* assembler, bool force = true) : assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) { - assembler->Force32Bit(); + if (force) { + assembler->Force32Bit(); + } } ~ScopedForce32Bit() { diff --git a/dexlayout/dexdiag.cc b/dexlayout/dexdiag.cc index c577b6e105..78860e3f96 100644 --- a/dexlayout/dexdiag.cc +++ b/dexlayout/dexdiag.cc @@ -200,7 +200,8 @@ static void ProcessPageMap(uint64_t* pagemap, for (size_t page = start; page < end; ++page) { char type_char = '.'; if (PM_PAGEMAP_PRESENT(pagemap[page])) { - uint16_t type = FindSectionTypeForPage(page, sections); + const size_t dex_page_offset = page - start; + uint16_t type = FindSectionTypeForPage(dex_page_offset, sections); page_counts->Increment(type); type_char = PageTypeChar(type); } @@ -231,7 +232,8 @@ static void DisplayDexStatistics(size_t start, return; } for (size_t page = start; page < end; ++page) { - mapped_pages.Increment(FindSectionTypeForPage(page, sections)); + const size_t dex_page_offset = page - start; + mapped_pages.Increment(FindSectionTypeForPage(dex_page_offset, sections)); } size_t total_resident_pages = 0; printer->PrintHeader(); diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc index 1a5e39f0f7..d6056c0ece 100644 --- a/runtime/arch/arch_test.cc +++ b/runtime/arch/arch_test.cc @@ -71,11 +71,15 @@ static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARG #undef FRAME_SIZE_SAVE_REFS_AND_ARGS static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING; #undef FRAME_SIZE_SAVE_EVERYTHING +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET +#undef 
BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET #undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET -#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET -#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET #undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET } // namespace arm namespace arm64 { diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h index f1f1766ad4..8f2fd6ecc9 100644 --- a/runtime/arch/arm/asm_support_arm.h +++ b/runtime/arch/arm/asm_support_arm.h @@ -24,18 +24,25 @@ #define FRAME_SIZE_SAVE_REFS_AND_ARGS 112 #define FRAME_SIZE_SAVE_EVERYTHING 192 +// The offset from the art_quick_read_barrier_mark_introspection (used for field +// loads with 32-bit LDR) to the entrypoint for field loads with 16-bit LDR, +// i.e. art_quick_read_barrier_mark_introspection_narrow. +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET 0x20 +// The offsets from art_quick_read_barrier_mark_introspection to the GC root entrypoints, +// i.e. art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}. +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET 0x80 +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET 0xc0 // The offset from art_quick_read_barrier_mark_introspection to the array switch cases, // i.e. art_quick_read_barrier_mark_introspection_arrays. #define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100 -// The offset from art_quick_read_barrier_mark_introspection to the GC root entrypoint, -// i.e. art_quick_read_barrier_mark_introspection_gc_roots. -#define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET 0xc0 // The offset of the reference load LDR from the return address in LR for field loads. #ifdef USE_HEAP_POISONING -#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -8 +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET -8 +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET -4 #else -#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -4 +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET -4 +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET -2 #endif // The offset of the reference load LDR from the return address in LR for array loads. #ifdef USE_HEAP_POISONING @@ -44,7 +51,8 @@ #define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -4 #endif // The offset of the reference load LDR from the return address in LR for GC root loads. 
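As a cross-check of the wide/narrow values (an illustrative sketch, not part of the patch): each offset is the negative distance from the return address in LR back to the reference-loading LDR. For field loads, LR points just past the LDR, so the offset is minus the LDR size (4 bytes for encoding T3, 2 bytes for encoding T1), with one extra unpoisoning instruction in front when heap poisoning is enabled. For GC root loads, the LDR additionally precedes the 4-byte branch to the thunk, which is exactly what the GC root defines just below encode.

// Illustrative consistency checks only (not in the patch); they restate the values in this
// header in terms of Thumb-2 instruction sizes.
#ifdef USE_HEAP_POISONING
static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET == -(4 + 4),
              "T3 LDR plus a 32-bit unpoisoning instruction");
static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET == -(2 + 2),
              "T1 LDR plus a 16-bit unpoisoning instruction");
#else
static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET == -4, "T3 LDR is 4 bytes");
static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET == -2, "T1 LDR is 2 bytes");
#endif
static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET == -(4 + 4),
              "T3 LDR followed by the 32-bit branch to the thunk");
static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET == -(2 + 4),
              "T1 LDR followed by the 32-bit branch to the thunk");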
-#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET -8 +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET -8 +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET -6 // Flag for enabling R4 optimization in arm runtime // #define ARM_R4_SUSPEND_FLAG diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index 6b7247773a..919b0afc40 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -53,8 +53,11 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_narrow(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*); -extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_wide(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_narrow( + mirror::Object*); // Used by soft float. // Single-precision FP arithmetics. @@ -86,18 +89,27 @@ void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; - // Check that array switch cases are at appropriate offsets from the introspection entrypoint. // For the alignment check, strip the Thumb mode bit. DCHECK_ALIGNED(reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection) - 1u, 256u); + // Check the field narrow entrypoint offset from the introspection entrypoint. + intptr_t narrow_diff = + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_narrow) - + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); + DCHECK_EQ(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET, narrow_diff); + // Check array switch cases offsets from the introspection entrypoint. intptr_t array_diff = reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_arrays) - reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); DCHECK_EQ(BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET, array_diff); - // Check that the GC root entrypoint is at appropriate offset from the introspection entrypoint. - intptr_t gc_roots_diff = - reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots) - + // Check the GC root entrypoint offsets from the introspection entrypoint. + intptr_t gc_roots_wide_diff = + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_wide) - + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); + DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET, gc_roots_wide_diff); + intptr_t gc_roots_narrow_diff = + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_narrow) - reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); - DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET, gc_roots_diff); + DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET, gc_roots_narrow_diff); // The register 12, i.e. IP, is reserved, so there is no art_quick_read_barrier_mark_reg12. 
// We're using the entry to hold a pointer to the introspection entrypoint instead. qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_introspection : nullptr; diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 6be7537d61..31a7f6ae8e 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -2189,7 +2189,7 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 .byte (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2 .endm -#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET +#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET #error "Array and field introspection code sharing requires same LDR offset." #endif .macro BRBMI_ARRAY_LOAD index_reg @@ -2208,7 +2208,10 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 BRBMI_BKPT_FILL_4B .endm -.macro BRBMI_SLOW_PATH ldr_offset +.macro BRBMI_RUNTIME_CALL + // Note: This macro generates exactly 22 bytes of code. The core register + // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions. + push {r0-r3, r7, lr} // Save return address and caller-save registers. .cfi_adjust_cfa_offset 24 .cfi_rel_offset r0, 0 @@ -2234,11 +2237,72 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 .cfi_restore r3 .cfi_restore r7 .cfi_restore lr +.endm + +.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix + // If reference is null, just return it in the right register. + cmp ip, #0 + beq .Lmark_introspection_return\label_suffix + // Use R4 as temp and check the mark bit of the reference. + ldr r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + tst r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED + beq .Lmark_introspection_unmarked\label_suffix +.Lmark_introspection_return\label_suffix: +.endm + +.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix +.Lmark_introspection_unmarked\label_suffix: + // Check if the top two bits are one, if this is the case it is a forwarding address. +#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) + // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in + // the highest bits and the "forwarding address" state to have all bits set. +#error "Unexpected lock word state shift or forwarding address state value." +#endif + cmp r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT) + bhs .Lmark_introspection_forwarding_address\label_suffix +.endm + +.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix +.Lmark_introspection_forwarding_address\label_suffix: + // Note: This macro generates exactly 22 bytes of code, the branch is near. + // Shift left by the forwarding address shift. This clears out the state bits since they are + // in the top 2 bits of the lock word. + lsl ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT + b .Lmark_introspection_return\label_suffix +.endm + +.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR. ldrh r4, [lr, #(-1 + \ldr_offset + 2)] - lsr r4, r4, #12 // Extract `ref_reg`. - b .Lmark_introspection_return_switch +.endm + +.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset + // Load the 16-bit instruction. Adjust for the thumb state in LR. 
+ ldrh r4, [lr, #(-1 + \ldr_offset)] +.endm + +.macro BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH gc_root_ldr_offset, label_suffix + .balign 64 + .thumb_func + .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function + .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix + .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix +art_quick_read_barrier_mark_introspection_gc_roots\label_suffix: + BRBMI_RUNTIME_CALL + // Load the LDR (or the half of it) that contains Rt. + BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \gc_root_ldr_offset + b .Lmark_introspection_extract_register_and_return\label_suffix + // We've used 28 bytes since the "gc_roots" entrypoint (22 bytes for + // BRBMI_RUNTIME_CALL, 4 bytes for LDRH and 2 bytes for the branch). Squeeze + // the 6 byte forwarding address extraction here across the 32-byte boundary. + BRBMI_EXTRACT_FORWARDING_ADDRESS \label_suffix + // And the slow path taking exactly 30 bytes (6 bytes for the forwarding + // address check, 22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near + // branch) shall take the rest of the 32-byte section (within a cache line). + BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix + BRBMI_RUNTIME_CALL + b .Lmark_introspection_return\label_suffix .endm /* @@ -2249,14 +2313,16 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 * * The entrypoint is called through a thunk that differs across load kinds. * For field and array loads the LDR instruction in generated code follows - * the branch to the thunk, i.e. the LDR is at [LR, #(-4 - 1)] where the -1 - * is an adjustment for the Thumb mode bit in LR, and the thunk knows the - * holder and performs the gray bit check, returning to the LDR instruction - * if the object is not gray, so this entrypoint no longer needs to know - * anything about the holder. For GC root loads, the LDR instruction in - * generated code precedes the branch to the thunk, i.e. the LDR is at - * [LR, #(-8 - 1)] where the -1 is again the Thumb mode bit adjustment, and - * the thunk does not do the gray bit check. + * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning) + * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where + * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk + * knows the holder and performs the gray bit check, returning to the LDR + * instruction if the object is not gray, so this entrypoint no longer + * needs to know anything about the holder. For GC root loads, the LDR + * instruction in generated code precedes the branch to the thunk, i.e. the + * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1) + * where the -1 is again the Thumb mode bit adjustment, and the thunk does + * not do the gray bit check. * * For field accesses and array loads with a constant index the thunk loads * the reference into IP using introspection and calls the main entrypoint, @@ -2288,11 +2354,29 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 * * The code structure is * art_quick_read_barrier_mark_introspection: - * Over 128 bytes for the main entrypoint code. - * Padding to 192 bytes if needed. - * art_quick_read_barrier_mark_introspection_gc_roots: - * GC root entrypoint code. - * Padding to 256 bytes if needed. + * Up to 32 bytes code for main entrypoint fast-path code for fields + * (and array elements with constant offset) with LDR encoding T3; + * jumps to the switch in the "narrow" entrypoint. + * Padding to 32 bytes if needed. 
+ * art_quick_read_barrier_mark_introspection_narrow: + * Up to 48 bytes code for fast path code for fields (and array + * elements with constant offset) with LDR encoding T1, ending in the + * return switch instruction TBB and the table with switch offsets. + * Padding to 80 bytes if needed. + * .Lmark_introspection_return_switch_case_r0: + * Exactly 48 bytes of code for the return switch cases (12 cases, + * including BKPT for the reserved registers). + * Ends at 128 bytes total. + * art_quick_read_barrier_mark_introspection_gc_roots_wide: + * GC root entrypoint code for LDR encoding T3 (28 bytes). + * Forwarding address extraction for LDR encoding T3 (6 bytes). + * Slow path for main entrypoint for LDR encoding T3 (30 bytes). + * Ends at 192 bytes total. + * art_quick_read_barrier_mark_introspection_gc_roots_narrow: + * GC root entrypoint code for LDR encoding T1 (28 bytes). + * Forwarding address extraction for LDR encoding T1 (6 bytes). + * Slow path for main entrypoint for LDR encoding T1 (30 bytes). + * Ends at 256 bytes total. * art_quick_read_barrier_mark_introspection_arrays: * Exactly 128 bytes for array load switch cases (16x2 instructions). */ @@ -2302,17 +2386,30 @@ ENTRY art_quick_read_barrier_mark_introspection // (R4 is reserved for the entrypoint address.) // For heap poisoning, the reference is poisoned, so unpoison it first. UNPOISON_HEAP_REF ip - // If reference is null, just return it in the right register. - cmp ip, #0 - beq .Lmark_introspection_return - // Use R4 as temp and check the mark bit of the reference. - ldr r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - tst r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED - beq .Lmark_introspection_unmarked -.Lmark_introspection_return: - // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR. - ldrh r4, [lr, #(-1 + BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET + 2)] + // Check for null or marked, lock word is loaded into IP. + BRBMI_CHECK_NULL_AND_MARKED _wide + // Load the half of the instruction that contains Rt. + BRBMI_LOAD_RETURN_REG_FROM_CODE_wide BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET +.Lmark_introspection_extract_register_and_return_wide: lsr r4, r4, #12 // Extract `ref_reg`. + b .Lmark_introspection_return_switch + + .balign 32 + .thumb_func + .type art_quick_read_barrier_mark_introspection_narrow, #function + .hidden art_quick_read_barrier_mark_introspection_narrow + .global art_quick_read_barrier_mark_introspection_narrow +art_quick_read_barrier_mark_introspection_narrow: + // At this point, IP contains the reference, R4 can be freely used. + // (R4 is reserved for the entrypoint address.) + // For heap poisoning, the reference is poisoned, so unpoison it first. + UNPOISON_HEAP_REF ip + // Check for null or marked, lock word is loaded into R4. + BRBMI_CHECK_NULL_AND_MARKED _narrow + // Load the 16-bit instruction. + BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET +.Lmark_introspection_extract_register_and_return_narrow: + and r4, r4, #7 // Extract `ref_reg`. .Lmark_introspection_return_switch: tbb [pc, r4] // Jump to the switch case. .Lmark_introspection_return_table: @@ -2320,32 +2417,8 @@ ENTRY art_quick_read_barrier_mark_introspection .balign 16 BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE - .balign 16 -.Lmark_introspection_unmarked: - // Check if the top two bits are one, if this is the case it is a forwarding address. 
-#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) - // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in - // the highest bits and the "forwarding address" state to have all bits set. -#error "Unexpected lock word state shift or forwarding address state value." -#endif - cmp r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT) - bhs .Lmark_introspection_forwarding_address - BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET - - .balign 8 -.Lmark_introspection_forwarding_address: - // Shift left by the forwarding address shift. This clears out the state bits since they are - // in the top 2 bits of the lock word. - lsl ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT - b .Lmark_introspection_return - - .balign 64 - .thumb_func - .type art_quick_read_barrier_mark_introspection_gc_roots, #function - .hidden art_quick_read_barrier_mark_introspection_gc_roots - .global art_quick_read_barrier_mark_introspection_gc_roots -art_quick_read_barrier_mark_introspection_gc_roots: - BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET + BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide + BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow .balign 256 .thumb_func diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc index 13979160bd..f21f1a2704 100644 --- a/runtime/dex_file_annotations.cc +++ b/runtime/dex_file_annotations.cc @@ -1421,11 +1421,20 @@ mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForClass(Handle<mirro } const char* GetSourceDebugExtension(Handle<mirror::Class> klass) { + // Before instantiating ClassData, check that klass has a DexCache + // assigned. The ClassData constructor indirectly dereferences it + // when calling klass->GetDexFile(). + if (klass->GetDexCache() == nullptr) { + DCHECK(klass->IsPrimitive() || klass->IsArrayClass()); + return nullptr; + } + ClassData data(klass); const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data); if (annotation_set == nullptr) { return nullptr; } + const DexFile::AnnotationItem* annotation_item = SearchAnnotationSet( data.GetDexFile(), annotation_set, @@ -1434,6 +1443,7 @@ const char* GetSourceDebugExtension(Handle<mirror::Class> klass) { if (annotation_item == nullptr) { return nullptr; } + const uint8_t* annotation = SearchEncodedAnnotation(data.GetDexFile(), annotation_item->annotation_, "value"); if (annotation == nullptr) { diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index a450a751b8..8b80f54880 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -616,25 +616,8 @@ void ConcurrentCopying::FlipThreadRoots() { ThreadFlipVisitor thread_flip_visitor(this, heap_->use_tlab_); FlipCallback flip_callback(this); - // This is the point where Concurrent-Copying will pause all threads. We report a pause here, if - // necessary. This is slightly over-reporting, as this includes the time to actually suspend - // threads. 
- { - GcPauseListener* pause_listener = GetHeap()->GetGcPauseListener(); - if (pause_listener != nullptr) { - pause_listener->StartPause(); - } - } - - size_t barrier_count = Runtime::Current()->FlipThreadRoots( - &thread_flip_visitor, &flip_callback, this); - - { - GcPauseListener* pause_listener = GetHeap()->GetGcPauseListener(); - if (pause_listener != nullptr) { - pause_listener->EndPause(); - } - } + size_t barrier_count = Runtime::Current()->GetThreadList()->FlipThreadRoots( + &thread_flip_visitor, &flip_callback, this, GetHeap()->GetGcPauseListener()); { ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); diff --git a/runtime/oat.h b/runtime/oat.h index a38eebc188..b7c715cc03 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '1', '2', '4', '\0' }; // New compiler filter names. + static constexpr uint8_t kOatVersion[] = { '1', '2', '5', '\0' }; // ARM Baker narrow thunks. static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; @@ -175,6 +175,7 @@ class PACKED(4) OatMethodOffsets { ~OatMethodOffsets(); + OatMethodOffsets(const OatMethodOffsets&) = default; OatMethodOffsets& operator=(const OatMethodOffsets&) = default; uint32_t code_offset_; diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h index f2a2af2a5f..152b0ba21b 100644 --- a/runtime/oat_quick_method_header.h +++ b/runtime/oat_quick_method_header.h @@ -54,6 +54,7 @@ class PACKED(4) OatQuickMethodHeader { return FromCodePointer(EntryPointToCodePointer(entry_point)); } + OatQuickMethodHeader(const OatQuickMethodHeader&) = default; OatQuickMethodHeader& operator=(const OatQuickMethodHeader&) = default; uintptr_t NativeQuickPcOffset(const uintptr_t pc) const { diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc index 0921ceae05..9be486e269 100644 --- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc +++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc @@ -1205,6 +1205,30 @@ class JvmtiFunctions { return error; } + error = add_extension( + reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::IterateThroughHeapExt), + "com.android.art.heap.iterate_through_heap_ext", + "Iterate through a heap. This is equivalent to the standard IterateThroughHeap function," + " except for additionally passing the heap id of the current object. The jvmtiHeapCallbacks" + " structure is reused, with the callbacks field overloaded to a signature of " + "jint (*)(jlong, jlong, jlong*, jint length, void*, jint).", + 4, + { // NOLINT [whitespace/braces] [4] + { "heap_filter", JVMTI_KIND_IN, JVMTI_TYPE_JINT, false}, + { "klass", JVMTI_KIND_IN, JVMTI_TYPE_JCLASS, true}, + { "callbacks", JVMTI_KIND_IN_PTR, JVMTI_TYPE_CVOID, false}, + { "user_data", JVMTI_KIND_IN_PTR, JVMTI_TYPE_CVOID, true} + }, + 3, + { // NOLINT [whitespace/braces] [4] + JVMTI_ERROR_MUST_POSSESS_CAPABILITY, + JVMTI_ERROR_INVALID_CLASS, + JVMTI_ERROR_NULL_POINTER + }); + if (error != ERR(NONE)) { + return error; + } + // Copy into output buffer. 
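To show how the extension registered above would be consumed, here is a hedged agent-side sketch (not part of this change; the helper and callback names are made up): it looks up com.android.art.heap.iterate_through_heap_ext through the standard JVMTI GetExtensionFunctions call and passes a callback matching the overloaded signature documented in the registration text.

// Illustrative JVMTI agent code; requires the can_tag_objects capability. Error handling and
// Deallocate() of the returned extension info storage are omitted for brevity.
#include <cstring>
#include <jvmti.h>

using IterateThroughHeapExtFn =
    jvmtiError (*)(jvmtiEnv*, jint, jclass, const jvmtiHeapCallbacks*, const void*);

// Matches the overloaded callback type: jint (*)(jlong, jlong, jlong*, jint, void*, jint).
static jint HeapIterationExtCallback(jlong class_tag, jlong size, jlong* tag_ptr,
                                     jint length, void* user_data, jint heap_id) {
  // Inspect the object here; heap_id distinguishes image, zygote and app heaps.
  return 0;  // Do not set JVMTI_VISIT_ABORT, so iteration continues.
}

static jvmtiError CallIterateThroughHeapExt(jvmtiEnv* env) {
  jint count = 0;
  jvmtiExtensionFunctionInfo* extensions = nullptr;
  jvmtiError error = env->GetExtensionFunctions(&count, &extensions);
  if (error != JVMTI_ERROR_NONE) {
    return error;
  }
  IterateThroughHeapExtFn iterate_ext = nullptr;
  for (jint i = 0; i < count; ++i) {
    if (std::strcmp(extensions[i].id, "com.android.art.heap.iterate_through_heap_ext") == 0) {
      iterate_ext = reinterpret_cast<IterateThroughHeapExtFn>(extensions[i].func);
    }
  }
  if (iterate_ext == nullptr) {
    return JVMTI_ERROR_NOT_AVAILABLE;
  }
  jvmtiHeapCallbacks callbacks;
  std::memset(&callbacks, 0, sizeof(callbacks));
  // The callbacks field is overloaded for this extension, hence the cast.
  callbacks.heap_iteration_callback =
      reinterpret_cast<jvmtiHeapIterationCallback>(HeapIterationExtCallback);
  return iterate_ext(env, /* heap_filter */ 0, /* klass */ nullptr, &callbacks,
                     /* user_data */ nullptr);
}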
*extension_count_ptr = ext_vector.size(); diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc index 9b4dcaa9d0..99774c67b5 100644 --- a/runtime/openjdkjvmti/ti_heap.cc +++ b/runtime/openjdkjvmti/ti_heap.cc @@ -651,14 +651,17 @@ void HeapUtil::Unregister() { art::Runtime::Current()->RemoveSystemWeakHolder(&gIndexCachingTable); } +template <typename Callback> struct IterateThroughHeapData { - IterateThroughHeapData(HeapUtil* _heap_util, + IterateThroughHeapData(Callback _cb, + ObjectTagTable* _tag_table, jvmtiEnv* _env, art::ObjPtr<art::mirror::Class> klass, jint _heap_filter, const jvmtiHeapCallbacks* _callbacks, const void* _user_data) - : heap_util(_heap_util), + : cb(_cb), + tag_table(_tag_table), heap_filter(_heap_filter), filter_klass(klass), env(_env), @@ -667,95 +670,89 @@ struct IterateThroughHeapData { stop_reports(false) { } - HeapUtil* heap_util; - const HeapFilter heap_filter; - art::ObjPtr<art::mirror::Class> filter_klass; - jvmtiEnv* env; - const jvmtiHeapCallbacks* callbacks; - const void* user_data; - - bool stop_reports; -}; - -static void IterateThroughHeapObjectCallback(art::mirror::Object* obj, void* arg) - REQUIRES_SHARED(art::Locks::mutator_lock_) { - IterateThroughHeapData* ithd = reinterpret_cast<IterateThroughHeapData*>(arg); - // Early return, as we can't really stop visiting. - if (ithd->stop_reports) { - return; + static void ObjectCallback(art::mirror::Object* obj, void* arg) + REQUIRES_SHARED(art::Locks::mutator_lock_) { + IterateThroughHeapData* ithd = reinterpret_cast<IterateThroughHeapData*>(arg); + ithd->ObjectCallback(obj); } - art::ScopedAssertNoThreadSuspension no_suspension("IterateThroughHeapCallback"); + void ObjectCallback(art::mirror::Object* obj) + REQUIRES_SHARED(art::Locks::mutator_lock_) { + // Early return, as we can't really stop visiting. + if (stop_reports) { + return; + } - jlong tag = 0; - ithd->heap_util->GetTags()->GetTag(obj, &tag); + art::ScopedAssertNoThreadSuspension no_suspension("IterateThroughHeapCallback"); - jlong class_tag = 0; - art::ObjPtr<art::mirror::Class> klass = obj->GetClass(); - ithd->heap_util->GetTags()->GetTag(klass.Ptr(), &class_tag); - // For simplicity, even if we find a tag = 0, assume 0 = not tagged. + jlong tag = 0; + tag_table->GetTag(obj, &tag); - if (!ithd->heap_filter.ShouldReportByHeapFilter(tag, class_tag)) { - return; - } + jlong class_tag = 0; + art::ObjPtr<art::mirror::Class> klass = obj->GetClass(); + tag_table->GetTag(klass.Ptr(), &class_tag); + // For simplicity, even if we find a tag = 0, assume 0 = not tagged. 
- if (ithd->filter_klass != nullptr) { - if (ithd->filter_klass != klass) { + if (!heap_filter.ShouldReportByHeapFilter(tag, class_tag)) { return; } - } - jlong size = obj->SizeOf(); + if (filter_klass != nullptr) { + if (filter_klass != klass) { + return; + } + } - jint length = -1; - if (obj->IsArrayInstance()) { - length = obj->AsArray()->GetLength(); - } + jlong size = obj->SizeOf(); - jlong saved_tag = tag; - jint ret = ithd->callbacks->heap_iteration_callback(class_tag, - size, - &tag, - length, - const_cast<void*>(ithd->user_data)); + jint length = -1; + if (obj->IsArrayInstance()) { + length = obj->AsArray()->GetLength(); + } - if (tag != saved_tag) { - ithd->heap_util->GetTags()->Set(obj, tag); - } + jlong saved_tag = tag; + jint ret = cb(obj, callbacks, class_tag, size, &tag, length, const_cast<void*>(user_data)); - ithd->stop_reports = (ret & JVMTI_VISIT_ABORT) != 0; + if (tag != saved_tag) { + tag_table->Set(obj, tag); + } - if (!ithd->stop_reports) { - jint string_ret = ReportString(obj, - ithd->env, - ithd->heap_util->GetTags(), - ithd->callbacks, - ithd->user_data); - ithd->stop_reports = (string_ret & JVMTI_VISIT_ABORT) != 0; - } + stop_reports = (ret & JVMTI_VISIT_ABORT) != 0; - if (!ithd->stop_reports) { - jint array_ret = ReportPrimitiveArray(obj, - ithd->env, - ithd->heap_util->GetTags(), - ithd->callbacks, - ithd->user_data); - ithd->stop_reports = (array_ret & JVMTI_VISIT_ABORT) != 0; - } + if (!stop_reports) { + jint string_ret = ReportString(obj, env, tag_table, callbacks, user_data); + stop_reports = (string_ret & JVMTI_VISIT_ABORT) != 0; + } + + if (!stop_reports) { + jint array_ret = ReportPrimitiveArray(obj, env, tag_table, callbacks, user_data); + stop_reports = (array_ret & JVMTI_VISIT_ABORT) != 0; + } - if (!ithd->stop_reports) { - ithd->stop_reports = ReportPrimitiveField::Report(obj, - ithd->heap_util->GetTags(), - ithd->callbacks, - ithd->user_data); + if (!stop_reports) { + stop_reports = ReportPrimitiveField::Report(obj, tag_table, callbacks, user_data); + } } -} -jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env, - jint heap_filter, - jclass klass, - const jvmtiHeapCallbacks* callbacks, - const void* user_data) { + Callback cb; + ObjectTagTable* tag_table; + const HeapFilter heap_filter; + art::ObjPtr<art::mirror::Class> filter_klass; + jvmtiEnv* env; + const jvmtiHeapCallbacks* callbacks; + const void* user_data; + + bool stop_reports; +}; + +template <typename T> +static jvmtiError DoIterateThroughHeap(T fn, + jvmtiEnv* env, + ObjectTagTable* tag_table, + jint heap_filter, + jclass klass, + const jvmtiHeapCallbacks* callbacks, + const void* user_data) { if (callbacks == nullptr) { return ERR(NULL_POINTER); } @@ -763,16 +760,46 @@ jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env, art::Thread* self = art::Thread::Current(); art::ScopedObjectAccess soa(self); // Now we know we have the shared lock. 
- IterateThroughHeapData ithd(this, + using Iterator = IterateThroughHeapData<T>; + Iterator ithd(fn, + tag_table, + env, + soa.Decode<art::mirror::Class>(klass), + heap_filter, + callbacks, + user_data); + + art::Runtime::Current()->GetHeap()->VisitObjects(Iterator::ObjectCallback, &ithd); + + return ERR(NONE); +} + +jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env, + jint heap_filter, + jclass klass, + const jvmtiHeapCallbacks* callbacks, + const void* user_data) { + auto JvmtiIterateHeap = [](art::mirror::Object* obj ATTRIBUTE_UNUSED, + const jvmtiHeapCallbacks* cb_callbacks, + jlong class_tag, + jlong size, + jlong* tag, + jint length, + void* cb_user_data) + REQUIRES_SHARED(art::Locks::mutator_lock_) { + return cb_callbacks->heap_iteration_callback(class_tag, + size, + tag, + length, + cb_user_data); + }; + return DoIterateThroughHeap(JvmtiIterateHeap, env, - soa.Decode<art::mirror::Class>(klass), + ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get(), heap_filter, + klass, callbacks, user_data); - - art::Runtime::Current()->GetHeap()->VisitObjects(IterateThroughHeapObjectCallback, &ithd); - - return ERR(NONE); } class FollowReferencesHelper FINAL { @@ -1406,6 +1433,33 @@ static constexpr jint kHeapIdImage = 1; static constexpr jint kHeapIdZygote = 2; static constexpr jint kHeapIdApp = 3; +static jint GetHeapId(art::ObjPtr<art::mirror::Object> obj) + REQUIRES_SHARED(art::Locks::mutator_lock_) { + if (obj == nullptr) { + return -1; + } + + art::gc::Heap* const heap = art::Runtime::Current()->GetHeap(); + const art::gc::space::ContinuousSpace* const space = + heap->FindContinuousSpaceFromObject(obj, true); + jint heap_type = kHeapIdApp; + if (space != nullptr) { + if (space->IsZygoteSpace()) { + heap_type = kHeapIdZygote; + } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) { + // Only count objects in the boot image as HPROF_HEAP_IMAGE, this leaves app image objects + // as HPROF_HEAP_APP. b/35762934 + heap_type = kHeapIdImage; + } + } else { + const auto* los = heap->GetLargeObjectsSpace(); + if (los->Contains(obj.Ptr()) && los->IsZygoteLargeObject(art::Thread::Current(), obj.Ptr())) { + heap_type = kHeapIdZygote; + } + } + return heap_type; +}; + jvmtiError HeapExtensions::GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...) { if (heap_id == nullptr) { return ERR(NULL_POINTER); @@ -1416,28 +1470,10 @@ jvmtiError HeapExtensions::GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_ auto work = [&]() REQUIRES_SHARED(art::Locks::mutator_lock_) { ObjectTagTable* tag_table = ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get(); art::ObjPtr<art::mirror::Object> obj = tag_table->Find(tag); - if (obj == nullptr) { + jint heap_type = GetHeapId(obj); + if (heap_type == -1) { return ERR(NOT_FOUND); } - - art::gc::Heap* const heap = art::Runtime::Current()->GetHeap(); - const art::gc::space::ContinuousSpace* const space = - heap->FindContinuousSpaceFromObject(obj, true); - jint heap_type = kHeapIdApp; - if (space != nullptr) { - if (space->IsZygoteSpace()) { - heap_type = kHeapIdZygote; - } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) { - // Only count objects in the boot image as HPROF_HEAP_IMAGE, this leaves app image objects - // as HPROF_HEAP_APP. 
b/35762934 - heap_type = kHeapIdImage; - } - } else { - const auto* los = heap->GetLargeObjectsSpace(); - if (los->Contains(obj.Ptr()) && los->IsZygoteLargeObject(self, obj.Ptr())) { - heap_type = kHeapIdZygote; - } - } *heap_id = heap_type; return ERR(NONE); }; @@ -1491,4 +1527,36 @@ jvmtiError HeapExtensions::GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_ } } +jvmtiError HeapExtensions::IterateThroughHeapExt(jvmtiEnv* env, + jint heap_filter, + jclass klass, + const jvmtiHeapCallbacks* callbacks, + const void* user_data) { + if (ArtJvmTiEnv::AsArtJvmTiEnv(env)->capabilities.can_tag_objects != 1) { \ + return ERR(MUST_POSSESS_CAPABILITY); \ + } + + // ART extension API: Also pass the heap id. + auto ArtIterateHeap = [](art::mirror::Object* obj, + const jvmtiHeapCallbacks* cb_callbacks, + jlong class_tag, + jlong size, + jlong* tag, + jint length, + void* cb_user_data) + REQUIRES_SHARED(art::Locks::mutator_lock_) { + jint heap_id = GetHeapId(obj); + using ArtExtensionAPI = jint (*)(jlong, jlong, jlong*, jint length, void*, jint); + return reinterpret_cast<ArtExtensionAPI>(cb_callbacks->heap_iteration_callback)( + class_tag, size, tag, length, cb_user_data, heap_id); + }; + return DoIterateThroughHeap(ArtIterateHeap, + env, + ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get(), + heap_filter, + klass, + callbacks, + user_data); +} + } // namespace openjdkjvmti diff --git a/runtime/openjdkjvmti/ti_heap.h b/runtime/openjdkjvmti/ti_heap.h index b4b71ba88e..0c973db199 100644 --- a/runtime/openjdkjvmti/ti_heap.h +++ b/runtime/openjdkjvmti/ti_heap.h @@ -60,6 +60,12 @@ class HeapExtensions { public: static jvmtiError JNICALL GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...); static jvmtiError JNICALL GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...); + + static jvmtiError JNICALL IterateThroughHeapExt(jvmtiEnv* env, + jint heap_filter, + jclass klass, + const jvmtiHeapCallbacks* callbacks, + const void* user_data); }; } // namespace openjdkjvmti diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 60fa0828a0..0bc0869044 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -1816,11 +1816,6 @@ void Runtime::VisitThreadRoots(RootVisitor* visitor, VisitRootFlags flags) { thread_list_->VisitRoots(visitor, flags); } -size_t Runtime::FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback, - gc::collector::GarbageCollector* collector) { - return thread_list_->FlipThreadRoots(thread_flip_visitor, flip_callback, collector); -} - void Runtime::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) { VisitNonConcurrentRoots(visitor, flags); VisitConcurrentRoots(visitor, flags); diff --git a/runtime/runtime.h b/runtime/runtime.h index a2505e2292..4931382e55 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -48,9 +48,6 @@ namespace art { namespace gc { class AbstractSystemWeakHolder; class Heap; - namespace collector { - class GarbageCollector; - } // namespace collector } // namespace gc namespace jit { @@ -79,7 +76,6 @@ class ArenaPool; class ArtMethod; class ClassHierarchyAnalysis; class ClassLinker; -class Closure; class CompilerCallbacks; class DexFile; class InternTable; @@ -340,11 +336,6 @@ class Runtime { void VisitTransactionRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_); - // Flip thread roots from from-space refs to to-space refs. 
- size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback, - gc::collector::GarbageCollector* collector) - REQUIRES(!Locks::mutator_lock_); - // Sweep system weaks, the system weak is deleted if the visitor return null. Otherwise, the // system weak is updated to be the visitor's returned value. void SweepSystemWeaks(IsMarkedVisitor* visitor) @@ -948,7 +939,8 @@ class Runtime { std::unique_ptr<RuntimeCallbacks> callbacks_; - std::atomic<uint32_t> deoptimization_counts_[static_cast<uint32_t>(DeoptimizationKind::kLast)]; + std::atomic<uint32_t> deoptimization_counts_[ + static_cast<uint32_t>(DeoptimizationKind::kLast) + 1]; DISALLOW_COPY_AND_ASSIGN(Runtime); }; diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index b63eaa40ef..dc2af2ae34 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -34,6 +34,7 @@ #include "base/timing_logger.h" #include "debugger.h" #include "gc/collector/concurrent_copying.h" +#include "gc/gc_pause_listener.h" #include "gc/reference_processor.h" #include "jni_internal.h" #include "lock_word.h" @@ -528,7 +529,8 @@ size_t ThreadList::RunCheckpointOnRunnableThreads(Closure* checkpoint_function) // invariant. size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback, - gc::collector::GarbageCollector* collector) { + gc::collector::GarbageCollector* collector, + gc::GcPauseListener* pause_listener) { TimingLogger::ScopedTiming split("ThreadListFlip", collector->GetTimings()); Thread* self = Thread::Current(); Locks::mutator_lock_->AssertNotHeld(self); @@ -542,6 +544,9 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, // pause. const uint64_t suspend_start_time = NanoTime(); SuspendAllInternal(self, self, nullptr); + if (pause_listener != nullptr) { + pause_listener->StartPause(); + } // Run the flip callback for the collector. Locks::mutator_lock_->ExclusiveLock(self); @@ -549,6 +554,9 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor, flip_callback->Run(self); Locks::mutator_lock_->ExclusiveUnlock(self); collector->RegisterPause(NanoTime() - suspend_start_time); + if (pause_listener != nullptr) { + pause_listener->EndPause(); + } // Resume runnable threads. size_t runnable_thread_count = 0; diff --git a/runtime/thread_list.h b/runtime/thread_list.h index 14bef5e2b9..337574603b 100644 --- a/runtime/thread_list.h +++ b/runtime/thread_list.h @@ -35,6 +35,7 @@ namespace gc { namespace collector { class GarbageCollector; } // namespac collector + class GcPauseListener; } // namespace gc class Closure; class Thread; @@ -121,7 +122,8 @@ class ThreadList { // the concurrent copying collector. size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback, - gc::collector::GarbageCollector* collector) + gc::collector::GarbageCollector* collector, + gc::GcPauseListener* pause_listener) REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_); diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt index 852ec2e5e9..9c64111027 100644 --- a/test/115-native-bridge/expected.txt +++ b/test/115-native-bridge/expected.txt @@ -62,3 +62,8 @@ trampoline_Java_Main_testNewStringObject called! Getting trampoline for Java_Main_testSignal with shorty I. NB signal handler with signal 11. NB signal handler with signal 4. +Loading invalid library 'libinvalid.so' from Java, which will fail. +Checking for support. +Was to load 'libinvalid.so', force fail. +getError() in native bridge. 
+Catch UnsatisfiedLinkError exception as expected. diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc index 87287f8acf..b3b89491bf 100644 --- a/test/115-native-bridge/nativebridge.cc +++ b/test/115-native-bridge/nativebridge.cc @@ -285,6 +285,10 @@ extern "C" bool native_bridge_initialize(const android::NativeBridgeRuntimeCallb } extern "C" void* native_bridge_loadLibrary(const char* libpath, int flag) { + if (strstr(libpath, "libinvalid.so") != nullptr) { + printf("Was to load 'libinvalid.so', force fail.\n"); + return nullptr; + } size_t len = strlen(libpath); char* tmp = new char[len + 10]; strncpy(tmp, libpath, len); @@ -300,7 +304,7 @@ extern "C" void* native_bridge_loadLibrary(const char* libpath, int flag) { printf("Handle = nullptr!\n"); printf("Was looking for %s.\n", libpath); printf("Error = %s.\n", dlerror()); - char cwd[1024]; + char cwd[1024] = {'\0'}; if (getcwd(cwd, sizeof(cwd)) != nullptr) { printf("Current working dir: %s\n", cwd); } @@ -437,8 +441,8 @@ extern "C" int native_bridge_unloadLibrary(void* handle ATTRIBUTE_UNUSED) { } extern "C" const char* native_bridge_getError() { - printf("dlerror() in native bridge.\n"); - return nullptr; + printf("getError() in native bridge.\n"); + return ""; } extern "C" bool native_bridge_isPathSupported(const char* library_path ATTRIBUTE_UNUSED) { diff --git a/test/115-native-bridge/run b/test/115-native-bridge/run index 9290dd3cf4..22f5c67ddc 100644 --- a/test/115-native-bridge/run +++ b/test/115-native-bridge/run @@ -23,6 +23,7 @@ LIBPATH=${LIBPATH##*:} ln -sf ${LIBPATH}/libnativebridgetest.so . touch libarttest.so touch libarttestd.so +touch libinvalid.so ln -sf ${LIBPATH}/libarttest.so libarttest2.so ln -sf ${LIBPATH}/libarttestd.so libarttestd2.so diff --git a/test/115-native-bridge/src/NativeBridgeMain.java b/test/115-native-bridge/src/NativeBridgeMain.java index c298b1b772..e8d1e4e326 100644 --- a/test/115-native-bridge/src/NativeBridgeMain.java +++ b/test/115-native-bridge/src/NativeBridgeMain.java @@ -16,6 +16,7 @@ import java.lang.reflect.Method; import java.lang.System; +import java.lang.Exception; // This is named Main as it is a copy of JniTest, so that we can re-use the native implementations // from libarttest. @@ -33,6 +34,7 @@ class Main { testEnvironment(); testNewStringObject(); testSignalHandler(); + testGetErrorByLoadInvalidLibrary(); } public static native void testFindClassOnAttachedNativeThread(); @@ -183,6 +185,20 @@ class Main { } private static native int testSignal(); + + // Test the path from Java to getError() of NativeBridge. + // + // Load invalid library 'libinvalid.so' from Java. Library loading will fail since it's + // invalid (empty file). ART, NativeLoader actually, calls getError() to dump error message. + // After that in Java, catch UnsatisfiedLinkError exception to confirm. 
+ private static void testGetErrorByLoadInvalidLibrary() { + System.out.println("Loading invalid library 'libinvalid.so' from Java, which will fail."); + try { + System.loadLibrary("invalid"); + } catch (java.lang.UnsatisfiedLinkError e){ + System.out.println("Catch UnsatisfiedLinkError exception as expected."); + } + } } public class NativeBridgeMain { diff --git a/test/409-materialized-condition/src/Main.java b/test/409-materialized-condition/src/Main.java index 8a814a2da1..0c179a99de 100644 --- a/test/409-materialized-condition/src/Main.java +++ b/test/409-materialized-condition/src/Main.java @@ -50,36 +50,6 @@ public class Main { return b; } - public static boolean $noinline$intEq0(int x) { - return x == 0; - } - - public static boolean $noinline$intNe0(int x) { - return x != 0; - } - - public static boolean $noinline$longEq0(long x) { - return x == 0; - } - - public static boolean $noinline$longNe0(long x) { - return x != 0; - } - - public static boolean $noinline$longEqCst(long x) { - return x == 0x0123456789ABCDEFL; - } - - public static boolean $noinline$longNeCst(long x) { - return x != 0x0123456789ABCDEFL; - } - - public static void assertEqual(boolean expected, boolean actual) { - if (expected != actual) { - throw new Error("Assertion failed: " + expected + " != " + actual); - } - } - public static void main(String[] args) { System.out.println("foo1"); int res = foo1(); @@ -92,46 +62,5 @@ public class Main { if (res != 42) { throw new Error("Unexpected return value for foo2: " + res + ", expected 42."); } - - int[] int_inputs = {0, 1, -1, Integer.MIN_VALUE, Integer.MAX_VALUE, 42, -9000}; - long[] long_inputs = { - 0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE, 0x100000000L, - 0x100000001L, -9000L, 0x0123456789ABCDEFL}; - - boolean[] int_eq_0_expected = {true, false, false, false, false, false, false}; - - for (int i = 0; i < int_inputs.length; i++) { - assertEqual(int_eq_0_expected[i], $noinline$intEq0(int_inputs[i])); - } - - boolean[] int_ne_0_expected = {false, true, true, true, true, true, true}; - - for (int i = 0; i < int_inputs.length; i++) { - assertEqual(int_ne_0_expected[i], $noinline$intNe0(int_inputs[i])); - } - - boolean[] long_eq_0_expected = {true, false, false, false, false, false, false, false, false}; - - for (int i = 0; i < long_inputs.length; i++) { - assertEqual(long_eq_0_expected[i], $noinline$longEq0(long_inputs[i])); - } - - boolean[] long_ne_0_expected = {false, true, true, true, true, true, true, true, true}; - - for (int i = 0; i < long_inputs.length; i++) { - assertEqual(long_ne_0_expected[i], $noinline$longNe0(long_inputs[i])); - } - - boolean[] long_eq_cst_expected = {false, false, false, false, false, false, false, false, true}; - - for (int i = 0; i < long_inputs.length; i++) { - assertEqual(long_eq_cst_expected[i], $noinline$longEqCst(long_inputs[i])); - } - - boolean[] long_ne_cst_expected = {true, true, true, true, true, true, true, true, false}; - - for (int i = 0; i < long_inputs.length; i++) { - assertEqual(long_ne_cst_expected[i], $noinline$longNeCst(long_inputs[i])); - } } } diff --git a/test/527-checker-array-access-simd/expected.txt b/test/527-checker-array-access-simd/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/527-checker-array-access-simd/expected.txt diff --git a/test/527-checker-array-access-simd/info.txt b/test/527-checker-array-access-simd/info.txt new file mode 100644 index 0000000000..f147943043 --- /dev/null +++ b/test/527-checker-array-access-simd/info.txt @@ -0,0 +1 @@ +Test arm- and 
arm64-specific array access optimization for simd loops. diff --git a/test/527-checker-array-access-simd/src/Main.java b/test/527-checker-array-access-simd/src/Main.java new file mode 100644 index 0000000000..8af5465faf --- /dev/null +++ b/test/527-checker-array-access-simd/src/Main.java @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + public static void assertIntEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + /// CHECK-START-ARM64: void Main.checkIntCase(int[]) instruction_simplifier_arm64 (before) + /// CHECK-DAG: <<Array:l\d+>> ParameterValue + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>] + // -------------- Loop + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: If + /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Index>>] + /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>] + /// CHECK-DAG: VecStore [<<Array>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM64: void Main.checkIntCase(int[]) instruction_simplifier_arm64 (after) + /// CHECK-DAG: <<Array:l\d+>> ParameterValue + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>] + // -------------- Loop + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: If + /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>] + /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Address1>>] + /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>] + /// CHECK-DAG: <<Address2:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>] + /// CHECK-DAG: VecStore [<<Array>>,<<Address2>>,<<Add>>] + + /// CHECK-START-ARM64: void Main.checkIntCase(int[]) GVN$after_arch (after) + /// CHECK-DAG: <<Array:l\d+>> ParameterValue + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>] + // -------------- Loop + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: If + /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>] + /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Address1>>] + /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>] + /// CHECK-NOT: IntermediateAddress + /// CHECK-DAG: VecStore [<<Array>>,<<Address1>>,<<Add>>] + + /// CHECK-START-ARM64: void Main.checkIntCase(int[]) disassembly (after) + /// CHECK: IntermediateAddressIndex + /// CHECK-NEXT: add w{{[0-9]+}}, w{{[0-9]+}}, w{{[0-9]+}}, lsl #2 + public static void checkIntCase(int[] a) { + for (int i = 0; i < 128; i++) { + a[i] += 5; + } + } + + /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) instruction_simplifier_arm64 
(before) + /// CHECK-DAG: <<Array:l\d+>> ParameterValue + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>] + // -------------- Loop + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: If + /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Index>>] + /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>] + /// CHECK-DAG: VecStore [<<Array>>,<<Index>>,<<Add>>] + + /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) instruction_simplifier_arm64 (after) + /// CHECK-DAG: <<Array:l\d+>> ParameterValue + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>] + // -------------- Loop + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: If + /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const0>>] + /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Address1>>] + /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>] + /// CHECK-DAG: <<Address2:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const0>>] + /// CHECK-DAG: VecStore [<<Array>>,<<Address2>>,<<Add>>] + + /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) GVN$after_arch (after) + /// CHECK-DAG: <<Array:l\d+>> ParameterValue + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>] + // -------------- Loop + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: If + /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const0>>] + /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Address1>>] + /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>] + /// CHECK-NOT: IntermediateAddress + /// CHECK-DAG: VecStore [<<Array>>,<<Address1>>,<<Add>>] + + /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) disassembly (after) + /// CHECK: IntermediateAddressIndex + /// CHECK-NEXT: add w{{[0-9]+}}, w{{[0-9]+}}, #0x{{[0-9a-fA-F]+}} + /// CHECK: VecLoad + /// CHECK-NEXT: ldr q{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}] + /// CHECK: VecStore + /// CHECK-NEXT: str q{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}] + public static void checkByteCase(byte[] a) { + for (int i = 0; i < 128; i++) { + a[i] += 5; + } + } + + /// CHECK-START-ARM64: void Main.checkSingleAccess(int[]) instruction_simplifier_arm64 (before) + /// CHECK-DAG: <<Array:l\d+>> ParameterValue + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>] + // -------------- Loop + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: If + /// CHECK-DAG: VecStore [<<Array>>,<<Index>>,<<Repl>>] + + /// CHECK-START-ARM64: void Main.checkSingleAccess(int[]) instruction_simplifier_arm64 (after) + /// CHECK-DAG: <<Array:l\d+>> ParameterValue + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>] + // -------------- Loop + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: If + /// CHECK-DAG: VecStore [<<Array>>,<<Index>>,<<Repl>>] + /// CHECK-NOT: IntermediateAddress + public static void checkSingleAccess(int[] a) { + for (int i = 0; i < 128; i++) { + a[i] = 5; + } + } + + /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) instruction_simplifier_arm64 (before) + /// CHECK-DAG: <<Array1:l\d+>> ParameterValue + /// 
CHECK-DAG: <<Array2:l\d+>> ParameterValue + // -------------- Loop + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: If + /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array1>>,<<Index>>] + /// CHECK-DAG: <<Cnv:d\d+>> VecCnv [<<Load>>] + /// CHECK-DAG: VecStore [<<Array2>>,<<Index>>,<<Cnv>>] + + /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) instruction_simplifier_arm64 (after) + /// CHECK-DAG: <<Array1:l\d+>> ParameterValue + /// CHECK-DAG: <<Array2:l\d+>> ParameterValue + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 + // -------------- Loop + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: If + /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>] + /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array1>>,<<Address1>>] + /// CHECK-DAG: <<Cnv:d\d+>> VecCnv [<<Load>>] + /// CHECK-DAG: <<Address2:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>] + /// CHECK-DAG: VecStore [<<Array2>>,<<Address2>>,<<Cnv>>] + + /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) GVN$after_arch (after) + /// CHECK-DAG: <<Array1:l\d+>> ParameterValue + /// CHECK-DAG: <<Array2:l\d+>> ParameterValue + /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 + // -------------- Loop + /// CHECK-DAG: <<Index:i\d+>> Phi + /// CHECK-DAG: If + /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>] + /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array1>>,<<Address1>>] + /// CHECK-DAG: <<Cnv:d\d+>> VecCnv [<<Load>>] + /// CHECK-NOT: IntermediateAddress + /// CHECK-DAG: VecStore [<<Array2>>,<<Address1>>,<<Cnv>>] + + /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) disassembly (after) + /// CHECK: IntermediateAddressIndex + /// CHECK-NEXT: add w{{[0-9]+}}, w{{[0-9]+}}, w{{[0-9]+}}, lsl #2 + public static void checkInt2Float(int[] a, float[] b) { + for (int i = 0; i < 128; i++) { + b[i] = (float) a[i]; + } + } + + public static final int ARRAY_SIZE = 1024; + + public static int calcArraySum(int[] a, byte[] b, float[] c) { + int sum = 0; + for (int i = 0; i < 128; i++) { + sum += a[i] + b[i] + (int) c[i]; + } + return sum; + } + + public static void main(String[] args) { + byte[] ba = new byte[ARRAY_SIZE]; + int[] ia = new int[ARRAY_SIZE]; + float[] fa = new float[ARRAY_SIZE]; + + checkSingleAccess(ia); + checkIntCase(ia); + checkByteCase(ba); + checkInt2Float(ia, fa); + + assertIntEquals(3200, calcArraySum(ia, ba, fa)); + } +} diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java index 2dad14ce31..3ac6f89c5f 100644 --- a/test/570-checker-select/src/Main.java +++ b/test/570-checker-select/src/Main.java @@ -414,46 +414,6 @@ public class Main { return a > 0x7FFFFFFFFFFFFFFFL ? x : y; } - /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar4(long, long, long) disassembly (after) - /// CHECK: Select - /// CHECK-NEXT: orrs ip, {{r\d+}}, {{r\d+}} - /// CHECK-NOT: cmp - /// CHECK-NOT: sbcs - - public static long $noinline$LongNonmatCondCst_LongVarVar4(long a, long x, long y) { - return a == 0 ? x : y; - } - - /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar5(long, long, long) disassembly (after) - /// CHECK: Select - /// CHECK-NEXT: orrs ip, {{r\d+}}, {{r\d+}} - /// CHECK-NOT: cmp - /// CHECK-NOT: sbcs - - public static long $noinline$LongNonmatCondCst_LongVarVar5(long a, long x, long y) { - return a != 0 ? 
x : y; - } - - /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar6(long, long, long) disassembly (after) - /// CHECK: Select - /// CHECK-NEXT: cmp {{r\d+}}, #0 - /// CHECK-NOT: cmp - /// CHECK-NOT: sbcs - - public static long $noinline$LongNonmatCondCst_LongVarVar6(long a, long x, long y) { - return a >= 0 ? x : y; - } - - /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar7(long, long, long) disassembly (after) - /// CHECK: Select - /// CHECK-NEXT: cmp {{r\d+}}, #0 - /// CHECK-NOT: cmp - /// CHECK-NOT: sbcs - - public static long $noinline$LongNonmatCondCst_LongVarVar7(long a, long x, long y) { - return a < 0 ? x : y; - } - /// CHECK-START: long Main.LongMatCond_LongVarVar(long, long, long, long) register (after) /// CHECK: <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}] /// CHECK: <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>] @@ -728,37 +688,6 @@ public class Main { assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar3(2L, 5L, 7L)); - long[] long_inputs = { - 0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE, 2L, 0x100000000L, 0xFFFFFFFF00000000L, -9000L}; - - long[] expected_1 = {5L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L}; - - for (int i = 0; i < long_inputs.length; i++) { - assertEqual(expected_1[i], $noinline$LongNonmatCondCst_LongVarVar4(long_inputs[i], 5L, 7L)); - } - - long[] expected_2 = {7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L}; - - for (int i = 0; i < long_inputs.length; i++) { - assertEqual(expected_2[i], $noinline$LongNonmatCondCst_LongVarVar5(long_inputs[i], 5L, 7L)); - } - - long[] expected_3 = {5L, 5L, 7L, 7L, 5L, 5L, 5L, 7L, 7L}; - - for (int i = 0; i < long_inputs.length; i++) { - assertEqual(expected_3[i], $noinline$LongNonmatCondCst_LongVarVar6(long_inputs[i], 5L, 7L)); - } - - long[] expected_4 = {7L, 7L, 5L, 5L, 7L, 7L, 7L, 5L, 5L}; - - for (int i = 0; i < long_inputs.length; i++) { - assertEqual(expected_4[i], $noinline$LongNonmatCondCst_LongVarVar7(long_inputs[i], 5L, 7L)); - } - - assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar7(0L, 5L, 7L)); - assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar7(2L, 5L, 7L)); - assertEqual(5L, $noinline$LongNonmatCondCst_LongVarVar7(-9000L, 5L, 7L)); - assertEqual(5, FloatLtNonmatCond_IntVarVar(3, 2, 5, 7)); assertEqual(7, FloatLtNonmatCond_IntVarVar(2, 3, 5, 7)); assertEqual(7, FloatLtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7)); diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc index f39c5f16d7..ec36cebd43 100644 --- a/test/913-heaps/heaps.cc +++ b/test/913-heaps/heaps.cc @@ -823,6 +823,14 @@ static GetObjectHeapId gGetObjectHeapIdFn = nullptr; using GetHeapName = jvmtiError(*)(jvmtiEnv*, jint, char**, ...); static GetHeapName gGetHeapNameFn = nullptr; +using IterateThroughHeapExt = jvmtiError(*)(jvmtiEnv*, + jint, + jclass, + const jvmtiHeapCallbacks*, + const void*); +static IterateThroughHeapExt gIterateThroughHeapExt = nullptr; + + static void FreeExtensionFunctionInfo(jvmtiExtensionFunctionInfo* extensions, jint count) { for (size_t i = 0; i != static_cast<size_t>(count); ++i) { jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].id)); @@ -886,6 +894,38 @@ extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkForExtensionApis( CHECK(extensions[i].errors != nullptr); CHECK(extensions[i].errors[0] == JVMTI_ERROR_ILLEGAL_ARGUMENT); } + + if (strcmp("com.android.art.heap.iterate_through_heap_ext", extensions[i].id) == 0) { + CHECK(gIterateThroughHeapExt == nullptr); + gIterateThroughHeapExt = reinterpret_cast<IterateThroughHeapExt>(extensions[i].func); + + 
CHECK_EQ(extensions[i].param_count, 4); + + CHECK_EQ(strcmp("heap_filter", extensions[i].params[0].name), 0); + CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JINT); + CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN); + + CHECK_EQ(strcmp("klass", extensions[i].params[1].name), 0); + CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_JCLASS); + CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_IN); + CHECK_EQ(extensions[i].params[1].null_ok, true); + + CHECK_EQ(strcmp("callbacks", extensions[i].params[2].name), 0); + CHECK_EQ(extensions[i].params[2].base_type, JVMTI_TYPE_CVOID); + CHECK_EQ(extensions[i].params[2].kind, JVMTI_KIND_IN_PTR); + CHECK_EQ(extensions[i].params[2].null_ok, false); + + CHECK_EQ(strcmp("user_data", extensions[i].params[3].name), 0); + CHECK_EQ(extensions[i].params[3].base_type, JVMTI_TYPE_CVOID); + CHECK_EQ(extensions[i].params[3].kind, JVMTI_KIND_IN_PTR); + CHECK_EQ(extensions[i].params[3].null_ok, true); + + CHECK_EQ(extensions[i].error_count, 3); + CHECK(extensions[i].errors != nullptr); + CHECK(extensions[i].errors[0] == JVMTI_ERROR_MUST_POSSESS_CAPABILITY); + CHECK(extensions[i].errors[1] == JVMTI_ERROR_INVALID_CLASS); + CHECK(extensions[i].errors[2] == JVMTI_ERROR_NULL_POINTER); + } } CHECK(gGetObjectHeapIdFn != nullptr); @@ -1004,5 +1044,39 @@ extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkGetObjectHeapIdInCallbac } } +static bool gFoundExt = false; + +static jint JNICALL HeapIterationExtCallback(jlong class_tag ATTRIBUTE_UNUSED, + jlong size ATTRIBUTE_UNUSED, + jlong* tag_ptr, + jint length ATTRIBUTE_UNUSED, + void* user_data ATTRIBUTE_UNUSED, + jint heap_id) { + // We expect some tagged objects at or above the threshold, where the expected heap id is + // encoded into lowest byte. + constexpr jlong kThreshold = 30000000; + jlong tag = *tag_ptr; + if (tag >= kThreshold) { + jint expected_heap_id = static_cast<jint>(tag - kThreshold); + CHECK_EQ(expected_heap_id, heap_id); + gFoundExt = true; + } + return 0; +} + +extern "C" JNIEXPORT void JNICALL Java_art_Test913_iterateThroughHeapExt( + JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) { + CHECK(gIterateThroughHeapExt != nullptr); + + jvmtiHeapCallbacks callbacks; + memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks)); + callbacks.heap_iteration_callback = + reinterpret_cast<decltype(callbacks.heap_iteration_callback)>(HeapIterationExtCallback); + + jvmtiError ret = gIterateThroughHeapExt(jvmti_env, 0, nullptr, &callbacks, nullptr); + JvmtiErrorToException(env, jvmti_env, ret); + CHECK(gFoundExt); +} + } // namespace Test913Heaps } // namespace art diff --git a/test/913-heaps/src/art/Test913.java b/test/913-heaps/src/art/Test913.java index 6694aad868..97f48eea03 100644 --- a/test/913-heaps/src/art/Test913.java +++ b/test/913-heaps/src/art/Test913.java @@ -261,6 +261,15 @@ public class Test913 { checkGetObjectHeapIdInCallback(100000, objClassExpectedHeapId); checkGetObjectHeapIdInCallback(100001, 3); + long baseTag = 30000000; + setTag(Object.class, baseTag + objClassExpectedHeapId); + setTag(Class.class, baseTag + objClassExpectedHeapId); + Object o = new Object(); + extensionTestHolder.add(o); + setTag(o, baseTag + 3); + + iterateThroughHeapExt(); + extensionTestHolder = null; } @@ -719,4 +728,6 @@ public class Test913 { public static native String[] followReferencesString(Object initialObject); public static native String followReferencesPrimitiveArray(Object initialObject); public static native String followReferencesPrimitiveFields(Object initialObject); + + private static native void 
iterateThroughHeapExt(); }
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index 9a437cc822..c99159f1ae 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -828,7 +828,15 @@ def get_default_threads(target):
     adb_command = 'adb shell cat /sys/devices/system/cpu/present'
     cpu_info_proc = subprocess.Popen(adb_command.split(), stdout=subprocess.PIPE)
     cpu_info = cpu_info_proc.stdout.read()
-    return int(cpu_info.split('-')[1])
+    if type(cpu_info) is bytes:
+      cpu_info = cpu_info.decode('utf-8')
+    cpu_info_regex = '\d*-(\d*)'
+    match = re.match(cpu_info_regex, cpu_info)
+    if match:
+      return int(match.group(1))
+    else:
+      raise ValueError('Unable to predict the concurrency for the target. '
+                       'Is device connected?')
   else:
     return multiprocessing.cpu_count()
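
For reference, a minimal standalone sketch of the target-concurrency detection that the testrunner.py hunk above implements. The helper name guess_target_threads, the raw-string regex, and the sample sysfs output "0-7" are illustrative assumptions, not part of the change; only the adb command, the bytes-decoding step, the regex shape, and the error message come from the patch.

import re
import subprocess

def guess_target_threads():
    # Read the present-CPU range from the device, e.g. b"0-7\n" on an 8-core target.
    raw = subprocess.Popen(['adb', 'shell', 'cat', '/sys/devices/system/cpu/present'],
                           stdout=subprocess.PIPE).stdout.read()
    if isinstance(raw, bytes):          # Python 3 pipes return bytes, Python 2 returns str.
        raw = raw.decode('utf-8')
    match = re.match(r'\d*-(\d*)', raw) # Capture the highest present CPU index.
    if match:
        return int(match.group(1))      # "0-7" -> 7
    raise ValueError('Unable to predict the concurrency for the target. '
                     'Is device connected?')

Like the patched code, this returns the highest present CPU index (7 for an 8-core device) rather than the CPU count, matching the behaviour of the old split('-')[1] expression; the regex path simply fails with a clearer error when adb returns nothing or a single-CPU string such as "0".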