Diffstat (limited to 'compiler')
-rw-r--r--  compiler/Android.bp | 1
-rw-r--r--  compiler/linker/arm/relative_patcher_thumb2.cc | 112
-rw-r--r--  compiler/linker/arm/relative_patcher_thumb2.h | 39
-rw-r--r--  compiler/linker/arm/relative_patcher_thumb2_test.cc | 245
-rw-r--r--  compiler/linker/arm64/relative_patcher_arm64.h | 2
-rw-r--r--  compiler/optimizing/block_builder.cc | 20
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 4
-rw-r--r--  compiler/optimizing/code_generator.cc | 2
-rw-r--r--  compiler/optimizing/code_generator.h | 2
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 688
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 9
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 60
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 720
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h | 11
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 14
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 14
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc | 6
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 12
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 12
-rw-r--r--  compiler/optimizing/codegen_test.cc | 30
-rw-r--r--  compiler/optimizing/codegen_test_utils.h | 2
-rw-r--r--  compiler/optimizing/gvn.cc | 4
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc | 13
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.h | 2
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.cc | 57
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.h | 1
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 12
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc | 12
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 4
-rw-r--r--  compiler/optimizing/nodes.h | 3
-rw-r--r--  compiler/optimizing/nodes_shared.h | 43
-rw-r--r--  compiler/optimizing/nodes_vector.h | 7
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 5
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc | 3
-rw-r--r--  compiler/optimizing/scheduler.cc | 40
-rw-r--r--  compiler/optimizing/scheduler.h | 5
-rw-r--r--  compiler/optimizing/scheduler_arm.cc | 822
-rw-r--r--  compiler/optimizing/scheduler_arm.h | 158
-rw-r--r--  compiler/optimizing/scheduler_test.cc | 260
-rw-r--r--  compiler/optimizing/sharpening.cc | 2
-rw-r--r--  compiler/utils/arm/assembler_thumb2.h | 6
41 files changed, 2125 insertions, 1339 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp
index a2b07af810..df896dc73c 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -115,6 +115,7 @@ art_cc_defaults {
"optimizing/intrinsics_arm.cc",
"optimizing/intrinsics_arm_vixl.cc",
"optimizing/nodes_shared.cc",
+ "optimizing/scheduler_arm.cc",
"utils/arm/assembler_arm.cc",
"utils/arm/assembler_arm_vixl.cc",
"utils/arm/assembler_thumb2.cc",
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index ced52ff07a..a98aedfc69 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -18,6 +18,7 @@
#include "arch/arm/asm_support_arm.h"
#include "art_method.h"
+#include "base/bit_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "lock_word.h"
@@ -112,12 +113,22 @@ void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* co
// Check that the next instruction matches the expected LDR.
switch (kind) {
case BakerReadBarrierKind::kField: {
- DCHECK_GE(code->size() - literal_offset, 8u);
- uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
- // LDR (immediate) with correct base_reg.
- CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
- const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
- CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(code->size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
+ // LDR (immediate), encoding T3, with correct base_reg.
+ CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+ } else {
+ DCHECK_GE(code->size() - literal_offset, 6u);
+ uint32_t next_insn = GetInsn16(code, literal_offset + 4u);
+ // LDR (immediate), encoding T1, with correct base_reg.
+ CheckValidReg(next_insn & 0x7u); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
+ }
break;
}
case BakerReadBarrierKind::kArray: {
@@ -131,11 +142,20 @@ void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* co
break;
}
case BakerReadBarrierKind::kGcRoot: {
- DCHECK_GE(literal_offset, 4u);
- uint32_t prev_insn = GetInsn32(code, literal_offset - 4u);
- // LDR (immediate) with correct root_reg.
- const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
- CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(literal_offset, 4u);
+ uint32_t prev_insn = GetInsn32(code, literal_offset - 4u);
+ // LDR (immediate), encoding T3, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+ } else {
+ DCHECK_GE(literal_offset, 2u);
+ uint32_t prev_insn = GetInsn16(code, literal_offset - 2u);
+ // LDR (immediate), encoding T1, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
+ }
break;
}
default:
@@ -160,7 +180,8 @@ void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* co
static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler,
vixl::aarch32::Register base_reg,
vixl::aarch32::MemOperand& lock_word,
- vixl::aarch32::Label* slow_path) {
+ vixl::aarch32::Label* slow_path,
+ int32_t raw_ldr_offset) {
using namespace vixl::aarch32; // NOLINT(build/namespaces)
// Load the lock word containing the rb_state.
__ Ldr(ip, lock_word);
@@ -169,14 +190,7 @@ static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler,
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
__ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
__ B(ne, slow_path, /* is_far_target */ false);
- static_assert(
- BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
- "Field and array LDR offsets must be the same to reuse the same code.");
- // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 instruction (4B) before the return address label; "
- " 2 instructions (8B) for heap poisoning.");
- __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
+ __ Add(lr, lr, raw_ldr_offset);
// Introduce a dependency on the lock_word including rb_state,
// to prevent load-load reordering, and without using
// a memory barrier (which would be more expensive).
@@ -199,6 +213,7 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler&
CheckValidReg(base_reg.GetCode());
Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
CheckValidReg(holder_reg.GetCode());
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip);
// If base_reg differs from holder_reg, the offset was too large and we must have
@@ -210,16 +225,30 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler&
}
vixl::aarch32::Label slow_path;
MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
- EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
__ Bind(&slow_path);
const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
- BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET;
- MemOperand ldr_half_address(lr, ldr_offset + 2);
- __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
- __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
- __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
+ raw_ldr_offset;
+ Register ep_reg(kBakerCcEntrypointRegister);
+ if (width == BakerReadBarrierWidth::kWide) {
+ MemOperand ldr_half_address(lr, ldr_offset + 2);
+ __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
+ __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
+ __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
+ } else {
+ MemOperand ldr_address(lr, ldr_offset);
+ __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1.
+ __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint
+ ep_reg, // for narrow LDR.
+ Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
+ __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4.
+ __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference.
+ }
// Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
- __ Bx(Register(kBakerCcEntrypointRegister)); // Jump to the entrypoint.
+ __ Bx(ep_reg); // Jump to the entrypoint.
if (holder_reg.Is(base_reg)) {
// Add null check slow path. The stack map is at the address pointed to by LR.
__ Bind(&throw_npe);
@@ -233,6 +262,7 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler&
Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
CheckValidReg(base_reg.GetCode());
DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+ DCHECK(BakerReadBarrierWidth::kWide == BakerReadBarrierWidthField::Decode(encoded_data));
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip);
vixl::aarch32::Label slow_path;
@@ -240,10 +270,11 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler&
mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
- EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
__ Bind(&slow_path);
const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
- BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+ raw_ldr_offset;
MemOperand ldr_address(lr, ldr_offset + 2);
__ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
// i.e. Rm+32 because the scale in imm2 is 2.
@@ -261,6 +292,7 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler&
Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
CheckValidReg(root_reg.GetCode());
DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip);
vixl::aarch32::Label return_label, not_marked, forwarding_address;
@@ -280,7 +312,10 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler&
// Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
// to art_quick_read_barrier_mark_introspection_gc_roots.
Register ep_reg(kBakerCcEntrypointRegister);
- __ Add(ep_reg, ep_reg, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
+ int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET;
+ __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
__ Mov(ip, root_reg);
__ Bx(ep_reg);
__ Bind(&forwarding_address);
@@ -344,7 +379,7 @@ uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
DCHECK_LE(offset + 4u, code->size());
- DCHECK_EQ(offset & 1u, 0u);
+ DCHECK_ALIGNED(offset, 2u);
uint8_t* addr = &(*code)[offset];
addr[0] = (value >> 16) & 0xff;
addr[1] = (value >> 24) & 0xff;
@@ -354,7 +389,7 @@ void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offse
uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) {
DCHECK_LE(offset + 4u, code.size());
- DCHECK_EQ(offset & 1u, 0u);
+ DCHECK_ALIGNED(offset, 2u);
const uint8_t* addr = &code[offset];
return
(static_cast<uint32_t>(addr[0]) << 16) +
@@ -369,5 +404,18 @@ uint32_t Thumb2RelativePatcher::GetInsn32(Vector* code, uint32_t offset) {
return GetInsn32(ArrayRef<const uint8_t>(*code), offset);
}
+uint32_t Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset) {
+ DCHECK_LE(offset + 2u, code.size());
+ DCHECK_ALIGNED(offset, 2u);
+ const uint8_t* addr = &code[offset];
+ return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8);
+}
+
+template <typename Vector>
+uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) {
+ static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+ return GetInsn16(ArrayRef<const uint8_t>(*code), offset);
+}
+
} // namespace linker
} // namespace art
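
Note on the two LDR encodings distinguished above: encoding T3 (32-bit) carries a byte offset in imm12, bits 0-11 of its second halfword, while encoding T1 (16-bit) carries a word offset in imm5, bits 6-10, which is why the narrow thunk path scales the extracted value with LSL #2. A minimal standalone sketch of that extraction (illustrative only, not part of the patcher's API; the helper names are invented):

#include <cstdint>

// Encoding T3: "LDR Rt, [Rn, #imm12]". The second halfword is Rt:4 | imm12:12,
// so the byte offset is its low 12 bits (the thunk's UBFX ip, ip, #0, #12).
uint32_t DecodeWideLdrByteOffset(uint32_t second_halfword) {
  return second_halfword & 0xfffu;
}

// Encoding T1: "LDR Rt, [Rn, #imm5*4]" = 0x6800 | imm5 << 6 | Rn << 3 | Rt.
// imm5 is a word offset (the thunk's UBFX ip, ip, #6, #5, then LSL #2 scaling).
uint32_t DecodeNarrowLdrByteOffset(uint16_t insn) {
  return ((insn >> 6) & 0x1fu) << 2;
}
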
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index 7fad245856..7e787d2916 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -35,26 +35,37 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher {
public:
static constexpr uint32_t kBakerCcEntrypointRegister = 4u;
- static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
+ static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg,
+ uint32_t holder_reg,
+ bool narrow) {
CheckValidReg(base_reg);
CheckValidReg(holder_reg);
+ DCHECK(!narrow || base_reg < 8u) << base_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
BakerReadBarrierFirstRegField::Encode(base_reg) |
- BakerReadBarrierSecondRegField::Encode(holder_reg);
+ BakerReadBarrierSecondRegField::Encode(holder_reg) |
+ BakerReadBarrierWidthField::Encode(width);
}
static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
CheckValidReg(base_reg);
return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
BakerReadBarrierFirstRegField::Encode(base_reg) |
- BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+ BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide);
}
- static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
+ static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) {
CheckValidReg(root_reg);
+ DCHECK(!narrow || root_reg < 8u) << root_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
BakerReadBarrierFirstRegField::Encode(root_reg) |
- BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+ BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(width);
}
explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider);
@@ -86,6 +97,12 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher {
kLast
};
+ enum class BakerReadBarrierWidth : uint8_t {
+ kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled).
+ kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled).
+ kLast
+ };
+
static constexpr size_t kBitsForBakerReadBarrierKind =
MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
static constexpr size_t kBitsForRegister = 4u;
@@ -95,9 +112,14 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher {
BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>;
using BakerReadBarrierSecondRegField =
BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
+ static constexpr size_t kBitsForBakerReadBarrierWidth =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast));
+ using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth,
+ kBitsForBakerReadBarrierKind + 2 * kBitsForRegister,
+ kBitsForBakerReadBarrierWidth>;
static void CheckValidReg(uint32_t reg) {
- DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister);
+ DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg;
}
void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data);
@@ -108,6 +130,11 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher {
template <typename Vector>
static uint32_t GetInsn32(Vector* code, uint32_t offset);
+ static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset);
+
+ template <typename Vector>
+ static uint32_t GetInsn16(Vector* code, uint32_t offset);
+
friend class Thumb2RelativePatcherTest;
DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher);
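
As a rough picture of the encoded_data produced by the Encode* helpers above, here is a sketch of the packing, not the BitField implementation; it assumes the kind field occupies 2 bits, which the real code derives with MinimumBitsToStore():

#include <cstdint>

constexpr uint32_t kKindBits = 2u;  // assumption: enough for kField/kArray/kGcRoot
constexpr uint32_t kRegBits = 4u;   // matches kBitsForRegister above

// Same field order as the BitField declarations: kind, first reg, second reg, width.
constexpr uint32_t EncodeFieldDataSketch(uint32_t kind,
                                         uint32_t base_reg,
                                         uint32_t holder_reg,
                                         uint32_t width) {
  return kind |
         (base_reg << kKindBits) |
         (holder_reg << (kKindBits + kRegBits)) |
         (width << (kKindBits + 2u * kRegBits));
}
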
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index 2e28349231..af5fa40dc1 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -52,6 +52,9 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest {
// BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset.
static constexpr uint32_t kBneWPlus0 = 0xf0408000u;
+ // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn.
+ static constexpr uint32_t kLdrInsn = 0x6800u;
+
// LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn.
static constexpr uint32_t kLdrWInsn = 0xf8d00000u;
@@ -223,9 +226,11 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest {
void TestStringReference(uint32_t string_offset);
void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
- std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) {
+ std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg,
+ uint32_t holder_reg,
+ bool narrow) {
const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
- 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg));
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow));
ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
@@ -237,9 +242,9 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest {
return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
- std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
+ std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) {
LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
- 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow));
ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
@@ -260,7 +265,8 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest {
(static_cast<uint32_t>(output_[offset + 1]) << 8);
}
- void TestBakerField(uint32_t offset, uint32_t ref_reg);
+ void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg);
+ void TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg);
};
const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = {
@@ -568,7 +574,7 @@ TEST_F(Thumb2RelativePatcherTest, StringReference4) {
ASSERT_LT(GetMethodOffset(1u), 0xfcu);
}
-void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) {
+void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) {
uint32_t valid_regs[] = {
0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
8, 9, 10, 11, // IP, SP, LR and PC are reserved.
@@ -584,8 +590,8 @@ void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg
const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
ASSERT_EQ(kMethodCodeSize, raw_code.size());
ArrayRef<const uint8_t> code(raw_code);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base_reg, holder_reg, /* narrow */ false);
const LinkerPatch patches[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
};
@@ -608,7 +614,113 @@ void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg
ASSERT_TRUE(
CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
- std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg);
+ std::vector<uint8_t> expected_thunk =
+ CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ size_t gray_check_offset = thunk_offset;
+ if (holder_reg == base_reg) {
+ // Verify that the null-check uses the correct register, i.e. holder_reg.
+ if (holder_reg < 8) {
+ ASSERT_GE(output_.size() - gray_check_offset, 2u);
+ ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ gray_check_offset += 2u;
+ } else {
+ ASSERT_GE(output_.size() - gray_check_offset, 6u);
+ ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+ ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
+ gray_check_offset += 6u;
+ }
+ }
+ // Verify that the lock word for gray bit check is loaded from the holder address.
+ ASSERT_GE(output_.size() - gray_check_offset,
+ 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+ const uint32_t load_lock_word =
+ kLdrWInsn |
+ (holder_reg << 16) |
+ (/* IP */ 12 << 12) |
+ mirror::Object::MonitorOffset().Uint32Value();
+ ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset));
+ // Verify the gray bit check.
+ DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate.
+ uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+ const uint32_t tst_gray_bit_without_offset =
+ 0xf0100f00 | (/* IP */ 12 << 16)
+ | (((ror_shift >> 4) & 1) << 26) // i
+ | (((ror_shift >> 1) & 7) << 12) // imm3
+ | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift).
+ EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u));
+ EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE
+ // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset").
+ const uint32_t fake_dependency =
+ 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+ (/* IP */ 12) | // Rm = IP
+ (base_reg << 16) | // Rn = base_reg
+ (base_reg << 8); // Rd = base_reg
+ EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u));
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+ }
+}
+
+void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
+ };
+ DCHECK_ALIGNED(offset, 4u);
+ DCHECK_LT(offset, 32u);
+ constexpr size_t kMethodCodeSize = 6u;
+ constexpr size_t kLiteralOffset = 0u;
+ uint32_t method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ if (base_reg >= 8u) {
+ continue;
+ }
+ for (uint32_t holder_reg : valid_regs) {
+ uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
+ const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base_reg, holder_reg, /* narrow */ true);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
+ };
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ if (base_reg >= 8u) {
+ continue;
+ }
+ for (uint32_t holder_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
+ const std::vector<uint8_t> expected_code = RawCode({bne, ldr});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne;
+ ASSERT_TRUE(
+ CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk =
+ CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true);
ASSERT_GT(output_.size(), thunk_offset);
ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
@@ -666,15 +778,26 @@ void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg
}
}
-#define TEST_BAKER_FIELD(offset, ref_reg) \
- TEST_F(Thumb2RelativePatcherTest, \
- BakerOffset##offset##_##ref_reg) { \
- TestBakerField(offset, ref_reg); \
+#define TEST_BAKER_FIELD_WIDE(offset, ref_reg) \
+ TEST_F(Thumb2RelativePatcherTest, \
+ BakerOffsetWide##offset##_##ref_reg) { \
+ TestBakerFieldWide(offset, ref_reg); \
}
-TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0)
-TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 7)
-TEST_BAKER_FIELD(/* offset */ 0xffc, /* ref_reg */ 11)
+TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3)
+TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7)
+TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11)
+
+#define TEST_BAKER_FIELD_NARROW(offset, ref_reg) \
+ TEST_F(Thumb2RelativePatcherTest, \
+ BakerOffsetNarrow##offset##_##ref_reg) { \
+ TestBakerFieldNarrow(offset, ref_reg); \
+ }
+
+TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3)
+TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7)
TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) {
// One thunk in the middle with maximum distance branches to it from both sides.
@@ -682,8 +805,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) {
constexpr uint32_t kLiteralOffset1 = 6u;
const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
ArrayRef<const uint8_t> code1(raw_code1);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
const LinkerPatch patches1[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
};
@@ -710,7 +833,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) {
// - thunk size and method 3 pre-header, rounded up (padding in between if needed)
// - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
// - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
- size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+ size_t thunk_size =
+ CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size();
size_t filler2_size =
1 * MB - (kLiteralOffset2 + kPcAdjustment)
- RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
@@ -749,8 +873,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) {
constexpr uint32_t kLiteralOffset1 = 4u;
const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn});
ArrayRef<const uint8_t> code1(raw_code1);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
const LinkerPatch patches1[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
};
@@ -779,8 +903,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast
constexpr uint32_t kLiteralOffset1 = 6u;
const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
ArrayRef<const uint8_t> code1(raw_code1);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
const LinkerPatch patches1[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
};
@@ -809,7 +933,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast
// - thunk size and method 3 pre-header, rounded up (padding in between if needed)
// - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
// - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
- size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+ size_t thunk_size =
+ CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size();
size_t filler2_size =
1 * MB - (kReachableFromOffset2 + kPcAdjustment)
- RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
@@ -929,7 +1054,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerArray) {
}
}
-TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) {
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) {
uint32_t valid_regs[] = {
0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
8, 9, 10, 11, // IP, SP, LR and PC are reserved.
@@ -945,7 +1070,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) {
ArrayRef<const uint8_t> code(raw_code);
const LinkerPatch patches[] = {
LinkerPatch::BakerReadBarrierBranchPatch(
- kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)),
+ kLiteralOffset,
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)),
};
AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
}
@@ -962,7 +1088,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) {
ASSERT_EQ(kMethodCodeSize, expected_code.size());
EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
- std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg);
+ std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false);
ASSERT_GT(output_.size(), thunk_offset);
ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
@@ -972,7 +1098,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) {
ASSERT_TRUE(false);
}
- // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
+ // Verify that the fast-path null-check uses the correct register, i.e. root_reg.
if (root_reg < 8) {
ASSERT_GE(output_.size() - thunk_offset, 2u);
ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
@@ -988,6 +1114,60 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) {
}
}
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ // Not applicable to high registers.
+ };
+ constexpr size_t kMethodCodeSize = 6u;
+ constexpr size_t kLiteralOffset = 2u;
+ uint32_t method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
+ const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(
+ kLiteralOffset,
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)),
+ };
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
+ const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
+ ASSERT_GE(output_.size() - thunk_offset, 2u);
+ ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+}
+
TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) {
// Test 1MiB of patches to the same thunk to stress-test different large offsets.
// (The low bits are not that important but the location of the high bits is easy to get wrong.)
@@ -998,7 +1178,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) {
patches.reserve(num_patches);
const uint32_t ldr =
kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12);
- uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0);
+ uint32_t encoded_data =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false);
for (size_t i = 0; i != num_patches; ++i) {
PushBackInsn(&code, ldr);
PushBackInsn(&code, kBneWPlus0);
@@ -1067,7 +1248,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) {
// this pushes the first GC root thunk's pending MaxNextOffset() before the method call
// thunk's pending MaxNextOffset() which needs to be adjusted.
ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment,
- CompileBakerGcRootThunk(/* root_reg */ 0).size());
+ CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size());
static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8");
constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment;
constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment;
@@ -1080,9 +1261,9 @@ TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) {
ldr2, kBneWPlus0, // Second GC root LDR with read barrier.
});
uint32_t encoded_data1 =
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1);
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false);
uint32_t encoded_data2 =
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2);
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false);
const LinkerPatch last_method_patches[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1),
LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2),
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index d1ab410a7e..02a5b1ef8f 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -100,7 +100,7 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher {
BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
static void CheckValidReg(uint32_t reg) {
- DCHECK(reg < 30u && reg != 16u && reg != 17u);
+ DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg;
}
void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data);
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index 5e70a8284d..1e75f10ebe 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -310,16 +310,18 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// least one predecessor is not covered by the same TryItem as the try block.
// We do not split each edge separately, but rather create one boundary block
// that all predecessors are relinked to. This preserves loop headers (b/23895756).
- for (auto entry : try_block_info) {
- HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+ for (const auto& entry : try_block_info) {
+ uint32_t block_id = entry.first;
+ const DexFile::TryItem* try_item = entry.second;
+ HBasicBlock* try_block = graph_->GetBlocks()[block_id];
for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
- if (GetTryItem(predecessor, try_block_info) != entry.second) {
+ if (GetTryItem(predecessor, try_block_info) != try_item) {
// Found a predecessor not covered by the same TryItem. Insert entering
// boundary block.
HTryBoundary* try_entry =
new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
try_block->CreateImmediateDominator()->AddInstruction(try_entry);
- LinkToCatchBlocks(try_entry, code_item_, entry.second, catch_blocks);
+ LinkToCatchBlocks(try_entry, code_item_, try_item, catch_blocks);
break;
}
}
@@ -327,8 +329,10 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// Do a second pass over the try blocks and insert exit TryBoundaries where
// the successor is not in the same TryItem.
- for (auto entry : try_block_info) {
- HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+ for (const auto& entry : try_block_info) {
+ uint32_t block_id = entry.first;
+ const DexFile::TryItem* try_item = entry.second;
+ HBasicBlock* try_block = graph_->GetBlocks()[block_id];
// NOTE: Do not use iterators because SplitEdge would invalidate them.
for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
HBasicBlock* successor = try_block->GetSuccessors()[i];
@@ -337,7 +341,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// covered by the same TryItem. Otherwise the previous pass would have
// created a non-throwing boundary block.
if (GetTryItem(successor, try_block_info) != nullptr) {
- DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info));
+ DCHECK_EQ(try_item, GetTryItem(successor, try_block_info));
continue;
}
@@ -345,7 +349,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
HTryBoundary* try_exit =
new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
- LinkToCatchBlocks(try_exit, code_item_, entry.second, catch_blocks);
+ LinkToCatchBlocks(try_exit, code_item_, try_item, catch_blocks);
}
}
}
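
The loop changes above replace `for (auto entry : try_block_info)` with a const reference, so each iteration binds to the stored element instead of copying the key/value pair; naming `block_id` and `try_item` is a readability cleanup on top of that. A generic illustration of the difference (plain std::map, not ART's arena containers):

#include <cstdint>
#include <map>

void IterateSketch(const std::map<uint32_t, const char*>& try_block_info) {
  for (const auto& entry : try_block_info) {  // binds to the element, no copy
    uint32_t block_id = entry.first;
    const char* try_item = entry.second;
    (void)block_id;   // a by-value `auto entry` would copy the pair each iteration
    (void)try_item;
  }
}
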
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index ed630cda91..f3ecdf036a 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1734,8 +1734,8 @@ class BCEVisitor : public HGraphVisitor {
*/
void InsertPhiNodes() {
// Scan all new deoptimization blocks.
- for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) {
- HBasicBlock* true_block = it1->second;
+ for (const auto& entry : taken_test_loop_) {
+ HBasicBlock* true_block = entry.second;
HBasicBlock* new_preheader = true_block->GetSingleSuccessor();
// Scan all instructions in a new deoptimization block.
for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) {
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 5136d7d2b8..65f3c72e99 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -145,7 +145,7 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) {
}
size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
- auto pointer_size = InstructionSetPointerSize(GetInstructionSet());
+ PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet());
return static_cast<size_t>(pointer_size) * index;
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ea463eeb62..9ef692aaf0 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -842,7 +842,7 @@ class SlowPathGenerator {
const uint32_t dex_pc = instruction->GetDexPc();
auto iter = slow_path_map_.find(dex_pc);
if (iter != slow_path_map_.end()) {
- auto candidates = iter->second;
+ const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
for (const auto& it : candidates) {
InstructionType* other_instruction = it.first;
SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1990e8f67d..ab3d499235 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -90,13 +90,17 @@ static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instru
}
static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) {
- DCHECK(down_cast<Thumb2Assembler*>(codegen->GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler()));
__ BindTrackedLabel(bne_label);
Label placeholder_label;
__ b(&placeholder_label, NE); // Placeholder, patched at link-time.
__ Bind(&placeholder_label);
}
+static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) {
+ return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u;
+}
+
static constexpr int kRegListThreshold = 4;
// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
@@ -1652,34 +1656,6 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) {
}
}
-static int64_t AdjustConstantForCondition(int64_t value,
- IfCondition* condition,
- IfCondition* opposite) {
- if (value == 1) {
- if (*condition == kCondB) {
- value = 0;
- *condition = kCondEQ;
- *opposite = kCondNE;
- } else if (*condition == kCondAE) {
- value = 0;
- *condition = kCondNE;
- *opposite = kCondEQ;
- }
- } else if (value == -1) {
- if (*condition == kCondGT) {
- value = 0;
- *condition = kCondGE;
- *opposite = kCondLT;
- } else if (*condition == kCondLE) {
- value = 0;
- *condition = kCondLT;
- *opposite = kCondGE;
- }
- }
-
- return value;
-}
-
static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition,
bool invert,
CodeGeneratorARM* codegen) {
@@ -1693,7 +1669,7 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond
std::swap(cond, opposite);
}
- std::pair<Condition, Condition> ret(EQ, NE);
+ std::pair<Condition, Condition> ret;
const Location left = locations->InAt(0);
const Location right = locations->InAt(1);
@@ -1701,38 +1677,7 @@ static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* cond
const Register left_high = left.AsRegisterPairHigh<Register>();
const Register left_low = left.AsRegisterPairLow<Register>();
- int64_t value = AdjustConstantForCondition(right.GetConstant()->AsLongConstant()->GetValue(),
- &cond,
- &opposite);
-
- // Comparisons against 0 are common enough to deserve special attention.
- if (value == 0) {
- switch (cond) {
- case kCondNE:
- // x > 0 iff x != 0 when the comparison is unsigned.
- case kCondA:
- ret = std::make_pair(NE, EQ);
- FALLTHROUGH_INTENDED;
- case kCondEQ:
- // x <= 0 iff x == 0 when the comparison is unsigned.
- case kCondBE:
- __ orrs(IP, left_low, ShifterOperand(left_high));
- return ret;
- case kCondLT:
- case kCondGE:
- __ cmp(left_high, ShifterOperand(0));
- return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
- // Trivially true or false.
- case kCondB:
- ret = std::make_pair(NE, EQ);
- FALLTHROUGH_INTENDED;
- case kCondAE:
- __ cmp(left_low, ShifterOperand(left_low));
- return ret;
- default:
- break;
- }
- }
+ int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
switch (cond) {
case kCondEQ:
@@ -1892,14 +1837,10 @@ static std::pair<Condition, Condition> GenerateTest(HCondition* condition,
static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) {
if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
const LocationSummary* const locations = condition->GetLocations();
+ const IfCondition c = condition->GetCondition();
if (locations->InAt(1).IsConstant()) {
- IfCondition c = condition->GetCondition();
- IfCondition opposite = condition->GetOppositeCondition();
- const int64_t value = AdjustConstantForCondition(
- Int64FromConstant(locations->InAt(1).GetConstant()),
- &c,
- &opposite);
+ const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue();
ShifterOperand so;
if (c < kCondLT || c > kCondGE) {
@@ -1907,11 +1848,9 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) {
// we check that the least significant half of the first input to be compared
// is in a low register (the other half is read outside an IT block), and
// the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
- // encoding can be used; 0 is always handled, no matter what registers are
- // used by the first input.
- if (value != 0 &&
- (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
- !IsUint<8>(Low32Bits(value)))) {
+ // encoding can be used.
+ if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
+ !IsUint<8>(Low32Bits(value))) {
return false;
}
} else if (c == kCondLE || c == kCondGT) {
@@ -1938,329 +1877,6 @@ static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) {
return true;
}
-static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARM* codegen) {
- DCHECK(CanGenerateTest(cond, codegen->GetAssembler()));
-
- const Register out = cond->GetLocations()->Out().AsRegister<Register>();
- const auto condition = GenerateTest(cond, false, codegen);
-
- __ mov(out, ShifterOperand(0), AL, kCcKeep);
-
- if (ArmAssembler::IsLowRegister(out)) {
- __ it(condition.first);
- __ mov(out, ShifterOperand(1), condition.first);
- } else {
- Label done_label;
- Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
-
- __ b(final_label, condition.second);
- __ LoadImmediate(out, 1);
-
- if (done_label.IsLinked()) {
- __ Bind(&done_label);
- }
- }
-}
-
-static void GenerateEqualLong(HCondition* cond, CodeGeneratorARM* codegen) {
- DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
-
- const LocationSummary* const locations = cond->GetLocations();
- IfCondition condition = cond->GetCondition();
- const Register out = locations->Out().AsRegister<Register>();
- const Location left = locations->InAt(0);
- const Location right = locations->InAt(1);
- Register left_high = left.AsRegisterPairHigh<Register>();
- Register left_low = left.AsRegisterPairLow<Register>();
-
- if (right.IsConstant()) {
- IfCondition opposite = cond->GetOppositeCondition();
- const int64_t value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
- &condition,
- &opposite);
- int32_t value_high = -High32Bits(value);
- int32_t value_low = -Low32Bits(value);
-
- // The output uses Location::kNoOutputOverlap.
- if (out == left_high) {
- std::swap(left_low, left_high);
- std::swap(value_low, value_high);
- }
-
- __ AddConstant(out, left_low, value_low);
- __ AddConstant(IP, left_high, value_high);
- } else {
- DCHECK(right.IsRegisterPair());
- __ sub(IP, left_high, ShifterOperand(right.AsRegisterPairHigh<Register>()));
- __ sub(out, left_low, ShifterOperand(right.AsRegisterPairLow<Register>()));
- }
-
- // Need to check after calling AdjustConstantForCondition().
- DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
-
- if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) {
- __ orrs(out, out, ShifterOperand(IP));
- __ it(NE);
- __ mov(out, ShifterOperand(1), NE);
- } else {
- __ orr(out, out, ShifterOperand(IP));
- codegen->GenerateConditionWithZero(condition, out, out, IP);
- }
-}
-
-static void GenerateLongComparesAndJumps(HCondition* cond,
- Label* true_label,
- Label* false_label,
- CodeGeneratorARM* codegen) {
- LocationSummary* locations = cond->GetLocations();
- Location left = locations->InAt(0);
- Location right = locations->InAt(1);
- IfCondition if_cond = cond->GetCondition();
-
- Register left_high = left.AsRegisterPairHigh<Register>();
- Register left_low = left.AsRegisterPairLow<Register>();
- IfCondition true_high_cond = if_cond;
- IfCondition false_high_cond = cond->GetOppositeCondition();
- Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part
-
- // Set the conditions for the test, remembering that == needs to be
- // decided using the low words.
- switch (if_cond) {
- case kCondEQ:
- case kCondNE:
- // Nothing to do.
- break;
- case kCondLT:
- false_high_cond = kCondGT;
- break;
- case kCondLE:
- true_high_cond = kCondLT;
- break;
- case kCondGT:
- false_high_cond = kCondLT;
- break;
- case kCondGE:
- true_high_cond = kCondGT;
- break;
- case kCondB:
- false_high_cond = kCondA;
- break;
- case kCondBE:
- true_high_cond = kCondB;
- break;
- case kCondA:
- false_high_cond = kCondB;
- break;
- case kCondAE:
- true_high_cond = kCondA;
- break;
- }
- if (right.IsConstant()) {
- int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
- int32_t val_low = Low32Bits(value);
- int32_t val_high = High32Bits(value);
-
- __ CmpConstant(left_high, val_high);
- if (if_cond == kCondNE) {
- __ b(true_label, ARMCondition(true_high_cond));
- } else if (if_cond == kCondEQ) {
- __ b(false_label, ARMCondition(false_high_cond));
- } else {
- __ b(true_label, ARMCondition(true_high_cond));
- __ b(false_label, ARMCondition(false_high_cond));
- }
- // Must be equal high, so compare the lows.
- __ CmpConstant(left_low, val_low);
- } else {
- Register right_high = right.AsRegisterPairHigh<Register>();
- Register right_low = right.AsRegisterPairLow<Register>();
-
- __ cmp(left_high, ShifterOperand(right_high));
- if (if_cond == kCondNE) {
- __ b(true_label, ARMCondition(true_high_cond));
- } else if (if_cond == kCondEQ) {
- __ b(false_label, ARMCondition(false_high_cond));
- } else {
- __ b(true_label, ARMCondition(true_high_cond));
- __ b(false_label, ARMCondition(false_high_cond));
- }
- // Must be equal high, so compare the lows.
- __ cmp(left_low, ShifterOperand(right_low));
- }
- // The last comparison might be unsigned.
- // TODO: optimize cases where this is always true/false
- __ b(true_label, final_condition);
-}
-
-static void GenerateConditionLong(HCondition* cond, CodeGeneratorARM* codegen) {
- DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
-
- const LocationSummary* const locations = cond->GetLocations();
- IfCondition condition = cond->GetCondition();
- const Register out = locations->Out().AsRegister<Register>();
- const Location left = locations->InAt(0);
- const Location right = locations->InAt(1);
-
- if (right.IsConstant()) {
- IfCondition opposite = cond->GetOppositeCondition();
-
- // Comparisons against 0 are common enough to deserve special attention.
- if (AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
- &condition,
- &opposite) == 0) {
- switch (condition) {
- case kCondNE:
- case kCondA:
- if (ArmAssembler::IsLowRegister(out)) {
- // We only care if both input registers are 0 or not.
- __ orrs(out,
- left.AsRegisterPairLow<Register>(),
- ShifterOperand(left.AsRegisterPairHigh<Register>()));
- __ it(NE);
- __ mov(out, ShifterOperand(1), NE);
- return;
- }
-
- FALLTHROUGH_INTENDED;
- case kCondEQ:
- case kCondBE:
- // We only care if both input registers are 0 or not.
- __ orr(out,
- left.AsRegisterPairLow<Register>(),
- ShifterOperand(left.AsRegisterPairHigh<Register>()));
- codegen->GenerateConditionWithZero(condition, out, out);
- return;
- case kCondLT:
- case kCondGE:
- // We only care about the sign bit.
- FALLTHROUGH_INTENDED;
- case kCondAE:
- case kCondB:
- codegen->GenerateConditionWithZero(condition, out, left.AsRegisterPairHigh<Register>());
- return;
- case kCondLE:
- case kCondGT:
- default:
- break;
- }
- }
- }
-
- if ((condition == kCondEQ || condition == kCondNE) &&
- // If `out` is a low register, then the GenerateConditionGeneric()
- // function generates a shorter code sequence that is still branchless.
- (!ArmAssembler::IsLowRegister(out) || !CanGenerateTest(cond, codegen->GetAssembler()))) {
- GenerateEqualLong(cond, codegen);
- return;
- }
-
- if (CanGenerateTest(cond, codegen->GetAssembler())) {
- GenerateConditionGeneric(cond, codegen);
- return;
- }
-
- // Convert the jumps into the result.
- Label done_label;
- Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
- Label true_label, false_label;
-
- GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen);
-
- // False case: result = 0.
- __ Bind(&false_label);
- __ mov(out, ShifterOperand(0));
- __ b(final_label);
-
- // True case: result = 1.
- __ Bind(&true_label);
- __ mov(out, ShifterOperand(1));
-
- if (done_label.IsLinked()) {
- __ Bind(&done_label);
- }
-}
-
-static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARM* codegen) {
- const Primitive::Type type = cond->GetLeft()->GetType();
-
- DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
-
- if (type == Primitive::kPrimLong) {
- GenerateConditionLong(cond, codegen);
- return;
- }
-
- const LocationSummary* const locations = cond->GetLocations();
- IfCondition condition = cond->GetCondition();
- Register in = locations->InAt(0).AsRegister<Register>();
- const Register out = locations->Out().AsRegister<Register>();
- const Location right = cond->GetLocations()->InAt(1);
- int64_t value;
-
- if (right.IsConstant()) {
- IfCondition opposite = cond->GetOppositeCondition();
-
- value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
- &condition,
- &opposite);
-
- // Comparisons against 0 are common enough to deserve special attention.
- if (value == 0) {
- switch (condition) {
- case kCondNE:
- case kCondA:
- if (ArmAssembler::IsLowRegister(out) && out == in) {
- __ cmp(out, ShifterOperand(0));
- __ it(NE);
- __ mov(out, ShifterOperand(1), NE);
- return;
- }
-
- FALLTHROUGH_INTENDED;
- case kCondEQ:
- case kCondBE:
- case kCondLT:
- case kCondGE:
- case kCondAE:
- case kCondB:
- codegen->GenerateConditionWithZero(condition, out, in);
- return;
- case kCondLE:
- case kCondGT:
- default:
- break;
- }
- }
- }
-
- if (condition == kCondEQ || condition == kCondNE) {
- ShifterOperand operand;
-
- if (right.IsConstant()) {
- operand = ShifterOperand(value);
- } else if (out == right.AsRegister<Register>()) {
- // Avoid 32-bit instructions if possible.
- operand = ShifterOperand(in);
- in = right.AsRegister<Register>();
- } else {
- operand = ShifterOperand(right.AsRegister<Register>());
- }
-
- if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) {
- __ subs(out, in, operand);
- __ it(NE);
- __ mov(out, ShifterOperand(1), NE);
- } else {
- __ sub(out, in, operand);
- codegen->GenerateConditionWithZero(condition, out, out);
- }
-
- return;
- }
-
- GenerateConditionGeneric(cond, codegen);
-}
-
static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
const Primitive::Type type = constant->GetType();
bool ret = false;
@@ -2867,6 +2483,89 @@ void LocationsBuilderARM::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
+void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
+ Label* true_label,
+ Label* false_label) {
+ LocationSummary* locations = cond->GetLocations();
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+ IfCondition if_cond = cond->GetCondition();
+
+ Register left_high = left.AsRegisterPairHigh<Register>();
+ Register left_low = left.AsRegisterPairLow<Register>();
+ IfCondition true_high_cond = if_cond;
+ IfCondition false_high_cond = cond->GetOppositeCondition();
+ Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part
+
+ // Set the conditions for the test, remembering that == needs to be
+ // decided using the low words.
+ switch (if_cond) {
+ case kCondEQ:
+ case kCondNE:
+ // Nothing to do.
+ break;
+ case kCondLT:
+ false_high_cond = kCondGT;
+ break;
+ case kCondLE:
+ true_high_cond = kCondLT;
+ break;
+ case kCondGT:
+ false_high_cond = kCondLT;
+ break;
+ case kCondGE:
+ true_high_cond = kCondGT;
+ break;
+ case kCondB:
+ false_high_cond = kCondA;
+ break;
+ case kCondBE:
+ true_high_cond = kCondB;
+ break;
+ case kCondA:
+ false_high_cond = kCondB;
+ break;
+ case kCondAE:
+ true_high_cond = kCondA;
+ break;
+ }
+ if (right.IsConstant()) {
+ int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+ int32_t val_low = Low32Bits(value);
+ int32_t val_high = High32Bits(value);
+
+ __ CmpConstant(left_high, val_high);
+ if (if_cond == kCondNE) {
+ __ b(true_label, ARMCondition(true_high_cond));
+ } else if (if_cond == kCondEQ) {
+ __ b(false_label, ARMCondition(false_high_cond));
+ } else {
+ __ b(true_label, ARMCondition(true_high_cond));
+ __ b(false_label, ARMCondition(false_high_cond));
+ }
+ // Must be equal high, so compare the lows.
+ __ CmpConstant(left_low, val_low);
+ } else {
+ Register right_high = right.AsRegisterPairHigh<Register>();
+ Register right_low = right.AsRegisterPairLow<Register>();
+
+ __ cmp(left_high, ShifterOperand(right_high));
+ if (if_cond == kCondNE) {
+ __ b(true_label, ARMCondition(true_high_cond));
+ } else if (if_cond == kCondEQ) {
+ __ b(false_label, ARMCondition(false_high_cond));
+ } else {
+ __ b(true_label, ARMCondition(true_high_cond));
+ __ b(false_label, ARMCondition(false_high_cond));
+ }
+ // Must be equal high, so compare the lows.
+ __ cmp(left_low, ShifterOperand(right_low));
+ }
+ // The last comparison might be unsigned.
+ // TODO: optimize cases where this is always true/false
+ __ b(true_label, final_condition);
+}
+
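For reference, the control flow emitted by GenerateLongComparesAndJumps() can be restated in plain C++; a minimal sketch of the signed less-than case follows (function and variable names are ours, not part of the patch):

    #include <cstdint>

    // Sketch: what the cmp/b.../cmp + unsigned low-word compare computes for
    // `a < b` when each int64 lives in a {lo, hi} register pair.
    bool LessThan64(int64_t a, int64_t b) {
      int32_t a_hi = static_cast<int32_t>(a >> 32);
      int32_t b_hi = static_cast<int32_t>(b >> 32);
      if (a_hi < b_hi) return true;   // b(true_label, LT): true_high_cond
      if (a_hi > b_hi) return false;  // b(false_label, GT): false_high_cond
      // High words equal: decide on the low words with an unsigned compare,
      // which is why final_condition uses ARMUnsignedCondition(if_cond).
      return static_cast<uint32_t>(a) < static_cast<uint32_t>(b);
    }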
void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target_in,
Label* false_target_in) {
@@ -2901,7 +2600,7 @@ void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condi
Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
- GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_);
+ GenerateLongComparesAndJumps(condition, true_target, false_target);
if (false_target != &fallthrough_target) {
__ b(false_target);
@@ -3216,80 +2915,6 @@ void CodeGeneratorARM::GenerateNop() {
__ nop();
}
-// `temp` is an extra temporary register that is used for some conditions;
-// callers may not specify it, in which case the method will use a scratch
-// register instead.
-void CodeGeneratorARM::GenerateConditionWithZero(IfCondition condition,
- Register out,
- Register in,
- Register temp) {
- switch (condition) {
- case kCondEQ:
- // x <= 0 iff x == 0 when the comparison is unsigned.
- case kCondBE:
- if (temp == kNoRegister || (ArmAssembler::IsLowRegister(out) && out != in)) {
- temp = out;
- }
-
- // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
- // different as well.
- if (ArmAssembler::IsLowRegister(in) && ArmAssembler::IsLowRegister(temp) && in != temp) {
- // temp = - in; only 0 sets the carry flag.
- __ rsbs(temp, in, ShifterOperand(0));
-
- if (out == in) {
- std::swap(in, temp);
- }
-
- // out = - in + in + carry = carry
- __ adc(out, temp, ShifterOperand(in));
- } else {
- // If `in` is 0, then it has 32 leading zeros, and less than that otherwise.
- __ clz(out, in);
- // Any number less than 32 logically shifted right by 5 bits results in 0;
- // the same operation on 32 yields 1.
- __ Lsr(out, out, 5);
- }
-
- break;
- case kCondNE:
- // x > 0 iff x != 0 when the comparison is unsigned.
- case kCondA:
- if (out == in) {
- if (temp == kNoRegister || in == temp) {
- temp = IP;
- }
- } else if (temp == kNoRegister || !ArmAssembler::IsLowRegister(temp)) {
- temp = out;
- }
-
- // temp = in - 1; only 0 does not set the carry flag.
- __ subs(temp, in, ShifterOperand(1));
- // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
- __ sbc(out, in, ShifterOperand(temp));
- break;
- case kCondGE:
- __ mvn(out, ShifterOperand(in));
- in = out;
- FALLTHROUGH_INTENDED;
- case kCondLT:
- // We only care about the sign bit.
- __ Lsr(out, in, 31);
- break;
- case kCondAE:
- // Trivially true.
- __ mov(out, ShifterOperand(1));
- break;
- case kCondB:
- // Trivially false.
- __ mov(out, ShifterOperand(0));
- break;
- default:
- LOG(FATAL) << "Unexpected condition " << condition;
- UNREACHABLE();
- }
-}
-
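The helper removed above leans on ARM's carry semantics, and the two flag games are easy to mis-read; here is a self-contained C++ restatement (our code, assuming the usual carry = no-borrow convention):

    #include <cassert>
    #include <cstdint>

    // rsbs temp, in, #0: the subtraction 0 - in borrows unless in == 0, so the
    // carry is set exactly when in == 0; adc out, temp, in materializes it.
    uint32_t IsZero(uint32_t in) {
      uint32_t temp = 0u - in;
      uint32_t carry = (0u >= in) ? 1u : 0u;  // ARM carry = no borrow
      return temp + in + carry;               // (-in) + in + carry == carry
    }

    // subs temp, in, #1 clears the carry only for in == 0; sbc out, in, temp
    // computes in - (in - 1) - (1 - carry) == carry.
    uint32_t IsNonZero(uint32_t in) {
      uint32_t temp = in - 1u;
      uint32_t carry = (in >= 1u) ? 1u : 0u;
      return in - temp - (1u - carry);
    }

    int main() {
      for (uint32_t x : {0u, 1u, 7u, 0xFFFFFFFFu}) {
        assert(IsZero(x) == (x == 0 ? 1u : 0u));
        assert(IsNonZero(x) == (x != 0 ? 1u : 0u));
      }
    }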
void LocationsBuilderARM::HandleCondition(HCondition* cond) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
@@ -3326,42 +2951,48 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
return;
}
- const Primitive::Type type = cond->GetLeft()->GetType();
+ const Register out = cond->GetLocations()->Out().AsRegister<Register>();
+
+ if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) {
+ const auto condition = GenerateTest(cond, false, codegen_);
- if (Primitive::IsFloatingPointType(type)) {
- GenerateConditionGeneric(cond, codegen_);
+ __ it(condition.first);
+ __ mov(out, ShifterOperand(1), condition.first);
+ __ it(condition.second);
+ __ mov(out, ShifterOperand(0), condition.second);
return;
}
- DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
-
- if (type == Primitive::kPrimBoolean) {
- const LocationSummary* const locations = cond->GetLocations();
- const IfCondition c = cond->GetCondition();
- Register left = locations->InAt(0).AsRegister<Register>();
- const Register out = locations->Out().AsRegister<Register>();
- const Location right_loc = locations->InAt(1);
+ // Convert the jumps into the result.
+ Label done_label;
+ Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
- // All other cases are handled by the instruction simplifier.
- DCHECK((c == kCondEQ || c == kCondNE) && !right_loc.IsConstant());
+ if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
+ Label true_label, false_label;
- Register right = right_loc.AsRegister<Register>();
+ GenerateLongComparesAndJumps(cond, &true_label, &false_label);
- // Avoid 32-bit instructions if possible.
- if (out == right) {
- std::swap(left, right);
- }
+ // False case: result = 0.
+ __ Bind(&false_label);
+ __ LoadImmediate(out, 0);
+ __ b(final_label);
- __ eor(out, left, ShifterOperand(right));
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ LoadImmediate(out, 1);
+ } else {
+ DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
- if (c == kCondEQ) {
- __ eor(out, out, ShifterOperand(1));
- }
+ const auto condition = GenerateTest(cond, false, codegen_);
- return;
+ __ mov(out, ShifterOperand(0), AL, kCcKeep);
+ __ b(final_label, condition.second);
+ __ LoadImmediate(out, 1);
}
- GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
+ if (done_label.IsLinked()) {
+ __ Bind(&done_label);
+ }
}
void LocationsBuilderARM::VisitEqual(HEqual* comp) {
@@ -6743,6 +6374,15 @@ void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress*
}
}
+void LocationsBuilderARM::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConvention calling_convention;
@@ -8430,8 +8070,9 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
// return_address:
CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg);
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow);
Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8444,16 +8085,18 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- static_assert(
- BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
- "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
// Currently the offset is always within range. If that changes,
// we shall have to split the load the same way as for fields.
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ LoadFromOffset(kLoadWord, root_reg, obj, offset);
EmitPlaceholderBne(codegen_, bne_label);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
} else {
// Note that we do not actually check the value of
// `GetIsGcMarking()` to decide whether to mark the loaded GC
@@ -8553,10 +8196,12 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ Register ref_reg = ref.AsRegister<Register>();
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
Register base = obj;
if (offset >= kReferenceLoadMinFarOffset) {
base = temp.AsRegister<Register>();
@@ -8564,10 +8209,14 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
__ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
offset &= (kReferenceLoadMinFarOffset - 1u);
+      // Use narrow LDR only for small offsets. Generating a narrow-encoding LDR for large
+      // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+      // increase the overall code size once the generated thunks are taken into account.
+ DCHECK(!narrow);
}
CheckLastTempIsBakerCcEntrypointRegister(instruction);
uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj);
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow);
Label* bne_label = NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8580,19 +8229,20 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
- Register ref_reg = ref.AsRegister<Register>();
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ LoadFromOffset(kLoadWord, ref_reg, base, offset);
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
return;
}
@@ -8638,7 +8288,7 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
@@ -8663,15 +8313,15 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
DCHECK(!needs_null_check); // The thunk cannot handle the null check.
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
return;
}
@@ -9426,14 +9076,20 @@ static void PatchJitRootUse(uint8_t* code,
void CodeGeneratorARM::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index ac9d57aa0a..b94ee20d9d 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -299,6 +299,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
void GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target,
Label* false_target);
+ void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivRemByPowerOfTwo(HBinaryOperation* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -625,14 +626,6 @@ class CodeGeneratorARM : public CodeGenerator {
void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
- // `temp` is an extra temporary register that is used for some conditions;
- // callers may not specify it, in which case the method will use a scratch
- // register instead.
- void GenerateConditionWithZero(IfCondition condition,
- Register out,
- Register in,
- Register temp = kNoRegister);
-
private:
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7d9778a4e7..fa39b79e39 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1515,7 +1515,7 @@ Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind ki
if (kind == Location::kRegister) {
scratch = LocationFrom(vixl_temps_.AcquireX());
} else {
- DCHECK(kind == Location::kFpuRegister);
+ DCHECK_EQ(kind, Location::kFpuRegister);
scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
: vixl_temps_.AcquireD());
@@ -1743,9 +1743,9 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
(cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
}
-// Allocate a scratch register from the VIXL pool, querying first into
-// the floating-point register pool, and then the the core register
-// pool. This is essentially a reimplementation of
+// Allocate a scratch register from the VIXL pool, querying first
+// the floating-point register pool, and then the core register
+// pool. This is essentially a reimplementation of
// vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
// using a different allocation strategy.
static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
@@ -1893,7 +1893,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
// ask for a scratch register of any type (core or FP).
//
// Also, we start by asking for a FP scratch register first, as the
- // demand of scratch core registers is higher. This is why we
+ // demand of scratch core registers is higher. This is why we
// use AcquireFPOrCoreCPURegisterOfSize instead of
// UseScratchRegisterScope::AcquireCPURegisterOfSize, which
// allocates core scratch registers first.
@@ -2661,6 +2661,38 @@ void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddres
Operand(InputOperandAt(instruction, 1)));
}
+void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+ HIntConstant* shift = instruction->GetShift()->AsIntConstant();
+
+ locations->SetInAt(0, Location::RequiresRegister());
+  // For the byte case we don't need to shift the index variable, so we can encode the data
+  // offset into the ADD instruction. For other cases we prefer the data_offset to be in a
+  // register; that hoists the data offset constant generation out of the loop and reduces the
+  // critical path length in the loop.
+ locations->SetInAt(1, shift->GetValue() == 0
+ ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
+ : Location::RequiresRegister());
+ locations->SetInAt(2, Location::ConstantLocation(shift));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ Register index_reg = InputRegisterAt(instruction, 0);
+ uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2));
+ uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
+
+ if (shift == 0) {
+ __ Add(OutputRegister(instruction), index_reg, offset);
+ } else {
+ Register offset_reg = InputRegisterAt(instruction, 1);
+ __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
+ }
+}
+
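A sketch of the address math the new node encapsulates (our function; the comments mirror the two Add forms emitted above):

    #include <cstdint>

    // HIntermediateAddressIndex computes `data_offset + (index << shift)` so a
    // vectorized loop pays for the data-offset constant only once.
    uint64_t IntermediateAddressIndex(uint64_t data_offset, uint64_t index, unsigned shift) {
      return (shift == 0)
          ? index + data_offset               // Add(out, index_reg, offset)
          : data_offset + (index << shift);   // Add(out, offset_reg, index, LSL, shift)
    }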
void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
@@ -6102,7 +6134,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6197,7 +6229,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
@@ -6571,14 +6603,20 @@ static void PatchJitRootUse(uint8_t* code,
void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 502b298163..1759c68125 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -124,6 +124,10 @@ static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Lab
__ bind(&placeholder_label);
}
+static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
+ return rt.IsLow() && rn.IsLow() && offset < 32u;
+}
+
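A usage sketch for the new predicate (register and offset values are ours; in T32, r0-r7 are the low registers). Note that `offset < 32u` is this patch's own cut-off for the read-barrier code paths, deliberately tighter than what the 16-bit LDR immediate could architecturally reach:

    // Illustrative checks, not part of the patch:
    DCHECK(CanEmitNarrowLdr(vixl32::r0, vixl32::r1, 8u));    // true: 2-byte LDR fits
    DCHECK(!CanEmitNarrowLdr(vixl32::r8, vixl32::r1, 8u));   // r8 is not a low register
    DCHECK(!CanEmitNarrowLdr(vixl32::r0, vixl32::r1, 64u));  // offset >= 32, emit wide LDR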
class EmitAdrCode {
public:
EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
@@ -1771,34 +1775,6 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codege
}
}
-static int64_t AdjustConstantForCondition(int64_t value,
- IfCondition* condition,
- IfCondition* opposite) {
- if (value == 1) {
- if (*condition == kCondB) {
- value = 0;
- *condition = kCondEQ;
- *opposite = kCondNE;
- } else if (*condition == kCondAE) {
- value = 0;
- *condition = kCondNE;
- *opposite = kCondEQ;
- }
- } else if (value == -1) {
- if (*condition == kCondGT) {
- value = 0;
- *condition = kCondGE;
- *opposite = kCondLT;
- } else if (*condition == kCondLE) {
- value = 0;
- *condition = kCondLT;
- *opposite = kCondGE;
- }
- }
-
- return value;
-}
-
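The removed AdjustConstantForCondition() rewrote comparisons against +1/-1 into comparisons against 0 using boundary identities; a small self-checking restatement (our test code, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t samples[] = {INT64_MIN, -2, -1, 0, 1, 2, INT64_MAX};
      for (int64_t x : samples) {
        uint64_t u = static_cast<uint64_t>(x);
        assert((u < 1u) == (x == 0));   // kCondB  vs 1:  becomes kCondEQ vs 0
        assert((u >= 1u) == (x != 0));  // kCondAE vs 1:  becomes kCondNE vs 0
        assert((x > -1) == (x >= 0));   // kCondGT vs -1: becomes kCondGE vs 0
        assert((x <= -1) == (x < 0));   // kCondLE vs -1: becomes kCondLT vs 0
      }
    }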
static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
HCondition* condition,
bool invert,
@@ -1821,37 +1797,7 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
const vixl32::Register left_high = HighRegisterFrom(left);
const vixl32::Register left_low = LowRegisterFrom(left);
- int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
- UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
-
- // Comparisons against 0 are common enough to deserve special attention.
- if (value == 0) {
- switch (cond) {
- case kCondNE:
- // x > 0 iff x != 0 when the comparison is unsigned.
- case kCondA:
- ret = std::make_pair(ne, eq);
- FALLTHROUGH_INTENDED;
- case kCondEQ:
- // x <= 0 iff x == 0 when the comparison is unsigned.
- case kCondBE:
- __ Orrs(temps.Acquire(), left_low, left_high);
- return ret;
- case kCondLT:
- case kCondGE:
- __ Cmp(left_high, 0);
- return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
- // Trivially true or false.
- case kCondB:
- ret = std::make_pair(ne, eq);
- FALLTHROUGH_INTENDED;
- case kCondAE:
- __ Cmp(left_low, left_low);
- return ret;
- default:
- break;
- }
- }
+ int64_t value = Int64ConstantFrom(right);
switch (cond) {
case kCondEQ:
@@ -1896,6 +1842,8 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
FALLTHROUGH_INTENDED;
case kCondGE:
case kCondLT: {
+ UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
__ Cmp(left_low, Low32Bits(value));
__ Sbcs(temps.Acquire(), left_high, High32Bits(value));
ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
@@ -2013,22 +1961,18 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition*
static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) {
if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
const LocationSummary* const locations = condition->GetLocations();
+ const IfCondition c = condition->GetCondition();
if (locations->InAt(1).IsConstant()) {
- IfCondition c = condition->GetCondition();
- IfCondition opposite = condition->GetOppositeCondition();
- const int64_t value =
- AdjustConstantForCondition(Int64ConstantFrom(locations->InAt(1)), &c, &opposite);
+ const int64_t value = Int64ConstantFrom(locations->InAt(1));
if (c < kCondLT || c > kCondGE) {
// Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
// we check that the least significant half of the first input to be compared
// is in a low register (the other half is read outside an IT block), and
// the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
- // encoding can be used; 0 is always handled, no matter what registers are
- // used by the first input.
- if (value != 0 &&
- (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value)))) {
+ // encoding can be used.
+ if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) {
return false;
}
// TODO(VIXL): The rest of the checks are there to keep the backend in sync with
@@ -2047,353 +1991,6 @@ static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler)
return true;
}
-static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
- DCHECK(CanGenerateTest(cond, codegen->GetAssembler()));
-
- const vixl32::Register out = OutputRegister(cond);
- const auto condition = GenerateTest(cond, false, codegen);
-
- __ Mov(LeaveFlags, out, 0);
-
- if (out.IsLow()) {
- // We use the scope because of the IT block that follows.
- ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
- 2 * vixl32::k16BitT32InstructionSizeInBytes,
- CodeBufferCheckScope::kExactSize);
-
- __ it(condition.first);
- __ mov(condition.first, out, 1);
- } else {
- vixl32::Label done_label;
- vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
-
- __ B(condition.second, final_label, /* far_target */ false);
- __ Mov(out, 1);
-
- if (done_label.IsReferenced()) {
- __ Bind(&done_label);
- }
- }
-}
-
-static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
- DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
-
- const LocationSummary* const locations = cond->GetLocations();
- IfCondition condition = cond->GetCondition();
- const vixl32::Register out = OutputRegister(cond);
- const Location left = locations->InAt(0);
- const Location right = locations->InAt(1);
- vixl32::Register left_high = HighRegisterFrom(left);
- vixl32::Register left_low = LowRegisterFrom(left);
- vixl32::Register temp;
- UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
-
- if (right.IsConstant()) {
- IfCondition opposite = cond->GetOppositeCondition();
- const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
- &condition,
- &opposite);
- Operand right_high = High32Bits(value);
- Operand right_low = Low32Bits(value);
-
- // The output uses Location::kNoOutputOverlap.
- if (out.Is(left_high)) {
- std::swap(left_low, left_high);
- std::swap(right_low, right_high);
- }
-
- __ Sub(out, left_low, right_low);
- temp = temps.Acquire();
- __ Sub(temp, left_high, right_high);
- } else {
- DCHECK(right.IsRegisterPair());
- temp = temps.Acquire();
- __ Sub(temp, left_high, HighRegisterFrom(right));
- __ Sub(out, left_low, LowRegisterFrom(right));
- }
-
- // Need to check after calling AdjustConstantForCondition().
- DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
-
- if (condition == kCondNE && out.IsLow()) {
- __ Orrs(out, out, temp);
-
- // We use the scope because of the IT block that follows.
- ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
- 2 * vixl32::k16BitT32InstructionSizeInBytes,
- CodeBufferCheckScope::kExactSize);
-
- __ it(ne);
- __ mov(ne, out, 1);
- } else {
- __ Orr(out, out, temp);
- codegen->GenerateConditionWithZero(condition, out, out, temp);
- }
-}
-
-static void GenerateLongComparesAndJumps(HCondition* cond,
- vixl32::Label* true_label,
- vixl32::Label* false_label,
- CodeGeneratorARMVIXL* codegen) {
- LocationSummary* locations = cond->GetLocations();
- Location left = locations->InAt(0);
- Location right = locations->InAt(1);
- IfCondition if_cond = cond->GetCondition();
-
- vixl32::Register left_high = HighRegisterFrom(left);
- vixl32::Register left_low = LowRegisterFrom(left);
- IfCondition true_high_cond = if_cond;
- IfCondition false_high_cond = cond->GetOppositeCondition();
- vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part
-
- // Set the conditions for the test, remembering that == needs to be
- // decided using the low words.
- switch (if_cond) {
- case kCondEQ:
- case kCondNE:
- // Nothing to do.
- break;
- case kCondLT:
- false_high_cond = kCondGT;
- break;
- case kCondLE:
- true_high_cond = kCondLT;
- break;
- case kCondGT:
- false_high_cond = kCondLT;
- break;
- case kCondGE:
- true_high_cond = kCondGT;
- break;
- case kCondB:
- false_high_cond = kCondA;
- break;
- case kCondBE:
- true_high_cond = kCondB;
- break;
- case kCondA:
- false_high_cond = kCondB;
- break;
- case kCondAE:
- true_high_cond = kCondA;
- break;
- }
- if (right.IsConstant()) {
- int64_t value = Int64ConstantFrom(right);
- int32_t val_low = Low32Bits(value);
- int32_t val_high = High32Bits(value);
-
- __ Cmp(left_high, val_high);
- if (if_cond == kCondNE) {
- __ B(ARMCondition(true_high_cond), true_label);
- } else if (if_cond == kCondEQ) {
- __ B(ARMCondition(false_high_cond), false_label);
- } else {
- __ B(ARMCondition(true_high_cond), true_label);
- __ B(ARMCondition(false_high_cond), false_label);
- }
- // Must be equal high, so compare the lows.
- __ Cmp(left_low, val_low);
- } else {
- vixl32::Register right_high = HighRegisterFrom(right);
- vixl32::Register right_low = LowRegisterFrom(right);
-
- __ Cmp(left_high, right_high);
- if (if_cond == kCondNE) {
- __ B(ARMCondition(true_high_cond), true_label);
- } else if (if_cond == kCondEQ) {
- __ B(ARMCondition(false_high_cond), false_label);
- } else {
- __ B(ARMCondition(true_high_cond), true_label);
- __ B(ARMCondition(false_high_cond), false_label);
- }
- // Must be equal high, so compare the lows.
- __ Cmp(left_low, right_low);
- }
- // The last comparison might be unsigned.
- // TODO: optimize cases where this is always true/false
- __ B(final_condition, true_label);
-}
-
-static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
- DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
-
- const LocationSummary* const locations = cond->GetLocations();
- IfCondition condition = cond->GetCondition();
- const vixl32::Register out = OutputRegister(cond);
- const Location left = locations->InAt(0);
- const Location right = locations->InAt(1);
-
- if (right.IsConstant()) {
- IfCondition opposite = cond->GetOppositeCondition();
-
- // Comparisons against 0 are common enough to deserve special attention.
- if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
- switch (condition) {
- case kCondNE:
- case kCondA:
- if (out.IsLow()) {
- // We only care if both input registers are 0 or not.
- __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
-
- // We use the scope because of the IT block that follows.
- ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
- 2 * vixl32::k16BitT32InstructionSizeInBytes,
- CodeBufferCheckScope::kExactSize);
-
- __ it(ne);
- __ mov(ne, out, 1);
- return;
- }
-
- FALLTHROUGH_INTENDED;
- case kCondEQ:
- case kCondBE:
- // We only care if both input registers are 0 or not.
- __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
- codegen->GenerateConditionWithZero(condition, out, out);
- return;
- case kCondLT:
- case kCondGE:
- // We only care about the sign bit.
- FALLTHROUGH_INTENDED;
- case kCondAE:
- case kCondB:
- codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
- return;
- case kCondLE:
- case kCondGT:
- default:
- break;
- }
- }
- }
-
- if ((condition == kCondEQ || condition == kCondNE) &&
- // If `out` is a low register, then the GenerateConditionGeneric()
- // function generates a shorter code sequence that is still branchless.
- (!out.IsLow() || !CanGenerateTest(cond, codegen->GetAssembler()))) {
- GenerateEqualLong(cond, codegen);
- return;
- }
-
- if (CanGenerateTest(cond, codegen->GetAssembler())) {
- GenerateConditionGeneric(cond, codegen);
- return;
- }
-
- // Convert the jumps into the result.
- vixl32::Label done_label;
- vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
- vixl32::Label true_label, false_label;
-
- GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen);
-
- // False case: result = 0.
- __ Bind(&false_label);
- __ Mov(out, 0);
- __ B(final_label);
-
- // True case: result = 1.
- __ Bind(&true_label);
- __ Mov(out, 1);
-
- if (done_label.IsReferenced()) {
- __ Bind(&done_label);
- }
-}
-
-static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
- const Primitive::Type type = cond->GetLeft()->GetType();
-
- DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
-
- if (type == Primitive::kPrimLong) {
- GenerateConditionLong(cond, codegen);
- return;
- }
-
- IfCondition condition = cond->GetCondition();
- vixl32::Register in = InputRegisterAt(cond, 0);
- const vixl32::Register out = OutputRegister(cond);
- const Location right = cond->GetLocations()->InAt(1);
- int64_t value;
-
- if (right.IsConstant()) {
- IfCondition opposite = cond->GetOppositeCondition();
-
- value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
-
- // Comparisons against 0 are common enough to deserve special attention.
- if (value == 0) {
- switch (condition) {
- case kCondNE:
- case kCondA:
- if (out.IsLow() && out.Is(in)) {
- __ Cmp(out, 0);
-
- // We use the scope because of the IT block that follows.
- ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
- 2 * vixl32::k16BitT32InstructionSizeInBytes,
- CodeBufferCheckScope::kExactSize);
-
- __ it(ne);
- __ mov(ne, out, 1);
- return;
- }
-
- FALLTHROUGH_INTENDED;
- case kCondEQ:
- case kCondBE:
- case kCondLT:
- case kCondGE:
- case kCondAE:
- case kCondB:
- codegen->GenerateConditionWithZero(condition, out, in);
- return;
- case kCondLE:
- case kCondGT:
- default:
- break;
- }
- }
- }
-
- if (condition == kCondEQ || condition == kCondNE) {
- Operand operand(0);
-
- if (right.IsConstant()) {
- operand = Operand::From(value);
- } else if (out.Is(RegisterFrom(right))) {
- // Avoid 32-bit instructions if possible.
- operand = InputOperandAt(cond, 0);
- in = RegisterFrom(right);
- } else {
- operand = InputOperandAt(cond, 1);
- }
-
- if (condition == kCondNE && out.IsLow()) {
- __ Subs(out, in, operand);
-
- // We use the scope because of the IT block that follows.
- ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
- 2 * vixl32::k16BitT32InstructionSizeInBytes,
- CodeBufferCheckScope::kExactSize);
-
- __ it(ne);
- __ mov(ne, out, 1);
- } else {
- __ Sub(out, in, operand);
- codegen->GenerateConditionWithZero(condition, out, out);
- }
-
- return;
- }
-
- GenerateConditionGeneric(cond, codegen);
-}
-
static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
const Primitive::Type type = constant->GetType();
bool ret = false;
@@ -2954,6 +2551,89 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
+void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond,
+ vixl32::Label* true_label,
+ vixl32::Label* false_label) {
+ LocationSummary* locations = cond->GetLocations();
+ Location left = locations->InAt(0);
+ Location right = locations->InAt(1);
+ IfCondition if_cond = cond->GetCondition();
+
+ vixl32::Register left_high = HighRegisterFrom(left);
+ vixl32::Register left_low = LowRegisterFrom(left);
+ IfCondition true_high_cond = if_cond;
+ IfCondition false_high_cond = cond->GetOppositeCondition();
+ vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part
+
+ // Set the conditions for the test, remembering that == needs to be
+ // decided using the low words.
+ switch (if_cond) {
+ case kCondEQ:
+ case kCondNE:
+ // Nothing to do.
+ break;
+ case kCondLT:
+ false_high_cond = kCondGT;
+ break;
+ case kCondLE:
+ true_high_cond = kCondLT;
+ break;
+ case kCondGT:
+ false_high_cond = kCondLT;
+ break;
+ case kCondGE:
+ true_high_cond = kCondGT;
+ break;
+ case kCondB:
+ false_high_cond = kCondA;
+ break;
+ case kCondBE:
+ true_high_cond = kCondB;
+ break;
+ case kCondA:
+ false_high_cond = kCondB;
+ break;
+ case kCondAE:
+ true_high_cond = kCondA;
+ break;
+ }
+ if (right.IsConstant()) {
+ int64_t value = Int64ConstantFrom(right);
+ int32_t val_low = Low32Bits(value);
+ int32_t val_high = High32Bits(value);
+
+ __ Cmp(left_high, val_high);
+ if (if_cond == kCondNE) {
+ __ B(ARMCondition(true_high_cond), true_label);
+ } else if (if_cond == kCondEQ) {
+ __ B(ARMCondition(false_high_cond), false_label);
+ } else {
+ __ B(ARMCondition(true_high_cond), true_label);
+ __ B(ARMCondition(false_high_cond), false_label);
+ }
+ // Must be equal high, so compare the lows.
+ __ Cmp(left_low, val_low);
+ } else {
+ vixl32::Register right_high = HighRegisterFrom(right);
+ vixl32::Register right_low = LowRegisterFrom(right);
+
+ __ Cmp(left_high, right_high);
+ if (if_cond == kCondNE) {
+ __ B(ARMCondition(true_high_cond), true_label);
+ } else if (if_cond == kCondEQ) {
+ __ B(ARMCondition(false_high_cond), false_label);
+ } else {
+ __ B(ARMCondition(true_high_cond), true_label);
+ __ B(ARMCondition(false_high_cond), false_label);
+ }
+ // Must be equal high, so compare the lows.
+ __ Cmp(left_low, right_low);
+ }
+ // The last comparison might be unsigned.
+ // TODO: optimize cases where this is always true/false
+ __ B(final_condition, true_label);
+}
+
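The VIXL backend reinstates the same algorithm; as a second data point, the unsigned below case resolves the high-word conditions like this (our sketch):

    #include <cstdint>

    // Unsigned `a < b` (kCondB): true_high_cond = kCondB, false_high_cond = kCondA,
    // and the low words again compare unsigned (LO).
    bool Below64(uint64_t a, uint64_t b) {
      uint32_t a_hi = static_cast<uint32_t>(a >> 32);
      uint32_t b_hi = static_cast<uint32_t>(b >> 32);
      if (a_hi < b_hi) return true;   // B(ARMCondition(kCondB), true_label)
      if (a_hi > b_hi) return false;  // B(ARMCondition(kCondA), false_label)
      return static_cast<uint32_t>(a) < static_cast<uint32_t>(b);
    }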
void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
vixl32::Label* true_target_in,
vixl32::Label* false_target_in) {
@@ -2988,7 +2668,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* c
vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in;
DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
- GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_);
+ GenerateLongComparesAndJumps(condition, true_target, false_target);
if (false_target != &fallthrough) {
__ B(false_target);
@@ -3299,83 +2979,6 @@ void CodeGeneratorARMVIXL::GenerateNop() {
__ Nop();
}
-// `temp` is an extra temporary register that is used for some conditions;
-// callers may not specify it, in which case the method will use a scratch
-// register instead.
-void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
- vixl32::Register out,
- vixl32::Register in,
- vixl32::Register temp) {
- switch (condition) {
- case kCondEQ:
- // x <= 0 iff x == 0 when the comparison is unsigned.
- case kCondBE:
- if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
- temp = out;
- }
-
- // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
- // different as well.
- if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
- // temp = - in; only 0 sets the carry flag.
- __ Rsbs(temp, in, 0);
-
- if (out.Is(in)) {
- std::swap(in, temp);
- }
-
- // out = - in + in + carry = carry
- __ Adc(out, temp, in);
- } else {
- // If `in` is 0, then it has 32 leading zeros, and less than that otherwise.
- __ Clz(out, in);
- // Any number less than 32 logically shifted right by 5 bits results in 0;
- // the same operation on 32 yields 1.
- __ Lsr(out, out, 5);
- }
-
- break;
- case kCondNE:
- // x > 0 iff x != 0 when the comparison is unsigned.
- case kCondA: {
- UseScratchRegisterScope temps(GetVIXLAssembler());
-
- if (out.Is(in)) {
- if (!temp.IsValid() || in.Is(temp)) {
- temp = temps.Acquire();
- }
- } else if (!temp.IsValid() || !temp.IsLow()) {
- temp = out;
- }
-
- // temp = in - 1; only 0 does not set the carry flag.
- __ Subs(temp, in, 1);
- // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
- __ Sbc(out, in, temp);
- break;
- }
- case kCondGE:
- __ Mvn(out, in);
- in = out;
- FALLTHROUGH_INTENDED;
- case kCondLT:
- // We only care about the sign bit.
- __ Lsr(out, in, 31);
- break;
- case kCondAE:
- // Trivially true.
- __ Mov(out, 1);
- break;
- case kCondB:
- // Trivially false.
- __ Mov(out, 0);
- break;
- default:
- LOG(FATAL) << "Unexpected condition " << condition;
- UNREACHABLE();
- }
-}
-
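The other path in the removed helper used CLZ; its arithmetic is worth spelling out once (our self-contained check, using the GCC/Clang builtin as a stand-in for the CLZ instruction):

    #include <cassert>
    #include <cstdint>

    // clz(in) == 32 exactly when in == 0 and is at most 31 otherwise, so a
    // logical shift right by 5 leaves 1 for zero and 0 for everything else.
    uint32_t IsZeroViaClz(uint32_t in) {
      uint32_t lz = (in == 0) ? 32u : static_cast<uint32_t>(__builtin_clz(in));
      return lz >> 5;  // Lsr(out, out, 5)
    }

    int main() {
      assert(IsZeroViaClz(0u) == 1u);
      assert(IsZeroViaClz(1u) == 0u);
      assert(IsZeroViaClz(0x80000000u) == 0u);
    }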
void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
@@ -3412,41 +3015,52 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
return;
}
- const Primitive::Type type = cond->GetLeft()->GetType();
+ const vixl32::Register out = OutputRegister(cond);
+
+ if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) {
+ const auto condition = GenerateTest(cond, false, codegen_);
+ // We use the scope because of the IT block that follows.
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ 4 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
- if (Primitive::IsFloatingPointType(type)) {
- GenerateConditionGeneric(cond, codegen_);
+ __ it(condition.first);
+ __ mov(condition.first, out, 1);
+ __ it(condition.second);
+ __ mov(condition.second, out, 0);
return;
}
- DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
-
- if (type == Primitive::kPrimBoolean) {
- const IfCondition c = cond->GetCondition();
- vixl32::Register left = InputRegisterAt(cond, 0);
- const vixl32::Register out = OutputRegister(cond);
- const Location right_loc = cond->GetLocations()->InAt(1);
+ // Convert the jumps into the result.
+ vixl32::Label done_label;
+ vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
- // All other cases are handled by the instruction simplifier.
- DCHECK((c == kCondEQ || c == kCondNE) && !right_loc.IsConstant());
+ if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
+ vixl32::Label true_label, false_label;
- vixl32::Register right = RegisterFrom(right_loc);
+ GenerateLongComparesAndJumps(cond, &true_label, &false_label);
- // Avoid 32-bit instructions if possible.
- if (out.Is(right)) {
- std::swap(left, right);
- }
+ // False case: result = 0.
+ __ Bind(&false_label);
+ __ Mov(out, 0);
+ __ B(final_label);
- __ Eor(out, left, right);
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ Mov(out, 1);
+ } else {
+ DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
- if (c == kCondEQ) {
- __ Eor(out, out, 1);
- }
+ const auto condition = GenerateTest(cond, false, codegen_);
- return;
+ __ Mov(LeaveFlags, out, 0);
+ __ B(condition.second, final_label, /* far_target */ false);
+ __ Mov(out, 1);
}
- GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
+ if (done_label.IsReferenced()) {
+ __ Bind(&done_label);
+ }
}
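The ExactAssemblyScope in the fast path above reserves exactly four 16-bit slots; a sketch of the guarded sequence for an equality test (condition pair {eq, ne} assumed from GenerateTest(); `out` stands for the low output register):

    // All four encodings are narrow because `out` is a low register:
    __ it(eq);           // 2 bytes
    __ mov(eq, out, 1);  // 2 bytes: out = 1 when the condition holds
    __ it(ne);           // 2 bytes
    __ mov(ne, out, 0);  // 2 bytes: out = 0 otherwise
    // Total: 4 * vixl32::k16BitT32InstructionSizeInBytes, matching the scope.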
void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
@@ -6833,6 +6447,16 @@ void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddr
}
}
+void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConventionARMVIXL calling_convention;
@@ -8557,8 +8181,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
UseScratchRegisterScope temps(GetVIXLAssembler());
ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
- uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+ uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
+ root_reg.GetCode(), narrow);
vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8573,15 +8198,16 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
vixl32::Label return_address;
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
- static_assert(
- BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
- "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
// Currently the offset is always within range. If that changes,
// we shall have to split the load the same way as for fields.
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
- __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset));
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
EmitPlaceholderBne(codegen_, bne_label);
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
} else {
// Note that we do not actually check the value of
// `GetIsGcMarking()` to decide whether to mark the loaded GC
@@ -8682,10 +8308,12 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
vixl32::Register base = obj;
if (offset >= kReferenceLoadMinFarOffset) {
base = RegisterFrom(temp);
@@ -8693,12 +8321,15 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
__ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
offset &= (kReferenceLoadMinFarOffset - 1u);
+      // Use narrow LDR only for small offsets. Generating a narrow-encoding LDR for large
+      // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+      // increase the overall code size once the generated thunks are taken into account.
+ DCHECK(!narrow);
}
UseScratchRegisterScope temps(GetVIXLAssembler());
ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
- base.GetCode(),
- obj.GetCode());
+ base.GetCode(), obj.GetCode(), narrow);
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8715,19 +8346,24 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
- vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
- __ ldr(EncodingSize(Wide), ref_reg, MemOperand(base, offset));
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
- // Note: We need a Wide NEG for the unpoisoning.
+ // Note: We need a specific width for the unpoisoning NEG.
if (kPoisonHeapReferences) {
- __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ if (narrow) {
+        // The only 16-bit encoding is T1, which sets flags outside an IT block (i.e. RSBS, not RSB).
+ __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+ } else {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
}
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
return;
}
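The DCHECK_EQs added above pin the byte distance from the reference-loading LDR back to the bound return address; schematically (our restatement; the real constants are negative byte offsets shared with the introspection thunks, which locate the LDR relative to LR):

    // Invariant being checked (sketch):
    //   distance = position(ldr) - position(return_address);  // negative
    //   distance == (narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
    //                       : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
    // Narrow encodings shorten that distance, so the size chosen here must
    // match the `narrow` flag baked into custom_data for the thunk.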
@@ -8773,7 +8409,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
@@ -8803,9 +8439,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
__ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
DCHECK(!needs_null_check); // The thunk cannot handle the null check.
// Note: We need a Wide NEG for the unpoisoning.
@@ -8813,6 +8447,8 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
__ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
}
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
return;
}
@@ -9625,14 +9261,20 @@ static void PatchJitRootUse(uint8_t* code,
void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ VIXLUInt32Literal* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ VIXLUInt32Literal* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index afff72fb52..657d3c134f 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -401,6 +401,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
void GenerateCompareTestAndBranch(HCondition* condition,
vixl::aarch32::Label* true_target,
vixl::aarch32::Label* false_target);
+ void GenerateLongComparesAndJumps(HCondition* cond,
+ vixl::aarch32::Label* true_label,
+ vixl::aarch32::Label* false_label);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivRemByPowerOfTwo(HBinaryOperation* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -717,14 +720,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
void EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
vixl::aarch32::Register out);
- // `temp` is an extra temporary register that is used for some conditions;
- // callers may not specify it, in which case the method will use a scratch
- // register instead.
- void GenerateConditionWithZero(IfCondition condition,
- vixl::aarch32::Register out,
- vixl::aarch32::Register in,
- vixl::aarch32::Register temp = vixl32::Register());
-
private:
vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
vixl::aarch32::Register temp);
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index e9870acff4..fdfa4eedf8 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1780,16 +1780,18 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code,
void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const JitPatchInfo& info : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(StringReference(&info.target_dex_file,
- dex::StringIndex(info.index)));
+ const auto it = jit_string_roots_.find(StringReference(&info.target_dex_file,
+ dex::StringIndex(info.index)));
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
for (const JitPatchInfo& info : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(TypeReference(&info.target_dex_file,
- dex::TypeIndex(info.index)));
+ const auto it = jit_class_roots_.find(TypeReference(&info.target_dex_file,
+ dex::TypeIndex(info.index)));
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index f04e3841f5..d3ae3a729b 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1586,14 +1586,20 @@ void CodeGeneratorMIPS64::PatchJitRootUse(uint8_t* code,
void CodeGeneratorMIPS64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 57f7e6b25c..478bd24388 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -783,6 +783,12 @@ MemOperand InstructionCodeGeneratorARM64::VecAddress(
/*out*/ Register* scratch) {
LocationSummary* locations = instruction->GetLocations();
Register base = InputRegisterAt(instruction, 0);
+
+ if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
+ DCHECK(!is_string_char_at);
+ return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
+ }
+
Location index = locations->InAt(1);
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
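When the index input is an HIntermediateAddressIndex, the scalar part of the address (data offset plus scaled index) has already been materialized in a register, so the access can use a plain register-plus-register addressing mode. A pseudo-assembly sketch of the two shapes (register names and exact encodings illustrative only):

    // Without extraction: VecAddress folds the offset and shift into each access.
    //   add  x16, xBase, #data_offset
    //   ldr  q0, [x16, xIndex, lsl #shift]
    //
    // With an HIntermediateAddressIndex input (this hunk):
    //   // xIdx = data_offset + (index << shift), computed once and shared
    //   ldr  q0, [xBase, xIdx]   // MemOperand(base.X(), InputRegisterAt(instruction, 1).X())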
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index cf2d5cbee3..bd9a5d2564 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -7703,7 +7703,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
constant_area_start_ = assembler->CodeSize();
// Populate any jump tables.
- for (auto jump_table : fixups_to_jump_tables_) {
+ for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
jump_table->CreateJumpTable();
}
@@ -7842,17 +7842,19 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const PatchInfo<Label>& info : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(
+ const auto it = jit_string_roots_.find(
StringReference(&info.dex_file, dex::StringIndex(info.index)));
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
for (const PatchInfo<Label>& info : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(
+ const auto it = jit_class_roots_.find(
TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index f2ed52b5a5..6b0e001ad8 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -7055,7 +7055,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
constant_area_start_ = assembler->CodeSize();
// Populate any jump tables.
- for (auto jump_table : fixups_to_jump_tables_) {
+ for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
jump_table->CreateJumpTable();
}
@@ -7149,17 +7149,19 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const PatchInfo<Label>& info : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(
+ const auto it = jit_string_roots_.find(
StringReference(&info.dex_file, dex::StringIndex(info.index)));
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
for (const PatchInfo<Label>& info : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(
+ const auto it = jit_class_roots_.find(
TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
}
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 4ba5c5580f..fe25b7690d 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -64,7 +64,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
#endif
};
- for (auto test_config : test_config_candidates) {
+ for (const CodegenTargetConfig& test_config : test_config_candidates) {
if (CanExecute(test_config.GetInstructionSet())) {
v.push_back(test_config);
}
@@ -76,7 +76,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
static void TestCode(const uint16_t* data,
bool has_result = false,
int32_t expected = 0) {
- for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+ for (const CodegenTargetConfig& target_config : GetTargetConfigs()) {
ArenaPool pool;
ArenaAllocator arena(&pool);
HGraph* graph = CreateCFG(&arena, data);
@@ -89,7 +89,7 @@ static void TestCode(const uint16_t* data,
static void TestCodeLong(const uint16_t* data,
bool has_result,
int64_t expected) {
- for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+ for (const CodegenTargetConfig& target_config : GetTargetConfigs()) {
ArenaPool pool;
ArenaAllocator arena(&pool);
HGraph* graph = CreateCFG(&arena, data, Primitive::kPrimLong);
@@ -754,7 +754,28 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
//
// Assertion failed (!available->IsEmpty())
//
- // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable.
+ // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable,
+ // because of the following situation:
+ //
+ // 1. a temp register (IP0) is allocated as a scratch register by
+ // the parallel move resolver to solve a cycle (swap):
+ //
+ // [ source=DS0 destination=DS257 type=PrimDouble instruction=null ]
+ // [ source=DS257 destination=DS0 type=PrimDouble instruction=null ]
+ //
+ // 2. within CodeGeneratorARM64::MoveLocation, another temp
+ // register (IP1) is allocated to generate the swap between two
+ // double stack slots;
+ //
+ // 3. VIXL requires a third temp register to emit the `Ldr` or
+ // `Str` operation from CodeGeneratorARM64::MoveLocation (as
+ // one of the stack slots' offsets cannot be encoded as an
+ // immediate), but the pool of (core) temp registers is now
+ // empty.
+ //
+ // The solution used so far is to use a floating-point temp register
+ // (D31) in step #2, so that IP1 is available for step #3.
+
HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
move->AddMove(Location::DoubleStackSlot(0),
Location::DoubleStackSlot(257),
@@ -807,7 +828,6 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
InternalCodeAllocator code_allocator;
codegen.Finalize(&code_allocator);
}
-
#endif
#ifdef ART_ENABLE_CODEGEN_mips
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index 31cd204c9f..00a16fe849 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -243,7 +243,7 @@ static void ValidateGraph(HGraph* graph) {
GraphChecker graph_checker(graph);
graph_checker.Run();
if (!graph_checker.IsValid()) {
- for (const auto& error : graph_checker.GetErrors()) {
+ for (const std::string& error : graph_checker.GetErrors()) {
std::cout << error << std::endl;
}
}
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index c93bc210be..8ea312d0ea 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -516,13 +516,13 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));
- for (auto dominated_block : block->GetDominatedBlocks()) {
+ for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
return true;
}
}
- for (auto successor : block->GetSuccessors()) {
+ for (const HBasicBlock* successor : block->GetSuccessors()) {
if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
return true;
}
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index f16e3727c8..311be1fb49 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -216,5 +216,18 @@ void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) {
}
}
+void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) {
+ if (!instruction->IsStringCharAt()
+ && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
+ RecordSimplification();
+ }
+}
+
+void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) {
+ if (TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
+ RecordSimplification();
+ }
+}
+
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index eec4e49792..8596f6ad40 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -75,6 +75,8 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
void VisitUShr(HUShr* instruction) OVERRIDE;
void VisitXor(HXor* instruction) OVERRIDE;
void VisitVecMul(HVecMul* instruction) OVERRIDE;
+ void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
+ void VisitVecStore(HVecStore* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
};
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index c39e5f4d3b..e5a8499ff4 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -16,6 +16,8 @@
#include "instruction_simplifier_shared.h"
+#include "mirror/array-inl.h"
+
namespace art {
namespace {
@@ -346,4 +348,59 @@ bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) {
return false;
}
+bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
+ if (index->IsConstant()) {
+ // If the index is constant, the whole address calculation can often be done by the
+ // LDR/STR instructions themselves.
+ // TODO: Handle the case of a constant that cannot be embedded in the addressing mode.
+ return false;
+ }
+
+ HGraph* graph = access->GetBlock()->GetGraph();
+ ArenaAllocator* arena = graph->GetArena();
+ Primitive::Type packed_type = access->GetPackedType();
+ uint32_t data_offset = mirror::Array::DataOffset(
+ Primitive::ComponentSize(packed_type)).Uint32Value();
+ size_t component_shift = Primitive::ComponentSizeShift(packed_type);
+
+ bool is_extracting_beneficial = false;
+ // Extracting the intermediate address is beneficial only if it would have at least 2 users.
+ for (const HUseListNode<HInstruction*>& use : index->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (user->IsVecMemoryOperation() && user != access) {
+ HVecMemoryOperation* another_access = user->AsVecMemoryOperation();
+ Primitive::Type another_packed_type = another_access->GetPackedType();
+ uint32_t another_data_offset = mirror::Array::DataOffset(
+ Primitive::ComponentSize(another_packed_type)).Uint32Value();
+ size_t another_component_shift = Primitive::ComponentSizeShift(another_packed_type);
+ if (another_data_offset == data_offset && another_component_shift == component_shift) {
+ is_extracting_beneficial = true;
+ break;
+ }
+ } else if (user->IsIntermediateAddressIndex()) {
+ HIntermediateAddressIndex* another_access = user->AsIntermediateAddressIndex();
+ uint32_t another_data_offset = another_access->GetOffset()->AsIntConstant()->GetValue();
+ size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue();
+ if (another_data_offset == data_offset && another_component_shift == component_shift) {
+ is_extracting_beneficial = true;
+ break;
+ }
+ }
+ }
+
+ if (!is_extracting_beneficial) {
+ return false;
+ }
+
+ // Proceed to extract the index + data_offset address computation.
+ HIntConstant* offset = graph->GetIntConstant(data_offset);
+ HIntConstant* shift = graph->GetIntConstant(component_shift);
+ HIntermediateAddressIndex* address =
+ new (arena) HIntermediateAddressIndex(index, offset, shift, kNoDexPc);
+
+ access->GetBlock()->InsertInstructionBefore(address, access);
+ access->ReplaceInput(address, 1);
+
+ return true;
+}
+
} // namespace art
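The benefit test above scans the index's users for another access that agrees on both the data offset and the element shift; only then does sharing the (offset + (index << shift)) computation pay for the extra instruction. A toy model of that predicate (illustrative types, not ART's HInstruction API):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Access { uint32_t data_offset; size_t shift; };  // stand-in for a user

    // True when some other user matches this access's offset and shift,
    // i.e. the extracted address would have at least two users.
    bool ExtractionIsBeneficial(const std::vector<Access>& other_users,
                                uint32_t data_offset, size_t shift) {
      for (const Access& user : other_users) {
        if (user.data_offset == data_offset && user.shift == shift) {
          return true;
        }
      }
      return false;
    }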
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 2ea103a518..371619fa2e 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -59,6 +59,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access,
size_t data_offset);
bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa);
+bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index);
} // namespace art
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index fc7d20c793..69cf9a126f 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -2598,7 +2598,11 @@ void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) {
// We don't care about the sign bit, so shift left.
__ Lsl(out, out, 1);
__ eor(out, out, ShifterOperand(infinity));
- codegen_->GenerateConditionWithZero(kCondEQ, out, out);
+ // If the result is 0, then it has 32 leading zeros; otherwise it has fewer than 32.
+ __ clz(out, out);
+ // Any number less than 32 logically shifted right by 5 bits results in 0;
+ // the same operation on 32 yields 1.
+ __ Lsr(out, out, 5);
}
void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
@@ -2621,7 +2625,11 @@ void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
__ eor(out, out, ShifterOperand(infinity_high2));
// We don't care about the sign bit, so shift left.
__ orr(out, IP, ShifterOperand(out, LSL, 1));
- codegen_->GenerateConditionWithZero(kCondEQ, out, out);
+ // If the result is 0, then it has 32 leading zeros; otherwise it has fewer than 32.
+ __ clz(out, out);
+ // Any number less than 32 logically shifted right by 5 bits results in 0;
+ // the same operation on 32 yields 1.
+ __ Lsr(out, out, 5);
}
void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) {
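The CLZ/LSR pair that replaces GenerateConditionWithZero encodes an is-zero test: ARM's CLZ of zero is 32, the only CLZ result with bit 5 set, so a logical shift right by 5 yields 1 exactly for a zero input. A standalone check of the identity (software CLZ so the zero case is well defined, unlike __builtin_clz):

    #include <cassert>
    #include <cstdint>

    // Software count-leading-zeros; hardware ARM CLZ also returns 32 for zero.
    uint32_t Clz32(uint32_t x) {
      uint32_t n = 0;
      while (n < 32 && (x & 0x80000000u) == 0) { x <<= 1; ++n; }
      return n;
    }

    uint32_t IsZero(uint32_t x) { return Clz32(x) >> 5; }  // 1 iff x == 0

    int main() {
      assert(IsZero(0u) == 1u);
      assert(IsZero(1u) == 0u);
      assert(IsZero(0x80000000u) == 0u);
      return 0;
    }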
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 56d06eb666..356d5bcb0c 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2971,7 +2971,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
// We don't care about the sign bit, so shift left.
__ Lsl(out, out, 1);
__ Eor(out, out, infinity);
- codegen_->GenerateConditionWithZero(kCondEQ, out, out);
+ // If the result is 0, then it has 32 leading zeros; otherwise it has fewer than 32.
+ __ Clz(out, out);
+ // Any number less than 32 logically shifted right by 5 bits results in 0;
+ // the same operation on 32 yields 1.
+ __ Lsr(out, out, 5);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
@@ -2997,7 +3001,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
__ Eor(out, out, infinity_high2);
// We don't care about the sign bit, so shift left.
__ Orr(out, temp, Operand(out, vixl32::LSL, 1));
- codegen_->GenerateConditionWithZero(kCondEQ, out, out);
+ // If the result is 0, then it has 32 leading zeros; otherwise it has fewer than 32.
+ __ Clz(out, out);
+ // Any number less than 32 logically shifted right by 5 bits results in 0;
+ // the same operation on 32 yields 1.
+ __ Lsr(out, out, 5);
}
void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 8ed2ad86bf..af0b193b03 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -759,7 +759,7 @@ static void CreateFPToFPCallLocations(ArenaAllocator* arena,
// We have to ensure that the native code doesn't clobber the XMM registers which are
// non-volatile for ART, but volatile for Native calls. This will ensure that they are
// saved in the prologue and properly restored.
- for (auto fp_reg : non_volatile_xmm_regs) {
+ for (FloatRegister fp_reg : non_volatile_xmm_regs) {
locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
}
}
@@ -898,7 +898,7 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
// We have to ensure that the native code doesn't clobber the XMM registers which are
// non-volatile for ART, but volatile for Native calls. This will ensure that they are
// saved in the prologue and properly restored.
- for (auto fp_reg : non_volatile_xmm_regs) {
+ for (FloatRegister fp_reg : non_volatile_xmm_regs) {
locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
}
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b4da20b558..522962485b 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1406,7 +1406,8 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(BitwiseNegatedRight, Instruction) \
M(DataProcWithShifterOp, Instruction) \
M(MultiplyAccumulate, Instruction) \
- M(IntermediateAddress, Instruction)
+ M(IntermediateAddress, Instruction) \
+ M(IntermediateAddressIndex, Instruction)
#endif
#ifndef ART_ENABLE_CODEGEN_arm
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index c6bfbcc7fb..075a816f3f 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -150,6 +150,49 @@ class HIntermediateAddress FINAL : public HExpression<2> {
DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
};
+// This instruction computes part of the array access offset (the data offset and the scaled
+// index).
+//
+// For array accesses the element address has the following structure:
+// Address = CONST_OFFSET + base_addr + index << ELEM_SHIFT. Taking LDR/STR addressing modes
+// into account, the (CONST_OFFSET + index << ELEM_SHIFT) part of the address can be shared
+// across array accesses with the same data type and index. For example, in the following loop
+// 5 accesses can share the address computation:
+//
+// void foo(int[] a, int[] b, int[] c) {
+// for (i...) {
+// a[i] = a[i] + 5;
+// b[i] = b[i] + c[i];
+// }
+// }
+//
+// Note: as the instruction does not involve the base array address in its computation, it has
+// no side effects (in contrast to HIntermediateAddress).
+class HIntermediateAddressIndex FINAL : public HExpression<3> {
+ public:
+ HIntermediateAddressIndex(
+ HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc)
+ : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+ SetRawInputAt(0, index);
+ SetRawInputAt(1, offset);
+ SetRawInputAt(2, shift);
+ }
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ return true;
+ }
+ bool IsActualObject() const OVERRIDE { return false; }
+
+ HInstruction* GetIndex() const { return InputAt(0); }
+ HInstruction* GetOffset() const { return InputAt(1); }
+ HInstruction* GetShift() const { return InputAt(2); }
+
+ DECLARE_INSTRUCTION(IntermediateAddressIndex);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HIntermediateAddressIndex);
+};
+
class HDataProcWithShifterOp FINAL : public HExpression<2> {
public:
enum OpKind {
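A worked instance of the decomposition above: for an int[] on a 32-bit runtime the data offset is 12 bytes and the element shift is 2 (both values assumed here for illustration), so a[i], b[i] and c[i] all reuse one 12 + (i << 2) term. A scalar sketch:

    #include <cstdint>

    // The part HIntermediateAddressIndex computes: CONST_OFFSET + (index << ELEM_SHIFT).
    uint64_t SharedAddressPart(uint32_t data_offset, uint64_t index, uint32_t shift) {
      return data_offset + (index << shift);
    }

    // Each access then only adds its own base: a[i], b[i] and c[i] share one part.
    uint64_t ElementAddress(uint64_t base_addr, uint64_t shared_part) {
      return base_addr + shared_part;
    }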
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 52c247b52f..92fe9bfa7d 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -178,12 +178,17 @@ class HVecMemoryOperation : public HVecOperation {
size_t vector_length,
uint32_t dex_pc)
: HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc),
- alignment_(Primitive::ComponentSize(packed_type), 0) { }
+ alignment_(Primitive::ComponentSize(packed_type), 0) {
+ DCHECK_GE(number_of_inputs, 2u);
+ }
void SetAlignment(Alignment alignment) { alignment_ = alignment; }
Alignment GetAlignment() const { return alignment_; }
+ HInstruction* GetArray() const { return InputAt(0); }
+ HInstruction* GetIndex() const { return InputAt(1); }
+
DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation);
private:
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 065c11eddb..f928f71209 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -638,11 +638,14 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
new (arena) arm::InstructionSimplifierArm(graph, stats);
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+ HInstructionScheduling* scheduling =
+ new (arena) HInstructionScheduling(graph, instruction_set, codegen);
HOptimization* arm_optimizations[] = {
simplifier,
side_effects,
gvn,
- fixups
+ fixups,
+ scheduling,
};
RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
break;
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 87f709f63d..300f4c6239 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1968,8 +1968,7 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in
ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
allocator_->Adapter(kArenaAllocRegisterAllocator));
- for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) {
- LiveInterval* parent_interval = *it;
+ for (LiveInterval* parent_interval : *intervals) {
DCHECK(parent_interval->IsParent());
DCHECK(!parent_interval->HasSpillSlot());
size_t start = parent_interval->GetStart();
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index d65d20cf43..320f01a727 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -23,6 +23,10 @@
#include "scheduler_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "scheduler_arm.h"
+#endif
+
namespace art {
void SchedulingGraph::AddDependency(SchedulingNode* node,
@@ -264,10 +268,11 @@ void SchedulingGraph::DumpAsDotGraph(const std::string& description,
// Start the dot graph. Use an increasing index for easier differentiation.
output << "digraph G {\n";
for (const auto& entry : nodes_map_) {
- DumpAsDotNode(output, entry.second);
+ SchedulingNode* node = entry.second;
+ DumpAsDotNode(output, node);
}
// Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
- for (auto node : initial_candidates) {
+ for (SchedulingNode* node : initial_candidates) {
const HInstruction* instruction = node->GetInstruction();
output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
<< "[label=\"" << node->GetLatency() << "\",dir=back]\n";
@@ -580,28 +585,39 @@ bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
bool schedule_randomly) {
+#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
+ // Phase-local allocator that allocates scheduler internal data structures like
+ // scheduling nodes, internal nodes map, dependencies, etc.
+ ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ RandomSchedulingNodeSelector random_selector;
+ SchedulingNodeSelector* selector = schedule_randomly
+ ? static_cast<SchedulingNodeSelector*>(&random_selector)
+ : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
+#else
// Avoid compilation error when compiling for unsupported instruction set.
UNUSED(only_optimize_loop_blocks);
UNUSED(schedule_randomly);
+#endif
switch (instruction_set_) {
#ifdef ART_ENABLE_CODEGEN_arm64
case kArm64: {
- // Phase-local allocator that allocates scheduler internal data structures like
- // scheduling nodes, internel nodes map, dependencies, etc.
- ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
-
- CriticalPathSchedulingNodeSelector critical_path_selector;
- RandomSchedulingNodeSelector random_selector;
- SchedulingNodeSelector* selector = schedule_randomly
- ? static_cast<SchedulingNodeSelector*>(&random_selector)
- : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
-
arm64::HSchedulerARM64 scheduler(&arena_allocator, selector);
scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
scheduler.Schedule(graph_);
break;
}
#endif
+#if defined(ART_ENABLE_CODEGEN_arm)
+ case kThumb2:
+ case kArm: {
+ arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
+ arm::HSchedulerARM scheduler(&arena_allocator, selector, &arm_latency_visitor);
+ scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
+ scheduler.Schedule(graph_);
+ break;
+ }
+#endif
default:
break;
}
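Hoisting the arena and the selector above the switch lets the ARM64 path and the new Thumb2/ARM path share one SchedulingNodeSelector chosen up front. A toy model of that pattern (simplified types, not the real scheduler classes):

    #include <cstdio>

    struct Selector { virtual const char* Name() const = 0; virtual ~Selector() {} };
    struct CriticalPathSelector : Selector {
      const char* Name() const override { return "critical path"; }
    };
    struct RandomSelector : Selector {
      const char* Name() const override { return "random"; }
    };

    // One selector instance is chosen before dispatching on the instruction set,
    // then handed to whichever back-end scheduler runs.
    const char* PickSelector(bool schedule_randomly,
                             CriticalPathSelector* critical, RandomSelector* random) {
      Selector* selector = schedule_randomly ? static_cast<Selector*>(random)
                                             : static_cast<Selector*>(critical);
      return selector->Name();
    }

    int main() {
      CriticalPathSelector critical;
      RandomSelector random;
      std::printf("selector: %s\n", PickSelector(false, &critical, &random));
      return 0;
    }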
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index 9236a0e4fa..73e8087cd0 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -23,6 +23,7 @@
#include "driver/compiler_driver.h"
#include "nodes.h"
#include "optimization.h"
+#include "code_generator.h"
namespace art {
@@ -469,8 +470,9 @@ inline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction
class HInstructionScheduling : public HOptimization {
public:
- HInstructionScheduling(HGraph* graph, InstructionSet instruction_set)
+ HInstructionScheduling(HGraph* graph, InstructionSet instruction_set, CodeGenerator* cg = nullptr)
: HOptimization(graph, kInstructionScheduling),
+ codegen_(cg),
instruction_set_(instruction_set) {}
void Run() {
@@ -480,6 +482,7 @@ class HInstructionScheduling : public HOptimization {
static constexpr const char* kInstructionScheduling = "scheduler";
+ CodeGenerator* const codegen_;
const InstructionSet instruction_set_;
private:
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
new file mode 100644
index 0000000000..1a89567991
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -0,0 +1,822 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_utils.h"
+#include "common_arm.h"
+#include "mirror/array-inl.h"
+#include "scheduler_arm.h"
+
+namespace art {
+namespace arm {
+
+using helpers::Int32ConstantFrom;
+using helpers::Uint64ConstantFrom;
+
+void SchedulingLatencyVisitorARM::HandleBinaryOperationLatencies(HBinaryOperation* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimLong:
+ // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
+ // so a bubble (kArmNopLatency) is added to represent the internal carry flag
+ // dependency inside these pairs.
+ last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
+ HandleBinaryOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
+ HandleBinaryOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmMulFloatingPointLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmMulIntegerLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleBitwiseOperationLatencies(HBinaryOperation* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
+ HandleBitwiseOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
+ HandleBitwiseOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
+ HandleBitwiseOperationLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimInt:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimLong: {
+ // Mirrors the code generator's HandleLongRotate.
+ HInstruction* rhs = instr->GetRight();
+ if (rhs->IsConstant()) {
+ uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
+ if (rot != 0u) {
+ last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+ } else {
+ last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
+ last_visited_latency_ = kArmBranchLatency;
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
+ Primitive::Type type = instr->GetResultType();
+ HInstruction* rhs = instr->GetRight();
+ switch (type) {
+ case Primitive::kPrimInt:
+ if (!rhs->IsConstant()) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ if (!rhs->IsConstant()) {
+ last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
+ } else {
+ uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
+ if (shift_value == 1 || shift_value >= 32) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ }
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ default:
+ LOG(FATAL) << "Unexpected operation type " << type;
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
+ HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
+ HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
+ HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
+ switch (instr->GetLeft()->GetType()) {
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 4 * kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_internal_latency_ = 2 * kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ break;
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
+ Primitive::Type type = instr->InputAt(0)->GetType();
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ break;
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+ if (instruction->GetResultType() == Primitive::kPrimInt) {
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
+ if (internal_latency) {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmDataProcWithShifterOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
+ const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+ if (kind == HInstruction::kAdd || kind == HInstruction::kSub) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ HandleGenerateDataProcInstruction(/* internal_latency */ true);
+ HandleGenerateDataProcInstruction();
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+ DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
+
+ const uint32_t shift_value = instruction->GetShiftAmount();
+ const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+
+ if (shift_value >= 32) {
+ // Different shift types actually generate similar code here; there is no need to
+ // differentiate shift types as the codegen pass does, which also avoids having to
+ // handle shift types from different ARM backends.
+ HandleGenerateDataProc(instruction);
+ } else {
+ DCHECK_GT(shift_value, 1U);
+ DCHECK_LT(shift_value, 32U);
+
+ if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+ HandleGenerateDataProcInstruction(/* internal_latency */ true);
+ HandleGenerateDataProcInstruction(/* internal_latency */ true);
+ HandleGenerateDataProcInstruction();
+ } else {
+ last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+ HandleGenerateDataProc(instruction);
+ }
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
+ const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+
+ if (instruction->GetType() == Primitive::kPrimInt) {
+ DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
+ HandleGenerateDataProcInstruction();
+ } else {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+ if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+ HandleGenerateDataProc(instruction);
+ } else {
+ HandleGenerateLongDataProc(instruction);
+ }
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
+ // Although the code generated is a simple `add` instruction, we found through empirical
+ // results that scheduling it apart from its uses in memory accesses was beneficial.
+ last_visited_internal_latency_ = kArmNopLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArmMulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
+ Primitive::Type type = instruction->GetType();
+ const bool maybe_compressed_char_at =
+ mirror::kUseStringCompression && instruction->IsStringCharAt();
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+ HInstruction* index = instruction->InputAt(1);
+
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt: {
+ if (maybe_compressed_char_at) {
+ last_visited_internal_latency_ += kArmMemoryLoadLatency;
+ }
+ if (index->IsConstant()) {
+ if (maybe_compressed_char_at) {
+ last_visited_internal_latency_ +=
+ kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmBranchLatency;
+ } else {
+ last_visited_latency_ += kArmMemoryLoadLatency;
+ }
+ } else {
+ if (!has_intermediate_address) {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ }
+ if (maybe_compressed_char_at) {
+ last_visited_internal_latency_ +=
+ kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmBranchLatency;
+ } else {
+ last_visited_latency_ += kArmMemoryLoadLatency;
+ }
+ }
+ break;
+ }
+
+ case Primitive::kPrimNot: {
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
+ } else {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ if (!has_intermediate_address) {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ }
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ }
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimFloat: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ last_visited_internal_latency_ = kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
+ HInstruction* index = instruction->InputAt(1);
+ Primitive::Type value_type = instruction->GetComponentType();
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+
+ switch (value_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ } else {
+ if (!has_intermediate_address) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ }
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimNot: {
+ if (instruction->InputAt(2)->IsNullConstant()) {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ } else {
+ // Modeling the exact instruction sequence of runtime type checks is too complicated;
+ // just give it a single slow latency.
+ last_visited_latency_ = kArmRuntimeTypeCheckLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimFloat: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unreachable type " << value_type;
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ // Users do not use any data results.
+ last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
+ if (imm == 0) {
+ last_visited_internal_latency_ = 0;
+ last_visited_latency_ = 0;
+ } else if (imm == 1 || imm == -1) {
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
+ Primitive::Type type = instruction->GetResultType();
+ switch (type) {
+ case Primitive::kPrimInt: {
+ HInstruction* rhs = instruction->GetRight();
+ if (rhs->IsConstant()) {
+ int32_t imm = Int32ConstantFrom(rhs->AsConstant());
+ HandleDivRemConstantIntegralLatencies(imm);
+ } else {
+ last_visited_latency_ = kArmDivIntegerLatency;
+ }
+ break;
+ }
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmDivFloatLatency;
+ break;
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmDivDoubleLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmLoadStringInternalLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
+ if (instruction->IsStringAlloc()) {
+ last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
+ } else {
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ }
+ last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
+ Primitive::Type type = instruction->GetResultType();
+ switch (type) {
+ case Primitive::kPrimInt: {
+ HInstruction* rhs = instruction->GetRight();
+ if (rhs->IsConstant()) {
+ int32_t imm = Int32ConstantFrom(rhs->AsConstant());
+ HandleDivRemConstantIntegralLatencies(imm);
+ } else {
+ last_visited_internal_latency_ = kArmDivIntegerLatency;
+ last_visited_latency_ = kArmMulIntegerLatency;
+ }
+ break;
+ }
+ default:
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+ DCHECK(codegen_ != nullptr);
+ bool is_volatile = field_info.IsVolatile();
+ Primitive::Type field_type = field_info.GetFieldType();
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+
+ switch (field_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt:
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ break;
+
+ case Primitive::kPrimNot:
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+
+ case Primitive::kPrimLong:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ break;
+
+ case Primitive::kPrimDouble:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ =
+ kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+
+ default:
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ break;
+ }
+
+ if (is_volatile) {
+ last_visited_internal_latency_ += kArmMemoryBarrierLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+ DCHECK(codegen_ != nullptr);
+ bool is_volatile = field_info.IsVolatile();
+ Primitive::Type field_type = field_info.GetFieldType();
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+
+ switch (field_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ if (is_volatile) {
+ last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
+ last_visited_latency_ = kArmMemoryBarrierLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot:
+ if (kPoisonHeapReferences && needs_write_barrier) {
+ last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
+ }
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ break;
+
+ case Primitive::kPrimLong:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ =
+ kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ break;
+
+ case Primitive::kPrimDouble:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency +
+ kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+
+ default:
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
+ HBasicBlock* block = instruction->GetBlock();
+ DCHECK((block->GetLoopInformation() != nullptr) ||
+ (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
+ // Users do not use any data results.
+ last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
+ Primitive::Type result_type = instr->GetResultType();
+ Primitive::Type input_type = instr->GetInputType();
+
+ switch (result_type) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ last_visited_latency_ = kArmIntegerOpLatency; // SBFX or UBFX
+ break;
+
+ case Primitive::kPrimInt:
+ switch (input_type) {
+ case Primitive::kPrimLong:
+ last_visited_latency_ = kArmIntegerOpLatency; // MOV
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+ break;
+
+ case Primitive::kPrimLong:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ // MOV and extension
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ // invokes runtime
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ // invokes runtime
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ break;
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ }
+ break;
+
+ case Primitive::kPrimDouble:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ }
+ break;
+
+ default:
+ last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitArmDexCacheArraysBase(art::HArmDexCacheArraysBase*) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+} // namespace arm
+} // namespace art
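Throughout the visitor, a node's cost is reported in two parts: last_visited_internal_latency_ (cycles consumed by the hidden instructions the node expands to) and last_visited_latency_ (cycles from the final result-producing instruction to its users). A toy accounting of one case, using the constants declared in scheduler_arm.h below; the additive composition is assumed here for illustration:

    #include <cstdint>
    #include <cstdio>

    // Values as declared in scheduler_arm.h.
    static constexpr uint32_t kArmIntegerOpLatency = 2;
    static constexpr uint32_t kArmBranchLatency = 4;

    int main() {
      // VisitCompare, Primitive::kPrimLong: internal expansion plus result latency.
      uint32_t internal = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
      uint32_t result = kArmIntegerOpLatency;
      std::printf("HCompare(long) path cost: %u cycles\n", internal + result);  // 18
      return 0;
    }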
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
new file mode 100644
index 0000000000..8d5e4f375b
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+
+#include "code_generator_arm_vixl.h"
+#include "scheduler.h"
+
+namespace art {
+namespace arm {
+#ifdef ART_USE_OLD_ARM_BACKEND
+typedef CodeGeneratorARM CodeGeneratorARMType;
+#else
+typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
+#endif
+
+// AArch32 instruction latencies.
+// We currently assume that all ARM CPUs share the same instruction latency list.
+// The following latencies were tuned based on performance experiments and
+// automatic tuning using a differential evolution approach on various benchmarks.
+static constexpr uint32_t kArmIntegerOpLatency = 2;
+static constexpr uint32_t kArmFloatingPointOpLatency = 11;
+static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
+static constexpr uint32_t kArmMulIntegerLatency = 6;
+static constexpr uint32_t kArmMulFloatingPointLatency = 11;
+static constexpr uint32_t kArmDivIntegerLatency = 10;
+static constexpr uint32_t kArmDivFloatLatency = 20;
+static constexpr uint32_t kArmDivDoubleLatency = 25;
+static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
+static constexpr uint32_t kArmMemoryLoadLatency = 9;
+static constexpr uint32_t kArmMemoryStoreLatency = 9;
+static constexpr uint32_t kArmMemoryBarrierLatency = 6;
+static constexpr uint32_t kArmBranchLatency = 4;
+static constexpr uint32_t kArmCallLatency = 5;
+static constexpr uint32_t kArmCallInternalLatency = 29;
+static constexpr uint32_t kArmLoadStringInternalLatency = 10;
+static constexpr uint32_t kArmNopLatency = 2;
+static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
+static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
+
+class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
+ public:
+ explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
+ : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
+
+ // Default visitor for instructions not handled specifically below.
+ void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+
+// We add a second unused parameter to be able to use this macro like the others
+// defined in `nodes.h`.
+#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
+ M(ArrayGet , unused) \
+ M(ArrayLength , unused) \
+ M(ArraySet , unused) \
+ M(Add , unused) \
+ M(Sub , unused) \
+ M(And , unused) \
+ M(Or , unused) \
+ M(Ror , unused) \
+ M(Xor , unused) \
+ M(Shl , unused) \
+ M(Shr , unused) \
+ M(UShr , unused) \
+ M(Mul , unused) \
+ M(Div , unused) \
+ M(Condition , unused) \
+ M(Compare , unused) \
+ M(BoundsCheck , unused) \
+ M(InstanceFieldGet , unused) \
+ M(InstanceFieldSet , unused) \
+ M(InstanceOf , unused) \
+ M(Invoke , unused) \
+ M(LoadString , unused) \
+ M(NewArray , unused) \
+ M(NewInstance , unused) \
+ M(Rem , unused) \
+ M(StaticFieldGet , unused) \
+ M(StaticFieldSet , unused) \
+ M(SuspendCheck , unused) \
+ M(TypeConversion , unused)
+
+#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
+ M(BitwiseNegatedRight, unused) \
+ M(MultiplyAccumulate, unused) \
+ M(IntermediateAddress, unused) \
+ M(DataProcWithShifterOp, unused)
+
+#define DECLARE_VISIT_INSTRUCTION(type, unused) \
+ void Visit##type(H##type* instruction) OVERRIDE;
+
+ FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+ void HandleBinaryOperationLantencies(HBinaryOperation* instr);
+ void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
+ void HandleShiftLatencies(HBinaryOperation* instr);
+ void HandleDivRemConstantIntegralLatencies(int32_t imm);
+ void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleGenerateDataProcInstruction(bool internal_latency = false);
+ void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
+ void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
+
+  // The latency setting for each HInstruction depends on how the CodeGenerator
+  // may generate code; latency visitors may query it to set latencies accurately.
+ CodeGeneratorARMType* codegen_;
+};
+
+class HSchedulerARM : public HScheduler {
+ public:
+ HSchedulerARM(ArenaAllocator* arena,
+ SchedulingNodeSelector* selector,
+ SchedulingLatencyVisitorARM* arm_latency_visitor)
+ : HScheduler(arena, arm_latency_visitor, selector) {}
+ ~HSchedulerARM() OVERRIDE {}
+
+ bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
+#define CASE_INSTRUCTION_KIND(type, unused) \
+  case HInstruction::InstructionKind::k##type:
+ switch (instruction->GetKind()) {
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
+ return true;
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
+ return true;
+ default:
+ return HScheduler::IsSchedulable(instruction);
+ }
+#undef CASE_INSTRUCTION_KIND
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
+};
+
+} // namespace arm
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
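
Taken together, the header gives the scheduling pass a per-backend entry point. A usage sketch of the wiring, mirroring the arm64 path and the test change below (the instantiation site is illustrative; graph and codegen are assumed to come from the surrounding compilation pass):

    // Sketch: wiring up the ARM scheduler with a critical-path selector.
    CriticalPathSchedulingNodeSelector selector;
    arm::SchedulingLatencyVisitorARM latency_visitor(codegen);
    arm::HSchedulerARM scheduler(graph->GetArena(), &selector, &latency_visitor);
    scheduler.Schedule(graph);

Unlike HSchedulerARM64, HSchedulerARM takes its latency visitor as a constructor argument: as noted in the header, the visitor may need the CodeGenerator to know which code pattern will be emitted and therefore which latency applies.
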
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index 31d13e2a26..d87600aa5e 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -28,6 +28,10 @@
#include "scheduler_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "scheduler_arm.h"
+#endif
+
namespace art {
// Return all combinations of ISA and code generator that are executable on
@@ -56,7 +60,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
#endif
};
- for (auto test_config : test_config_candidates) {
+ for (const CodegenTargetConfig& test_config : test_config_candidates) {
if (CanExecute(test_config.GetInstructionSet())) {
v.push_back(test_config);
}
@@ -65,133 +69,151 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
return v;
}
-class SchedulerTest : public CommonCompilerTest {};
-
-#ifdef ART_ENABLE_CODEGEN_arm64
-TEST_F(SchedulerTest, DependencyGraph) {
- ArenaPool pool;
- ArenaAllocator allocator(&pool);
- HGraph* graph = CreateGraph(&allocator);
- HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
- HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(entry);
- graph->AddBlock(block1);
- graph->SetEntryBlock(entry);
-
- // entry:
- // array ParameterValue
- // c1 IntConstant
- // c2 IntConstant
- // block1:
- // add1 Add [c1, c2]
- // add2 Add [add1, c2]
- // mul Mul [add1, add2]
- // div_check DivZeroCheck [add2] (env: add2, mul)
- // div Div [add1, div_check]
- // array_get1 ArrayGet [array, add1]
- // array_set1 ArraySet [array, add1, add2]
- // array_get2 ArrayGet [array, add1]
- // array_set2 ArraySet [array, add1, add2]
-
- HInstruction* array = new (&allocator) HParameterValue(graph->GetDexFile(),
- dex::TypeIndex(0),
- 0,
- Primitive::kPrimNot);
- HInstruction* c1 = graph->GetIntConstant(1);
- HInstruction* c2 = graph->GetIntConstant(10);
- HInstruction* add1 = new (&allocator) HAdd(Primitive::kPrimInt, c1, c2);
- HInstruction* add2 = new (&allocator) HAdd(Primitive::kPrimInt, add1, c2);
- HInstruction* mul = new (&allocator) HMul(Primitive::kPrimInt, add1, add2);
- HInstruction* div_check = new (&allocator) HDivZeroCheck(add2, 0);
- HInstruction* div = new (&allocator) HDiv(Primitive::kPrimInt, add1, div_check, 0);
- HInstruction* array_get1 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
- HInstruction* array_set1 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
- HInstruction* array_get2 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
- HInstruction* array_set2 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
-
- DCHECK(div_check->CanThrow());
-
- entry->AddInstruction(array);
-
- HInstruction* block_instructions[] = {add1,
- add2,
- mul,
- div_check,
- div,
- array_get1,
- array_set1,
- array_get2,
- array_set2};
- for (auto instr : block_instructions) {
- block1->AddInstruction(instr);
+class SchedulerTest : public CommonCompilerTest {
+ public:
+ SchedulerTest() : pool_(), allocator_(&pool_) {
+ graph_ = CreateGraph(&allocator_);
}
- HEnvironment* environment = new (&allocator) HEnvironment(&allocator,
- 2,
- graph->GetArtMethod(),
+  // Build the scheduling graph and run target-specific scheduling on it.
+ void TestBuildDependencyGraphAndSchedule(HScheduler* scheduler) {
+ HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+ HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(entry);
+ graph_->AddBlock(block1);
+ graph_->SetEntryBlock(entry);
+
+ // entry:
+ // array ParameterValue
+ // c1 IntConstant
+ // c2 IntConstant
+ // block1:
+ // add1 Add [c1, c2]
+ // add2 Add [add1, c2]
+ // mul Mul [add1, add2]
+ // div_check DivZeroCheck [add2] (env: add2, mul)
+ // div Div [add1, div_check]
+ // array_get1 ArrayGet [array, add1]
+ // array_set1 ArraySet [array, add1, add2]
+ // array_get2 ArrayGet [array, add1]
+ // array_set2 ArraySet [array, add1, add2]
+
+ HInstruction* array = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
0,
- div_check);
- div_check->SetRawEnvironment(environment);
- environment->SetRawEnvAt(0, add2);
- add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
- environment->SetRawEnvAt(1, mul);
- mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
-
- ArenaAllocator* arena = graph->GetArena();
- CriticalPathSchedulingNodeSelector critical_path_selector;
- arm64::HSchedulerARM64 scheduler(arena, &critical_path_selector);
- SchedulingGraph scheduling_graph(&scheduler, arena);
- // Instructions must be inserted in reverse order into the scheduling graph.
- for (auto instr : ReverseRange(block_instructions)) {
- scheduling_graph.AddNode(instr);
+ Primitive::kPrimNot);
+ HInstruction* c1 = graph_->GetIntConstant(1);
+ HInstruction* c2 = graph_->GetIntConstant(10);
+ HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, c1, c2);
+ HInstruction* add2 = new (&allocator_) HAdd(Primitive::kPrimInt, add1, c2);
+ HInstruction* mul = new (&allocator_) HMul(Primitive::kPrimInt, add1, add2);
+ HInstruction* div_check = new (&allocator_) HDivZeroCheck(add2, 0);
+ HInstruction* div = new (&allocator_) HDiv(Primitive::kPrimInt, add1, div_check, 0);
+ HInstruction* array_get1 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+ HInstruction* array_set1 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+ HInstruction* array_get2 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+ HInstruction* array_set2 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+
+ DCHECK(div_check->CanThrow());
+
+ entry->AddInstruction(array);
+
+ HInstruction* block_instructions[] = {add1,
+ add2,
+ mul,
+ div_check,
+ div,
+ array_get1,
+ array_set1,
+ array_get2,
+ array_set2};
+ for (HInstruction* instr : block_instructions) {
+ block1->AddInstruction(instr);
+ }
+
+ HEnvironment* environment = new (&allocator_) HEnvironment(&allocator_,
+ 2,
+ graph_->GetArtMethod(),
+ 0,
+ div_check);
+ div_check->SetRawEnvironment(environment);
+ environment->SetRawEnvAt(0, add2);
+ add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
+ environment->SetRawEnvAt(1, mul);
+ mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
+
+ SchedulingGraph scheduling_graph(scheduler, graph_->GetArena());
+ // Instructions must be inserted in reverse order into the scheduling graph.
+ for (HInstruction* instr : ReverseRange(block_instructions)) {
+ scheduling_graph.AddNode(instr);
+ }
+
+    // Dependencies should not cross basic blocks.
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
+
+ // Define-use dependency.
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
+
+    // Read and write dependencies.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
+
+ // Env dependency.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
+
+ // CanThrow.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+
+    // Exercise the code paths of the target-specific scheduler and its SchedulingLatencyVisitor.
+ scheduler->Schedule(graph_);
}
- // Should not have dependencies cross basic blocks.
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
-
- // Define-use dependency.
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
-
- // Read and write dependencies
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
-
- // Env dependency.
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
- ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
-
- // CanThrow.
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+ void CompileWithRandomSchedulerAndRun(const uint16_t* data, bool has_result, int expected) {
+    for (const CodegenTargetConfig& target_config : GetTargetConfigs()) {
+ HGraph* graph = CreateCFG(&allocator_, data);
+
+ // Schedule the graph randomly.
+ HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
+ scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
+
+ RunCode(target_config,
+ graph,
+ [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
+ has_result, expected);
+ }
+ }
+
+ ArenaPool pool_;
+ ArenaAllocator allocator_;
+ HGraph* graph_;
+};
+
+#if defined(ART_ENABLE_CODEGEN_arm64)
+TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) {
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector);
+ TestBuildDependencyGraphAndSchedule(&scheduler);
}
#endif
-static void CompileWithRandomSchedulerAndRun(const uint16_t* data,
- bool has_result,
- int expected) {
- for (CodegenTargetConfig target_config : GetTargetConfigs()) {
- ArenaPool pool;
- ArenaAllocator arena(&pool);
- HGraph* graph = CreateCFG(&arena, data);
-
- // Schedule the graph randomly.
- HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
- scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
-
- RunCode(target_config,
- graph,
- [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
- has_result, expected);
- }
+#if defined(ART_ENABLE_CODEGEN_arm)
+TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) {
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr);
+ arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor);
+ TestBuildDependencyGraphAndSchedule(&scheduler);
}
+#endif
TEST_F(SchedulerTest, RandomScheduling) {
//
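
The RandomScheduling test above exercises correctness rather than performance: any topologically valid order picked by the random selector must compute the same result as the original order. The call shape, sketched with a hypothetical dex code item (the CODE_ITEM helper, the opcode words, and the expected value are assumptions, not part of the patch):

    // Sketch: `data` holds a dex code item crafted so that any illegal
    // reordering changes the computed result.
    const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
        Instruction::CONST_4 | 0 << 8 | 7 << 12,   // v0 <- 7
        Instruction::RETURN | 0 << 8);             // return v0
    CompileWithRandomSchedulerAndRun(data, /* has_result */ true, /* expected */ 7);
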
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index eedaf6e67e..98ded24257 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -56,7 +56,7 @@ static bool IsInBootImage(ArtMethod* method) {
const std::vector<gc::space::ImageSpace*>& image_spaces =
Runtime::Current()->GetHeap()->GetBootImageSpaces();
for (gc::space::ImageSpace* image_space : image_spaces) {
- const auto& method_section = image_space->GetImageHeader().GetMethodsSection();
+ const ImageSection& method_section = image_space->GetImageHeader().GetMethodsSection();
if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) {
return true;
}
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 5c36110cf6..2ff9018510 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -924,9 +924,11 @@ class Thumb2Assembler FINAL : public ArmAssembler {
class ScopedForce32Bit {
public:
- explicit ScopedForce32Bit(Thumb2Assembler* assembler)
+ explicit ScopedForce32Bit(Thumb2Assembler* assembler, bool force = true)
: assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) {
- assembler->Force32Bit();
+ if (force) {
+ assembler->Force32Bit();
+ }
}
~ScopedForce32Bit() {