diff options
author | 2016-07-07 15:37:02 +0100 | |
---|---|---|
committer | 2016-07-08 14:33:16 +0100 | |
commit | ebdbf4b6f213fb98654604073985fb074c7beca1 (patch) | |
tree | 4901cc586575c92b87e72f159ce3335805e0fb91 | |
parent | 74c0d1bb67f9c6ee8306f0318ab7251d56dc99d6 (diff) |
ARM: Use 64-bit literals for LoadDImmediate().
Also rewrite the medium-range long/fp literal load to use
MOVW+ADD+LDRD/VLDR, because the old instruction sequence
was broken when the "ADD ip, pc" was not 4-byte aligned.
Test: assembler_thumb2_test has been updated. Standard
ART test suite has been run on host and Nexus 5.
Change-Id: I37c6a62aa6e77c6a9701b5a1fb4db2e666c1eae9
-rw-r--r-- | compiler/utils/arm/assembler_arm.h | 27 | ||||
-rw-r--r-- | compiler/utils/arm/assembler_arm32.cc | 28 | ||||
-rw-r--r-- | compiler/utils/arm/assembler_arm32.h | 1 | ||||
-rw-r--r-- | compiler/utils/arm/assembler_thumb2.cc | 35 | ||||
-rw-r--r-- | compiler/utils/arm/assembler_thumb2.h | 15 | ||||
-rw-r--r-- | compiler/utils/arm/assembler_thumb2_test.cc | 61 |
6 files changed, 112 insertions, 55 deletions
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index a571d14a71..9cf72a2601 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -754,32 +754,7 @@ class ArmAssembler : public Assembler { } } - void LoadDImmediate(DRegister sd, double value, Condition cond = AL) { - if (!vmovd(sd, value, cond)) { - uint64_t int_value = bit_cast<uint64_t, double>(value); - if (int_value == bit_cast<uint64_t, double>(0.0)) { - // 0.0 is quite common, so we special case it by loading - // 2.0 in `sd` and then substracting it. - bool success = vmovd(sd, 2.0, cond); - CHECK(success); - vsubd(sd, sd, sd, cond); - } else { - if (sd < 16) { - SRegister low = static_cast<SRegister>(sd << 1); - SRegister high = static_cast<SRegister>(low + 1); - LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond); - if (High32Bits(int_value) == Low32Bits(int_value)) { - vmovs(high, low); - } else { - LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond); - } - } else { - LOG(FATAL) << "Unimplemented loading of double into a D register " - << "that cannot be split into two S registers"; - } - } - } - } + virtual void LoadDImmediate(DRegister dd, double value, Condition cond = AL) = 0; virtual void MarkExceptionHandler(Label* label) = 0; virtual void LoadFromOffset(LoadOperandType type, diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index 6f7119d578..c95dfa8066 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -1486,6 +1486,34 @@ void Arm32Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) { } } +void Arm32Assembler::LoadDImmediate(DRegister dd, double value, Condition cond) { + if (!vmovd(dd, value, cond)) { + uint64_t int_value = bit_cast<uint64_t, double>(value); + if (int_value == bit_cast<uint64_t, double>(0.0)) { + // 0.0 is quite common, so we special case it by loading + // 
2.0 in `dd` and then subtracting it. + bool success = vmovd(dd, 2.0, cond); + CHECK(success); + vsubd(dd, dd, dd, cond); + } else { + if (dd < 16) { + // Note: Depending on the particular CPU, this may cause register + // forwarding hazard, negatively impacting the performance. + SRegister low = static_cast<SRegister>(dd << 1); + SRegister high = static_cast<SRegister>(low + 1); + LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond); + if (High32Bits(int_value) == Low32Bits(int_value)) { + vmovs(high, low); + } else { + LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond); + } + } else { + LOG(FATAL) << "Unimplemented loading of double into a D register " + << "that cannot be split into two S registers"; + } + } + } +} // Implementation note: this method must emit at most one instruction when // Address::CanHoldLoadOffsetArm. diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 8726ac85fd..554dd2350b 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -270,6 +270,7 @@ class Arm32Assembler FINAL : public ArmAssembler { // Load and Store. May clobber IP. 
void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE; + void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE; void MarkExceptionHandler(Label* label) OVERRIDE; void LoadFromOffset(LoadOperandType type, Register reg, diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index a72ea410ce..8747dad5e5 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -1917,7 +1917,7 @@ inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) { case kLongOrFPLiteral1KiB: return 4u; - case kLongOrFPLiteral256KiB: + case kLongOrFPLiteral64KiB: return 10u; case kLongOrFPLiteralFar: return 14u; @@ -1989,7 +1989,7 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con break; case kLiteral1MiB: case kLiteral64KiB: - case kLongOrFPLiteral256KiB: + case kLongOrFPLiteral64KiB: case kLiteralAddr64KiB: DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC. diff -= 4; // One extra 32-bit MOV. 
@@ -2105,10 +2105,10 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) if (IsUint<10>(GetOffset(current_code_size))) { break; } - current_code_size += IncreaseSize(kLongOrFPLiteral256KiB); + current_code_size += IncreaseSize(kLongOrFPLiteral64KiB); FALLTHROUGH_INTENDED; - case kLongOrFPLiteral256KiB: - if (IsUint<18>(GetOffset(current_code_size))) { + case kLongOrFPLiteral64KiB: + if (IsUint<16>(GetOffset(current_code_size))) { break; } current_code_size += IncreaseSize(kLongOrFPLiteralFar); @@ -2269,11 +2269,10 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } - case kLongOrFPLiteral256KiB: { - int32_t offset = GetOffset(code_size); - int32_t mov_encoding = MovModImmEncoding32(IP, offset & ~0x3ff); + case kLongOrFPLiteral64KiB: { + int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size)); int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); - int32_t ldr_encoding = LoadWideOrFpEncoding(IP, offset & 0x3ff); // DCHECKs type_. + int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u); // DCHECKs type_. buffer->Store<int16_t>(location_, mov_encoding >> 16); buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); @@ -3598,6 +3597,24 @@ void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) } } +void Thumb2Assembler::LoadDImmediate(DRegister dd, double value, Condition cond) { + if (!vmovd(dd, value, cond)) { + uint64_t int_value = bit_cast<uint64_t, double>(value); + if (int_value == bit_cast<uint64_t, double>(0.0)) { + // 0.0 is quite common, so we special case it by loading + // 2.0 in `dd` and then subtracting it. 
+ bool success = vmovd(dd, 2.0, cond); + CHECK(success); + vsubd(dd, dd, dd, cond); + } else { + Literal* literal = literal64_dedupe_map_.GetOrCreate( + int_value, + [this, int_value]() { return NewLiteral<uint64_t>(int_value); }); + LoadLiteral(dd, literal); + } + } +} + int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) { switch (type) { case kLoadSignedByte: diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 2ca74fc863..4ee23c0e27 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -43,6 +43,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { fixups_(arena->Adapter(kArenaAllocAssembler)), fixup_dependents_(arena->Adapter(kArenaAllocAssembler)), literals_(arena->Adapter(kArenaAllocAssembler)), + literal64_dedupe_map_(std::less<uint64_t>(), arena->Adapter(kArenaAllocAssembler)), jump_tables_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0u), last_old_position_(0u), @@ -319,6 +320,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Load and Store. May clobber IP. void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE; + void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE; void MarkExceptionHandler(Label* label) OVERRIDE; void LoadFromOffset(LoadOperandType type, Register reg, @@ -464,8 +466,8 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Load long or FP literal variants. // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes. kLongOrFPLiteral1KiB, - // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes. - kLongOrFPLiteral256KiB, + // MOV ip, imm16 + ADD ip, pc + VLDR s/dX, [IP, #0]; up to 64KiB offset; 10 bytes. + kLongOrFPLiteral64KiB, // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes. 
kLongOrFPLiteralFar, }; @@ -500,7 +502,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Load wide literal. static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2, Size size = kLongOrFPLiteral1KiB) { - DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB || size == kLongOrFPLiteralFar); DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB)); return Fixup(rt, rt2, kNoSRegister, kNoDRegister, @@ -510,7 +512,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Load FP single literal. static Fixup LoadSingleLiteral(uint32_t location, SRegister sd, Size size = kLongOrFPLiteral1KiB) { - DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB || size == kLongOrFPLiteralFar); return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister, AL, kLoadFPLiteralSingle, size, location); @@ -519,7 +521,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Load FP double literal. static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd, Size size = kLongOrFPLiteral1KiB) { - DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB || size == kLongOrFPLiteralFar); return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd, AL, kLoadFPLiteralDouble, size, location); @@ -870,6 +872,9 @@ class Thumb2Assembler FINAL : public ArmAssembler { // without invalidating pointers and references to existing elements. ArenaDeque<Literal> literals_; + // Deduplication map for 64-bit literals, used for LoadDImmediate(). + ArenaSafeMap<uint64_t, Literal*> literal64_dedupe_map_; + // Jump table list. 
ArenaDeque<JumpTable> jump_tables_; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 7f1dc49734..f3fa72ccc6 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -869,10 +869,11 @@ TEST_F(AssemblerThumb2Test, LoadLiteralWideBeyondMax1KiB) { } std::string expected = - "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n" + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw ip, #(0x408 - 0x4 - 4)\n" "1:\n" "add ip, pc\n" - "ldrd r1, r3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" + + "ldrd r1, r3, [ip, #0]\n" + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + ".align 2, 0\n" "2:\n" @@ -884,48 +885,78 @@ TEST_F(AssemblerThumb2Test, LoadLiteralWideBeyondMax1KiB) { __ GetAdjustedPosition(label.Position())); } -TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax256KiB) { +TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB) { + // The literal size must match but the type doesn't, so use an int32_t rather than float. + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::S3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1 << 15) - 3u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw ip, #(0x10004 - 0x4 - 4)\n" + "1:\n" + "add ip, pc\n" + "vldr s3, [ip, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralSingleMax64KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB_UnalignedPC) { // The literal size must match but the type doesn't, so use an int32_t rather than float. 
arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ ldr(arm::R0, arm::Address(arm::R0)); __ LoadLiteral(arm::S3, literal); Label label; __ Bind(&label); - constexpr size_t kLdrR0R0Count = (1 << 17) - 3u; + constexpr size_t kLdrR0R0Count = (1 << 15) - 4u; for (size_t i = 0; i != kLdrR0R0Count; ++i) { __ ldr(arm::R0, arm::Address(arm::R0)); } std::string expected = - "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n" + "ldr r0, [r0]\n" + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw ip, #(0x10004 - 0x6 - 4)\n" "1:\n" "add ip, pc\n" - "vldr s3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" + + "vldr s3, [ip, #0]\n" + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + ".align 2, 0\n" "2:\n" ".word 0x12345678\n"; - DriverStr(expected, "LoadLiteralSingleMax256KiB"); + DriverStr(expected, "LoadLiteralSingleMax64KiB_UnalignedPC"); EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, __ GetAdjustedPosition(label.Position())); } -TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) { +TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax64KiB) { // The literal size must match but the type doesn't, so use an int64_t rather than double. arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); __ LoadLiteral(arm::D3, literal); Label label; __ Bind(&label); - constexpr size_t kLdrR0R0Count = (1 << 17) - 2u; + constexpr size_t kLdrR0R0Count = (1 << 15) - 2u; for (size_t i = 0; i != kLdrR0R0Count; ++i) { __ ldr(arm::R0, arm::Address(arm::R0)); } std::string expected = // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. - "movw ip, #(0x40000 & 0xffff)\n" + "movw ip, #((0x1000c - 0x8 - 4) & 0xffff)\n" // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. 
- "movt ip, #(0x40000 >> 16)\n" + "movt ip, #((0x1000c - 0x8 - 4) >> 16)\n" "1:\n" "add ip, pc\n" "vldr d3, [ip, #0]\n" + @@ -934,7 +965,7 @@ TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) { "2:\n" ".word 0x87654321\n" ".word 0x12345678\n"; - DriverStr(expected, "LoadLiteralDoubleBeyondMax256KiB"); + DriverStr(expected, "LoadLiteralDoubleBeyondMax64KiB"); EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u, __ GetAdjustedPosition(label.Position())); @@ -946,16 +977,16 @@ TEST_F(AssemblerThumb2Test, LoadLiteralDoubleFar) { __ LoadLiteral(arm::D3, literal); Label label; __ Bind(&label); - constexpr size_t kLdrR0R0Count = (1 << 17) - 2u + 0x1234; + constexpr size_t kLdrR0R0Count = (1 << 15) - 2u + 0x1234; for (size_t i = 0; i != kLdrR0R0Count; ++i) { __ ldr(arm::R0, arm::Address(arm::R0)); } std::string expected = // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. - "movw ip, #((0x40000 + 2 * 0x1234) & 0xffff)\n" + "movw ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) & 0xffff)\n" // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. - "movt ip, #((0x40000 + 2 * 0x1234) >> 16)\n" + "movt ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) >> 16)\n" "1:\n" "add ip, pc\n" "vldr d3, [ip, #0]\n" + |