diff options
| author | 2014-04-30 13:40:19 +0700 | |
|---|---|---|
| committer | 2014-05-28 23:02:32 +0700 | |
| commit | a20468c004264592f309a548fc71ba62a69b8742 (patch) | |
| tree | 0bdc5eed4eb7bdf35384241c1b04313613c0a934 /compiler/dex | |
| parent | 96992e8f2eddba05dc38a15cc7d4e705e8db4022 (diff) | |
x86_64: Support r8-r15, xmm8-xmm15 in assembler
Added REX support. The TARGET_REX_SUPPORT macro should be defined during the build.
Change-Id: I82b457ff5085c8192ad873923bd939fbb91022ce
Signed-off-by: Dmitry Petrochenko <dmitry.petrochenko@intel.com>
Diffstat (limited to 'compiler/dex')
| -rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc | 304 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 8 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/target_x86.cc | 2 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 71 |
4 files changed, 301 insertions(+), 84 deletions(-)
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index b8222efa6e..39a036560e 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -478,7 +478,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" }, }; -size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) { +size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement, + int reg_r, int reg_x, bool has_sib) { size_t size = 0; if (entry->skeleton.prefix1 > 0) { ++size; @@ -486,6 +487,10 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displa ++size; } } + if ((NeedsRex(base) || NeedsRex(reg_r) || NeedsRex(reg_x)) && + entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) { + ++size; // REX_R + } ++size; // opcode if (entry->skeleton.opcode == 0x0F) { ++size; @@ -494,13 +499,13 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displa } } ++size; // modrm - if (has_sib || RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum() + if (has_sib || LowRegisterBits(RegStorage::RegNum(base)) == rs_rX86_SP.GetRegNum() || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) { // SP requires a SIB byte. // GS access also needs a SIB byte for absolute adressing in 64-bit mode. ++size; } - if (displacement != 0 || RegStorage::RegNum(base) == rs_rBP.GetRegNum()) { + if (displacement != 0 || LowRegisterBits(RegStorage::RegNum(base)) == rs_rBP.GetRegNum()) { // BP requires an explicit displacement, even when it's 0. 
if (entry->opcode != kX86Lea32RA) { DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name; @@ -524,33 +529,41 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kPrefix2Nullary: return 3; // 1 byte of opcode + 2 prefixes case kRegOpcode: // lir operands - 0: reg - return ComputeSize(entry, 0, 0, false) - 1; // substract 1 for modrm + // substract 1 for modrm + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) - 1; case kReg: // lir operands - 0: reg - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); case kMem: // lir operands - 0: base, 1: disp - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kArray: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kMemReg: // lir operands - 0: base, 1: disp, 2: reg - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], + lir->operands[2], NO_REG, false); case kMemRegImm: // lir operands - 0: base, 1: disp, 2: reg 3: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], + lir->operands[2], NO_REG, false); case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + lir->operands[4], lir->operands[1], true); case kThreadReg: // lir operands - 0: disp, 1: reg - return ComputeSize(entry, 0, lir->operands[0], false); - case kRegReg: - return ComputeSize(entry, 0, 0, false); - case kRegRegStore: - return ComputeSize(entry, 0, 0, false); + 
return ComputeSize(entry, 0, lir->operands[0], lir->operands[1], NO_REG, false); + case kRegReg: // lir operands - 0: reg1, 1: reg2 + return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); + case kRegRegStore: // lir operands - 0: reg2, 1: reg1 + return ComputeSize(entry, 0, 0, lir->operands[1], lir->operands[0], false); case kRegMem: // lir operands - 0: reg, 1: base, 2: disp - return ComputeSize(entry, lir->operands[1], lir->operands[2], false); + return ComputeSize(entry, lir->operands[1], lir->operands[2], + lir->operands[0], NO_REG, false); case kRegArray: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp - return ComputeSize(entry, lir->operands[1], lir->operands[4], true); + return ComputeSize(entry, lir->operands[1], lir->operands[4], + lir->operands[0], lir->operands[2], true); case kRegThread: // lir operands - 0: reg, 1: disp - return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, lir->operands[0], NO_REG, false); case kRegImm: { // lir operands - 0: reg, 1: immediate - size_t size = ComputeSize(entry, 0, 0, false); + size_t size = ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); if (entry->skeleton.ax_opcode == 0) { return size; } else { @@ -560,47 +573,58 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } } case kMemImm: // lir operands - 0: base, 1: disp, 2: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], + NO_REG, lir->operands[0], false); case kArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kThreadImm: // lir operands - 0: disp, 1: imm - return ComputeSize(entry, 0, 0x12345678, false); // displacement 
size is always 32bit + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); case kRegRegImm: // lir operands - 0: reg, 1: reg, 2: imm case kRegRegImmRev: - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm - return ComputeSize(entry, lir->operands[1], lir->operands[2], false); + return ComputeSize(entry, lir->operands[1], lir->operands[2], + lir->operands[0], NO_REG, false); case kRegArrayImm: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm - return ComputeSize(entry, lir->operands[1], lir->operands[4], true); + return ComputeSize(entry, lir->operands[1], lir->operands[4], + lir->operands[0], lir->operands[2], true); case kMovRegImm: // lir operands - 0: reg, 1: immediate - return (entry->skeleton.prefix1 != 0?1:0) + 1 + entry->skeleton.immediate_bytes; // TODO(64): reg1 + return (entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])?1:0) + + 1 + entry->skeleton.immediate_bytes; case kShiftRegImm: // lir operands - 0: reg, 1: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, 0, 0, false) - (lir->operands[1] == 1 ? 1 : 0); + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) - + (lir->operands[1] == 1 ? 1 : 0); case kShiftMemImm: // lir operands - 0: base, 1: disp, 2: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, lir->operands[0], lir->operands[1], false) - + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false) - (lir->operands[2] == 1 ? 1 : 0); case kShiftArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate // Shift by immediate one has a shorter opcode. 
- return ComputeSize(entry, lir->operands[0], lir->operands[3], true) - + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true) - (lir->operands[4] == 1 ? 1 : 0); - case kShiftRegCl: - return ComputeSize(entry, 0, 0, false); + case kShiftRegCl: // lir operands - 0: reg, 1: cl + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); case kShiftMemCl: // lir operands - 0: base, 1: disp, 2: cl - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kShiftArrayCl: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + lir->operands[4], lir->operands[1], true); case kRegCond: // lir operands - 0: reg, 1: cond - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); case kMemCond: // lir operands - 0: base, 1: disp, 2: cond - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kArrayCond: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kRegRegCond: // lir operands - 0: reg, 1: reg, 2: cond - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); case kRegMemCond: // lir operands - 0: reg, 1: reg, 2: disp, 3:cond - return ComputeSize(entry, lir->operands[1], lir->operands[2], false); + return ComputeSize(entry, lir->operands[1], lir->operands[2], + lir->operands[0], lir->operands[1], false); case kJcc: if (lir->opcode == kX86Jcc8) { return 2; // opcode + rel8 
@@ -614,21 +638,28 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } else if (lir->opcode == kX86Jmp32) { return 5; // opcode + rel32 } else if (lir->opcode == kX86JmpT) { - return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); } else { DCHECK(lir->opcode == kX86JmpR); - return 2; // opcode + modrm + if (NeedsRex(lir->operands[0])) { + return 3; // REX.B + opcode + modrm + } else { + return 2; // opcode + modrm + } } case kCall: switch (lir->opcode) { case kX86CallI: return 5; // opcode 0:disp case kX86CallR: return 2; // opcode modrm case kX86CallM: // lir operands - 0: base, 1: disp - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kX86CallA: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kX86CallT: // lir operands - 0: disp - return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); default: break; } @@ -636,16 +667,19 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kPcRel: if (entry->opcode == kX86PcRelLoadRA) { // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table - return ComputeSize(entry, lir->operands[1], 0x12345678, true); + return ComputeSize(entry, lir->operands[1], 0x12345678, + lir->operands[0], lir->operands[2], true); } else { DCHECK(entry->opcode == kX86PcRelAdr); return 5; // opcode with reg + 4 byte immediate } - case kMacro: + case kMacro: // lir operands - 0: reg DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); return 5 /* call opcode + 4 byte displacement */ + 1 /* 
pop reg */ + - ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, false) - - (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); // shorter ax encoding + ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, + lir->operands[0], NO_REG, false) - + // shorter ax encoding + (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); default: break; } @@ -654,19 +688,62 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry) { + EmitPrefix(entry, NO_REG, NO_REG, NO_REG); +} + +void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry, + uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) { + // REX.WRXB + // W - 64-bit operand + // R - MODRM.reg + // X - SIB.index + // B - MODRM.rm/SIB.base + bool force = false; + bool w = (entry->skeleton.prefix1 == REX_W) || (entry->skeleton.prefix2 == REX_W); + bool r = NeedsRex(reg_r); + bool x = NeedsRex(reg_x); + bool b = NeedsRex(reg_b); + uint8_t rex = force ? 0x40 : 0; + if (w) { + rex |= 0x48; // REX.W000 + } + if (r) { + rex |= 0x44; // REX.0R00 + } + if (x) { + rex |= 0x42; // REX.00X0 + } + if (b) { + rex |= 0x41; // REX.000B + } if (entry->skeleton.prefix1 != 0) { if (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX) { // 64 bit adresses by GS, not FS code_buffer_.push_back(THREAD_PREFIX_GS); } else { - code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix1 == REX_W) { + rex |= entry->skeleton.prefix1; + code_buffer_.push_back(rex); + rex = 0; + } else { + code_buffer_.push_back(entry->skeleton.prefix1); + } } if (entry->skeleton.prefix2 != 0) { - code_buffer_.push_back(entry->skeleton.prefix2); + if (entry->skeleton.prefix2 == REX_W) { + rex |= entry->skeleton.prefix2; + code_buffer_.push_back(rex); + rex = 0; + } else { + code_buffer_.push_back(entry->skeleton.prefix2); + } } } else { DCHECK_EQ(0, entry->skeleton.prefix2); } + if (rex != 0) { + code_buffer_.push_back(rex); + } } void X86Mir2Lir::EmitOpcode(const 
X86EncodingMap* entry) { @@ -685,7 +762,12 @@ void X86Mir2Lir::EmitOpcode(const X86EncodingMap* entry) { } void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry) { - EmitPrefix(entry); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); +} + +void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry, + uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) { + EmitPrefix(entry, reg_r, reg_x, reg_b); EmitOpcode(entry); } @@ -766,6 +848,11 @@ void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int64_t imm) { code_buffer_.push_back((imm >> 8) & 0xFF); break; case 4: + if (imm <0) { + CHECK_EQ((-imm) & 0x0FFFFFFFFl, -imm); + } else { + CHECK_EQ(imm & 0x0FFFFFFFFl, imm); + } code_buffer_.push_back(imm & 0xFF); code_buffer_.push_back((imm >> 8) & 0xFF); code_buffer_.push_back((imm >> 16) & 0xFF); @@ -789,7 +876,8 @@ void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int64_t imm) { } void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); // There's no 3-byte instruction with +rd DCHECK(entry->skeleton.opcode != 0x0F || (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A)); @@ -801,7 +889,8 @@ void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) { } void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); if (RegStorage::RegNum(reg) >= 4) { DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(RegStorage::RegNum(reg)) @@ -815,7 +904,8 @@ void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { } void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); 
code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -827,15 +917,29 @@ void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, index, base); + index = LowRegisterBits(index); + base = LowRegisterBits(base); EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } +uint8_t X86Mir2Lir::LowRegisterBits(uint8_t reg) { + uint8_t res = reg; + res = reg & kRegNumMask32; // 3 bits + return res; +} + +bool X86Mir2Lir::NeedsRex(uint8_t reg) { + return RegStorage::RegNum(reg) > 7; +} + void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, base); + reg = LowRegisterBits(reg); + base = LowRegisterBits(base); if (RegStorage::RegNum(reg) >= 4) { DCHECK(strchr(entry->name, '8') == NULL || entry->opcode == kX86Movzx8RM || entry->opcode == kX86Movsx8RM) @@ -854,9 +958,12 @@ void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry, EmitMemReg(entry, base, disp, reg); } -void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, uint8_t index, - int scale, int disp) { - EmitPrefixAndOpcode(entry); +void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, + uint8_t index, int scale, int disp) { + EmitPrefixAndOpcode(entry, reg, index, base); + reg = LowRegisterBits(reg); + index = LowRegisterBits(index); + base = LowRegisterBits(base); EmitModrmSibDisp(reg, base, index, scale, disp); DCHECK_EQ(0, entry->skeleton.modrm_opcode); DCHECK_EQ(0, entry->skeleton.ax_opcode); @@ -871,7 +978,9 @@ void X86Mir2Lir::EmitArrayReg(const 
X86EncodingMap* entry, uint8_t base, uint8_t void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, index, base); + index = LowRegisterBits(index); + base = LowRegisterBits(base); EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); EmitImm(entry, imm); @@ -879,7 +988,8 @@ void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp) { DCHECK_NE(entry->skeleton.prefix1, 0); - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); if (RegStorage::RegNum(reg) >= 4) { DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(RegStorage::RegNum(reg)) @@ -897,7 +1007,9 @@ void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int dis } void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); + reg1 = LowRegisterBits(reg1); + reg2 = LowRegisterBits(reg2); DCHECK_LT(RegStorage::RegNum(reg1), 8); DCHECK_LT(RegStorage::RegNum(reg2), 8); uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2); @@ -909,7 +1021,9 @@ void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t r void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); + reg1 = LowRegisterBits(reg1); + reg2 = LowRegisterBits(reg2); DCHECK_LT(RegStorage::RegNum(reg1), 8); DCHECK_LT(RegStorage::RegNum(reg2), 8); uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2); @@ -926,7 +1040,9 @@ void 
X86Mir2Lir::EmitRegRegImmRev(const X86EncodingMap* entry, void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg, uint8_t base, int disp, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, base); + reg = LowRegisterBits(reg); + base = LowRegisterBits(base); DCHECK(!RegStorage::IsFloat(reg)); DCHECK_LT(RegStorage::RegNum(reg), 8); EmitModrmDisp(reg, base, disp); @@ -941,10 +1057,11 @@ void X86Mir2Lir::EmitMemRegImm(const X86EncodingMap* entry, } void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, reg); if (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) { code_buffer_.push_back(entry->skeleton.ax_opcode); } else { + reg = LowRegisterBits(reg); EmitOpcode(entry); uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); code_buffer_.push_back(modrm); @@ -953,7 +1070,8 @@ void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { } void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); EmitImm(entry, imm); @@ -971,6 +1089,8 @@ void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) { } void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm) { + EmitPrefix(entry, NO_REG, NO_REG, reg); + reg = LowRegisterBits(reg); DCHECK_LT(RegStorage::RegNum(reg), 8); code_buffer_.push_back(0xB8 + RegStorage::RegNum(reg)); switch (entry->skeleton.immediate_bytes) { @@ -991,12 +1111,14 @@ void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t code_buffer_.push_back((imm >> 56) & 0xFF); break; default: - LOG(FATAL) << "Unsupported 
immediate size for EmitMovRegImm: " << static_cast<uint32_t>(entry->skeleton.immediate_bytes); + LOG(FATAL) << "Unsupported immediate size for EmitMovRegImm: " + << static_cast<uint32_t>(entry->skeleton.immediate_bytes); } } void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, reg); + reg = LowRegisterBits(reg); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -1023,7 +1145,8 @@ void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int i void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl) { DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg())); - EmitPrefix(entry); + EmitPrefix(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1038,7 +1161,8 @@ void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_ void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl) { DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg())); - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1051,7 +1175,8 @@ void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, int displacement, int imm) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -1070,7 +1195,8 @@ void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, } void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t 
condition) { - EmitPrefix(entry); + EmitPrefix(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0x0F, entry->skeleton.opcode); code_buffer_.push_back(0x0F); @@ -1083,7 +1209,8 @@ void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t c DCHECK_EQ(entry->skeleton.immediate_bytes, 0); } -void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t condition) { +void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, + uint8_t condition) { if (entry->skeleton.prefix1 != 0) { code_buffer_.push_back(entry->skeleton.prefix1); if (entry->skeleton.prefix2 != 0) { @@ -1105,7 +1232,9 @@ void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int disp void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, uint8_t condition) { // Generate prefix and opcode without the condition - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); + reg1 = LowRegisterBits(reg1); + reg2 = LowRegisterBits(reg2); // Now add the condition. The last byte of opcode is the one that receives it. DCHECK_LE(condition, 0xF); @@ -1127,9 +1256,12 @@ void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8 code_buffer_.push_back(modrm); } -void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int displacement, uint8_t condition) { +void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, + int displacement, uint8_t condition) { // Generate prefix and opcode without the condition - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, base); + reg1 = LowRegisterBits(reg1); + base = LowRegisterBits(base); // Now add the condition. The last byte of opcode is the one that receives it. 
DCHECK_LE(condition, 0xF); @@ -1162,8 +1294,10 @@ void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) { code_buffer_.push_back(rel & 0xFF); } else { DCHECK(entry->opcode == kX86JmpR); - code_buffer_.push_back(entry->skeleton.opcode); uint8_t reg = static_cast<uint8_t>(rel); + EmitPrefix(entry, NO_REG, NO_REG, reg); + code_buffer_.push_back(entry->skeleton.opcode); + reg = LowRegisterBits(reg); DCHECK_LT(RegStorage::RegNum(reg), 8); uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); code_buffer_.push_back(modrm); @@ -1188,7 +1322,8 @@ void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc) { } void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); @@ -1229,9 +1364,12 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(base_or_table)); disp = tab_rec->offset; } - EmitPrefix(entry); - DCHECK_LT(RegStorage::RegNum(reg), 8); if (entry->opcode == kX86PcRelLoadRA) { + EmitPrefix(entry, reg, index, base_or_table); + reg = LowRegisterBits(reg); + base_or_table = LowRegisterBits(base_or_table); + index = LowRegisterBits(index); + DCHECK_LT(RegStorage::RegNum(reg), 8); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1246,6 +1384,7 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, code_buffer_.push_back(sib); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } else { + DCHECK_LT(RegStorage::RegNum(reg), 8); code_buffer_.push_back(entry->skeleton.opcode + RegStorage::RegNum(reg)); } code_buffer_.push_back(disp & 0xFF); @@ -1258,6 +1397,8 @@ void 
X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset) { DCHECK(entry->opcode == kX86StartOfMethod) << entry->name; + EmitPrefix(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); code_buffer_.push_back(0xE8); // call +0 code_buffer_.push_back(0); code_buffer_.push_back(0); @@ -1532,7 +1673,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitRegRegCond(entry, lir->operands[0], lir->operands[1], lir->operands[2]); break; case kRegMemCond: // lir operands - 0: reg, 1: reg, displacement, 3: condition - EmitRegMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]); + EmitRegMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2], + lir->operands[3]); break; case kJmp: // lir operands - 0: rel if (entry->opcode == kX86JmpT) { @@ -1566,7 +1708,7 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitPcRel(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); break; - case kMacro: + case kMacro: // lir operands - 0: reg EmitMacro(entry, lir->operands[0], lir->offset); break; default: diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index fcc846f53c..d66790d405 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -331,10 +331,16 @@ class X86Mir2Lir : public Mir2Lir { std::vector<uint8_t>* ReturnCallFrameInformation(); protected: - size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib); + size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, + int reg_r, int reg_x, bool has_sib); + uint8_t LowRegisterBits(uint8_t reg); + bool NeedsRex(uint8_t reg); void EmitPrefix(const X86EncodingMap* entry); + void EmitPrefix(const X86EncodingMap* entry, uint8_t reg_r, uint8_t reg_x, uint8_t reg_b); void 
EmitOpcode(const X86EncodingMap* entry); void EmitPrefixAndOpcode(const X86EncodingMap* entry); + void EmitPrefixAndOpcode(const X86EncodingMap* entry, + uint8_t reg_r, uint8_t reg_x, uint8_t reg_b); void EmitDisp(uint8_t base, int disp); void EmitModrmThread(uint8_t reg_or_opcode); void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp); diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 19ad2f8ff1..8b341682c3 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -38,7 +38,7 @@ static const RegStorage core_regs_arr_64[] = { static const RegStorage core_regs_arr_64q[] = { rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q, #ifdef TARGET_REX_SUPPORT - rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15 + rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q #endif }; static const RegStorage sp_regs_arr_32[] = { diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 24c64cddfc..bb8df893f8 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -175,6 +175,16 @@ enum X86NativeRegisterPool { fr5 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5, fr6 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6, fr7 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7, +#ifdef TARGET_REX_SUPPORT + fr8 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8, + fr9 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9, + fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10, + fr11 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11, + fr12 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12, + fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13, + fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14, + fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15, +#endif // xmm registers, double precision 
aliases. dr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0, @@ -185,8 +195,18 @@ enum X86NativeRegisterPool { dr5 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5, dr6 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6, dr7 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7, +#ifdef TARGET_REX_SUPPORT + dr8 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8, + dr9 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9, + dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10, + dr11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11, + dr12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12, + dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13, + dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14, + dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15, +#endif - // xmm registers aliases. + // xmm registers, quad precision aliases xr0 = RegStorage::k128BitSolo | 0, xr1 = RegStorage::k128BitSolo | 1, xr2 = RegStorage::k128BitSolo | 2, @@ -195,6 +215,16 @@ enum X86NativeRegisterPool { xr5 = RegStorage::k128BitSolo | 5, xr6 = RegStorage::k128BitSolo | 6, xr7 = RegStorage::k128BitSolo | 7, +#ifdef TARGET_REX_SUPPORT + xr8 = RegStorage::k128BitSolo | 8, + xr9 = RegStorage::k128BitSolo | 9, + xr10 = RegStorage::k128BitSolo | 10, + xr11 = RegStorage::k128BitSolo | 11, + xr12 = RegStorage::k128BitSolo | 12, + xr13 = RegStorage::k128BitSolo | 13, + xr14 = RegStorage::k128BitSolo | 14, + xr15 = RegStorage::k128BitSolo | 15, +#endif // TODO: as needed, add 256, 512 and 1024-bit xmm views. 
}; @@ -251,6 +281,16 @@ constexpr RegStorage rs_fr4(RegStorage::kValid | fr4); constexpr RegStorage rs_fr5(RegStorage::kValid | fr5); constexpr RegStorage rs_fr6(RegStorage::kValid | fr6); constexpr RegStorage rs_fr7(RegStorage::kValid | fr7); +#ifdef TARGET_REX_SUPPORT +constexpr RegStorage rs_fr8(RegStorage::kValid | fr8); +constexpr RegStorage rs_fr9(RegStorage::kValid | fr9); +constexpr RegStorage rs_fr10(RegStorage::kValid | fr10); +constexpr RegStorage rs_fr11(RegStorage::kValid | fr11); +constexpr RegStorage rs_fr12(RegStorage::kValid | fr12); +constexpr RegStorage rs_fr13(RegStorage::kValid | fr13); +constexpr RegStorage rs_fr14(RegStorage::kValid | fr14); +constexpr RegStorage rs_fr15(RegStorage::kValid | fr15); +#endif constexpr RegStorage rs_dr0(RegStorage::kValid | dr0); constexpr RegStorage rs_dr1(RegStorage::kValid | dr1); @@ -260,6 +300,16 @@ constexpr RegStorage rs_dr4(RegStorage::kValid | dr4); constexpr RegStorage rs_dr5(RegStorage::kValid | dr5); constexpr RegStorage rs_dr6(RegStorage::kValid | dr6); constexpr RegStorage rs_dr7(RegStorage::kValid | dr7); +#ifdef TARGET_REX_SUPPORT +constexpr RegStorage rs_dr8(RegStorage::kValid | dr8); +constexpr RegStorage rs_dr9(RegStorage::kValid | dr9); +constexpr RegStorage rs_dr10(RegStorage::kValid | dr10); +constexpr RegStorage rs_dr11(RegStorage::kValid | dr11); +constexpr RegStorage rs_dr12(RegStorage::kValid | dr12); +constexpr RegStorage rs_dr13(RegStorage::kValid | dr13); +constexpr RegStorage rs_dr14(RegStorage::kValid | dr14); +constexpr RegStorage rs_dr15(RegStorage::kValid | dr15); +#endif constexpr RegStorage rs_xr0(RegStorage::kValid | xr0); constexpr RegStorage rs_xr1(RegStorage::kValid | xr1); @@ -269,6 +319,16 @@ constexpr RegStorage rs_xr4(RegStorage::kValid | xr4); constexpr RegStorage rs_xr5(RegStorage::kValid | xr5); constexpr RegStorage rs_xr6(RegStorage::kValid | xr6); constexpr RegStorage rs_xr7(RegStorage::kValid | xr7); +#ifdef TARGET_REX_SUPPORT +constexpr RegStorage 
rs_xr8(RegStorage::kValid | xr8); +constexpr RegStorage rs_xr9(RegStorage::kValid | xr9); +constexpr RegStorage rs_xr10(RegStorage::kValid | xr10); +constexpr RegStorage rs_xr11(RegStorage::kValid | xr11); +constexpr RegStorage rs_xr12(RegStorage::kValid | xr12); +constexpr RegStorage rs_xr13(RegStorage::kValid | xr13); +constexpr RegStorage rs_xr14(RegStorage::kValid | xr14); +constexpr RegStorage rs_xr15(RegStorage::kValid | xr15); +#endif extern X86NativeRegisterPool rX86_ARG0; extern X86NativeRegisterPool rX86_ARG1; @@ -628,6 +688,15 @@ struct X86EncodingMap { // 64 Bit Operand Size #define REX_W 0x48 // Extension of the ModR/M reg field +#define REX_R 0x44 +// Extension of the SIB index field +#define REX_X 0x42 +// Extension of the ModR/M r/m field, SIB base field, or Opcode reg field +#define REX_B 0x41 +// Mask extracting the least 3 bits of r0..r15 +#define kRegNumMask32 0x07 +// Value indicating that base or reg is not used +#define NO_REG 0 #define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127)) #define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767)) |