diff options
| author | 2015-07-28 16:41:21 -0400 | |
|---|---|---|
| committer | 2015-07-30 08:52:54 -0400 | |
| commit | 9097981b79f89bd341d46256678b3c05ddd2d0a9 (patch) | |
| tree | f466b5819420182a73068e0f0b598878141ecc7a | |
| parent | 595335100a947693b9af5fb6c0b5b3c1f0b91788 (diff) | |
Optimizing: Replace x86 xchg use with xor sequence
On some x86 processors, xchg is serializing even when exchanging two
registers. Replace the xchgl use with a 3-xor sequence to swap two
registers. This is generally faster and doesn't serialize the machine.
Change-Id: Iea2cd993d3b70a103bbdd1dbf7818e26ae29387c
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 6 |
1 files changed, 5 insertions, 1 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index e15eff9056..676b8421cd 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -4535,7 +4535,11 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>()); + // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary. + DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>()); + __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>()); + __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>()); + __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange(source.AsRegister<Register>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { |