ART: Implement HSelect

This patch adds a new HIR instruction to Optimizing. HSelect returns
one of two inputs based on the outcome of a condition.
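
For intuition, HSelect computes what a ternary operator computes, but
without control flow. A minimal C++ sketch of the semantics (name and
signature are illustrative, not the ART API):

    #include <cstdint>

    // Pick one of two already-evaluated inputs based on a condition.
    int32_t Select(bool condition, int32_t true_value, int32_t false_value) {
      return condition ? true_value : false_value;
    }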

This is only an initial implementation, which:
 - defines the new instruction,
 - repurposes BooleanSimplifier to emit it,
 - extends InstructionSimplifier to statically resolve it,
 - updates existing code and tests accordingly.

Code generators currently emit fallback if/then/else code and will be
updated in follow-up CLs to use platform-specific conditional moves
when possible.
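
For reference, a sketch of that fallback strategy as emitted by the x86
visitor below (a hypothetical C++ model, not the codegen API): the output
register is allocated to the false value via SameAsFirstInput, so only
the true-value move is guarded by a branch.

    int32_t LowerSelectFallback(bool condition, int32_t true_value,
                                int32_t false_value) {
      int32_t out = false_value;  // Output shares a register with input 0.
      if (condition) {            // GenerateTestAndBranch: skip if false.
        out = true_value;         // MoveLocation of input 1 on fall-through.
      }
      return out;                 // false_target is bound here.
    }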

Change-Id: Ib61b17146487ebe6b55350c2b589f0b971dcaaee
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a4a8c7c..6a4f924 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1218,11 +1218,17 @@
 }
 
 void CodeGeneratorX86::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
-  if (Primitive::Is64BitType(dst_type)) {
-    Move64(dst, src);
+  HParallelMove move(GetGraph()->GetArena());
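+  // On x86, a 64-bit value in core registers is held in a register pair,
+  // so split the move into two 32-bit moves. Constant and FPU sources are
+  // moved as a whole by the resolver.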
+  if (dst_type == Primitive::kPrimLong && !src.IsConstant() && !src.IsFpuRegister()) {
+    move.AddMove(src.ToLow(), dst.ToLow(), Primitive::kPrimInt, nullptr);
+    move.AddMove(src.ToHigh(), dst.ToHigh(), Primitive::kPrimInt, nullptr);
   } else {
-    Move32(dst, src);
+    move.AddMove(src, dst, dst_type, nullptr);
   }
+  GetMoveResolver()->EmitNativeCode(&move);
 }
 
 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
@@ -1559,10 +1565,40 @@
 
 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
-  GenerateTestAndBranch(deoptimize,
-                        /* condition_input_index */ 0,
-                        slow_path->GetEntryLabel(),
-                        /* false_target */ static_cast<Label*>(nullptr));
+  GenerateTestAndBranch<Label>(deoptimize,
+                               /* condition_input_index */ 0,
+                               slow_path->GetEntryLabel(),
+                               /* false_target */ nullptr);
+}
+
+void LocationsBuilderX86::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  Primitive::Type select_type = select->GetType();
+  HInstruction* cond = select->GetCondition();
+
+  if (Primitive::IsFloatingPointType(select_type)) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetInAt(1, Location::Any());
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    locations->SetInAt(2, Location::Any());
+  }
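+  // The output shares its register with the false value (input 0), so only
+  // the true value needs to be moved when the condition holds.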
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
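+  // Fallback code generation: jump over the move of the true value when the
+  // condition is false; the output register already holds the false value.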
+  NearLabel false_target;
+  GenerateTestAndBranch<NearLabel>(
+      select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
 }
 
 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -5481,13 +5517,36 @@
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    } else if (destination.IsFpuRegister()) {
+      __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
     } else {
       DCHECK(destination.IsStackSlot());
       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
     }
+  } else if (source.IsRegisterPair()) {
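+    // There is no direct move from a register pair to an XMM register;
+    // materialize the value on the stack and load it from there.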
+    size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt);
+    // Create stack space for 2 elements.
+    __ subl(ESP, Immediate(2 * elem_size));
+    __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
+    __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
+    __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+    // And remove the temporary stack space we allocated.
+    __ addl(ESP, Immediate(2 * elem_size));
   } else if (source.IsFpuRegister()) {
-    if (destination.IsFpuRegister()) {
+    if (destination.IsRegister()) {
+      __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
+    } else if (destination.IsFpuRegister()) {
       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
+    } else if (destination.IsRegisterPair()) {
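+      // Split the 64-bit XMM value into the pair: movd copies the low 32
+      // bits, then a logical right shift by 32 exposes the high bits. Note
+      // that this clobbers the source register.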
+      XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
+      __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
+      __ psrlq(src_reg, Immediate(32));
+      __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
     } else if (destination.IsStackSlot()) {
       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
     } else {
@@ -5504,7 +5563,11 @@
       MoveMemoryToMemory32(destination.GetStackIndex(), source.GetStackIndex());
     }
   } else if (source.IsDoubleStackSlot()) {
-    if (destination.IsFpuRegister()) {
+    if (destination.IsRegisterPair()) {
+      __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
+      __ movl(destination.AsRegisterPairHigh<Register>(),
+              Address(ESP, source.GetHighStackIndex(kX86WordSize)));
+    } else if (destination.IsFpuRegister()) {
       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;