x86 updates GenInlinedUnsafePut/GenInstanceofFinal

Allow x86 to inline GenInlinedUnsafePut for object stores by freeing the
offset temporary register early, so that MarkGCCard does not run out of
temporaries.  Add an x86-specific version of GenInstanceofFinal that
uses a compare against memory and a SETcc instruction.

Change-Id: I67788d7ae83776b0b9069fe4b379452190774992
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index d8b9869..ffe85af 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -944,6 +944,9 @@
 // question with simple comparisons.
 void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
                                  RegLocation rl_src) {
+  // X86 has its own implementation.
+  DCHECK_NE(cu_->instruction_set, kX86);
+
   RegLocation object = LoadValue(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   int result_reg = rl_result.low_reg;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index f865207..eeecb3c 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1216,10 +1216,6 @@
     // TODO - add Mips implementation
     return false;
   }
-  if (cu_->instruction_set == kX86 && is_object) {
-    // TODO: fix X86, it exhausts registers for card marking.
-    return false;
-  }
   // Unused - RegLocation rl_src_unsafe = info->args[0];
   RegLocation rl_src_obj = info->args[1];  // Object
   RegLocation rl_src_offset = info->args[2];  // long low
@@ -1239,6 +1235,9 @@
     rl_value = LoadValue(rl_src_value, kCoreReg);
     StoreBaseIndexed(rl_object.low_reg, rl_offset.low_reg, rl_value.low_reg, 0, kWord);
   }
+
+  // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
+  FreeTemp(rl_offset.low_reg);
   if (is_volatile) {
     GenMemBarrier(kStoreLoad);
   }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 10136b6..2a35af1 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -918,9 +918,10 @@
      */
     RegLocation ForceTempWide(RegLocation loc);
 
+    virtual void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                                    RegLocation rl_dest, RegLocation rl_src);
+
   private:
-    void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
-                            RegLocation rl_src);
     void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
                                     bool type_known_abstract, bool use_declaring_class,
                                     bool can_assume_type_is_in_dex_cache,
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 6280b64..9f90216 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -180,6 +180,15 @@
       */
     void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
 
+    /**
+     * @brief Implement instanceof a final class with x86 specific code.
+     * @param use_declaring_class 'true' to compare against the method's declaring class.
+     * @param type_idx Type index to use if use_declaring_class is 'false'.
+     * @param rl_dest Result to be set to 0 or 1.
+     * @param rl_src Object to be tested.
+     */
+    void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                            RegLocation rl_dest, RegLocation rl_src);
     // Single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
     LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index e665f70..7d0ba27 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1650,4 +1650,72 @@
   StoreFinalValueWide(rl_dest, rl_result);
 }
 
+// For final classes there are no sub-classes to check and so we can answer the instance-of
+// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
+void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                                    RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation object = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  int result_reg = rl_result.low_reg;
+
+  // SETcc only works with EAX..EDX.
+  if (result_reg == object.low_reg || result_reg >= 4) {
+    result_reg = AllocTypedTemp(false, kCoreReg);
+    DCHECK_LT(result_reg, 4);
+  }
+
+  // Assume that there is no match.
+  LoadConstant(result_reg, 0);
+  LIR* null_branchover = OpCmpImmBranch(kCondEq, object.low_reg, 0, NULL);
+
+  int check_class = AllocTypedTemp(false, kCoreReg);
+
+  // If Method* is already in a register, we can save a copy.
+  RegLocation rl_method = mir_graph_->GetMethodLoc();
+  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() +
+    (sizeof(mirror::Class*) * type_idx);
+
+  if (rl_method.location == kLocPhysReg) {
+    if (use_declaring_class) {
+      LoadWordDisp(rl_method.low_reg,
+                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+                   check_class);
+    } else {
+      LoadWordDisp(rl_method.low_reg,
+                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                   check_class);
+      LoadWordDisp(check_class, offset_of_type, check_class);
+    }
+  } else {
+    LoadCurrMethodDirect(check_class);
+    if (use_declaring_class) {
+      LoadWordDisp(check_class,
+                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+                   check_class);
+    } else {
+      LoadWordDisp(check_class,
+                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                   check_class);
+      LoadWordDisp(check_class, offset_of_type, check_class);
+    }
+  }
+
+  // Compare the computed class to the class in the object.
+  DCHECK_EQ(object.location, kLocPhysReg);
+  OpRegMem(kOpCmp, check_class, object.low_reg,
+           mirror::Object::ClassOffset().Int32Value());
+
+  // Set the low byte of the result to 0 or 1 from the compare condition code.
+  NewLIR2(kX86Set8R, result_reg, kX86CondEq);
+
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  null_branchover->target = target;
+  FreeTemp(check_class);
+  if (IsTemp(result_reg)) {
+    OpRegCopy(rl_result.low_reg, result_reg);
+    FreeTemp(result_reg);
+  }
+  StoreValue(rl_dest, rl_result);
+}
+
 }  // namespace art