x86 updates GenInlinedUnsafePut/GenInstanceofFinal
Allow x86 to inline GenInlinedUnsafePut by freeing up a temporary
register early. Make an x86 specific version of GenInstanceofFinal that
uses compare to memory and a setCC instruction.
Change-Id: I67788d7ae83776b0b9069fe4b379452190774992
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index d8b9869..ffe85af 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -944,6 +944,9 @@
// question with simple comparisons.
void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
RegLocation rl_src) {
+ // X86 has its own implementation.
+ DCHECK_NE(cu_->instruction_set, kX86);
+
RegLocation object = LoadValue(rl_src, kCoreReg);
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
int result_reg = rl_result.low_reg;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index f865207..eeecb3c 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1216,10 +1216,6 @@
// TODO - add Mips implementation
return false;
}
- if (cu_->instruction_set == kX86 && is_object) {
- // TODO: fix X86, it exhausts registers for card marking.
- return false;
- }
// Unused - RegLocation rl_src_unsafe = info->args[0];
RegLocation rl_src_obj = info->args[1]; // Object
RegLocation rl_src_offset = info->args[2]; // long low
@@ -1239,6 +1235,9 @@
rl_value = LoadValue(rl_src_value, kCoreReg);
StoreBaseIndexed(rl_object.low_reg, rl_offset.low_reg, rl_value.low_reg, 0, kWord);
}
+
+ // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
+ FreeTemp(rl_offset.low_reg);
if (is_volatile) {
GenMemBarrier(kStoreLoad);
}
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 10136b6..2a35af1 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -918,9 +918,10 @@
*/
RegLocation ForceTempWide(RegLocation loc);
+ virtual void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+ RegLocation rl_dest, RegLocation rl_src);
+
private:
- void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
- RegLocation rl_src);
void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
bool type_known_abstract, bool use_declaring_class,
bool can_assume_type_is_in_dex_cache,
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 6280b64..9f90216 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -180,6 +180,15 @@
*/
void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+ /**
+ * @brief Implement instance-of against a final class with x86 specific code.
+ * @param use_declaring_class 'true' to test against the current method's declaring class.
+ * @param type_idx Resolved-types cache index to use if use_declaring_class is 'false'.
+ * @param rl_dest Result to be set to 0 (no match or null object) or 1 (match).
+ * @param rl_src Object to be tested.
+ */
+ void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+ RegLocation rl_dest, RegLocation rl_src);
// Single operation generators.
LIR* OpUnconditionalBranch(LIR* target);
LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index e665f70..7d0ba27 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1650,4 +1650,72 @@
StoreFinalValueWide(rl_dest, rl_result);
}
+// For final classes there are no sub-classes to check, so instance-of reduces to one class
+// equality test. x86 version: compare register against memory, then SETcc; null objects give 0.
+void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+ RegLocation rl_dest, RegLocation rl_src) {
+ RegLocation object = LoadValue(rl_src, kCoreReg);
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ int result_reg = rl_result.low_reg;
+
+ // SETcc needs a byte-addressable register (number < 4); also avoid clobbering the object.
+ if (result_reg == object.low_reg || result_reg >= 4) {
+ result_reg = AllocTypedTemp(false, kCoreReg);
+ DCHECK_LT(result_reg, 4);
+ }
+
+ // Zero the whole result first: SETcc below writes only the low byte; null skips to the end.
+ LoadConstant(result_reg, 0);
+ LIR* null_branchover = OpCmpImmBranch(kCondEq, object.low_reg, 0, NULL);
+
+ int check_class = AllocTypedTemp(false, kCoreReg);
+
+ // If Method* is already in a register, load from it directly; else fetch it into check_class.
+ RegLocation rl_method = mir_graph_->GetMethodLoc();
+ int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() +
+ (sizeof(mirror::Class*) * type_idx);
+
+ if (rl_method.location == kLocPhysReg) {
+ if (use_declaring_class) {
+ LoadWordDisp(rl_method.low_reg,
+ mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+ check_class);
+ } else {
+ LoadWordDisp(rl_method.low_reg,
+ mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+ check_class);
+ LoadWordDisp(check_class, offset_of_type, check_class);
+ }
+ } else {
+ LoadCurrMethodDirect(check_class);
+ if (use_declaring_class) {
+ LoadWordDisp(check_class,
+ mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+ check_class);
+ } else {
+ LoadWordDisp(check_class,
+ mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+ check_class);
+ LoadWordDisp(check_class, offset_of_type, check_class);
+ }
+ }
+
+ // Compare the expected class against the class word read straight from the object in memory.
+ DCHECK_EQ(object.location, kLocPhysReg);
+ OpRegMem(kOpCmp, check_class, object.low_reg,
+ mirror::Object::ClassOffset().Int32Value());
+
+ // Set the low byte of the result to 1 if the compare found equality, else 0 (rest is zeroed).
+ NewLIR2(kX86Set8R, result_reg, kX86CondEq);
+
+ LIR* target = NewLIR0(kPseudoTargetLabel);
+ null_branchover->target = target;
+ FreeTemp(check_class);
+ if (IsTemp(result_reg)) {
+ OpRegCopy(rl_result.low_reg, result_reg);
+ FreeTemp(result_reg);
+ }
+ StoreValue(rl_dest, rl_result);
+}
+
} // namespace art