Add interface check cast fast path to arm, arm64, x86
Bug: 12687968
Bug: 32577579
Test: test-art-host, test-art-target CC
Change-Id: Ia57099d499fa704803cc5f0135f0f53fefe39826
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index eb748f8..af46bab 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6357,12 +6357,26 @@
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
- return kEmitCompilerReadBarrier &&
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+ if (kEmitCompilerReadBarrier &&
!kUseBakerReadBarrier &&
(type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck);
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ return 1;
+ }
+ return 0;
+}
+
+// InterfaceCheck has 2 temps (when the heap is not poisoned): one holding the iftable pointer
+// and one holding the remaining interface count while looping over the iftable.
+// The other checks have one temp for loading the object's class.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+ if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
+ return 2;
+ }
+ return 1 + NumberOfInstanceOfTemps(type_check_kind);
}
void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
@@ -6393,11 +6407,8 @@
locations->SetInAt(1, Location::Any());
// Note that TypeCheckSlowPathX86 uses this "out" register too.
locations->SetOut(Location::RequiresRegister());
- // When read barriers are enabled, we need a temporary register for
- // some cases.
- if (TypeCheckNeedsATemporary(type_check_kind)) {
- locations->AddTemp(Location::RequiresRegister());
- }
+ // When read barriers are enabled, we need a temporary register for some cases.
+ locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
}
void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
@@ -6408,9 +6419,9 @@
Location cls = locations->InAt(1);
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
- Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
- locations->GetTemp(0) :
- Location::NoLocation();
+ const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ DCHECK_LE(num_temps, 1u);
+ Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -6426,7 +6437,11 @@
}
// /* HeapReference<Class> */ out = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
@@ -6591,26 +6606,46 @@
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
+ case TypeCheckKind::kInterfaceCheck:
call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
- case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
+ if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+ // Require a register for the interface check since there is a loop that compares the class to
+ // a memory address.
+ locations->SetInAt(1, Location::RequiresRegister());
+ } else {
+ locations->SetInAt(1, Location::Any());
+ }
// Note that TypeCheckSlowPathX86 uses this "temp" register too.
locations->AddTemp(Location::RequiresRegister());
- // When read barriers are enabled, we need an additional temporary
- // register for some cases.
- if (TypeCheckNeedsATemporary(type_check_kind)) {
- locations->AddTemp(Location::RequiresRegister());
+ // When read barriers are enabled, we need an additional temporary register for some cases.
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
+}
+
+static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) {
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kAbstractClassCheck:
+ case TypeCheckKind::kClassHierarchyCheck:
+ case TypeCheckKind::kArrayObjectCheck:
+ return !throws_into_catch && !kEmitCompilerReadBarrier;
+ case TypeCheckKind::kInterfaceCheck:
+ return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences;
+ case TypeCheckKind::kArrayCheck:
+ case TypeCheckKind::kUnresolvedCheck:
+ return false;
}
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
}
void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
@@ -6621,20 +6656,22 @@
Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
Register temp = temp_loc.AsRegister<Register>();
- Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
- locations->GetTemp(1) :
- Location::NoLocation();
- uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
- uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
- uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ DCHECK_GE(num_temps, 1u);
+ DCHECK_LE(num_temps, 2u);
+ Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+ const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+ const uint32_t object_array_data_offset =
+ mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
bool is_type_check_slow_path_fatal =
- (type_check_kind == TypeCheckKind::kExactCheck ||
- type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
- !instruction->CanThrowIntoCatchBlock();
+ IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock());
+
SlowPathCode* type_check_slow_path =
new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
is_type_check_slow_path_fatal);
@@ -6647,12 +6684,16 @@
__ j(kEqual, &done);
}
- // /* HeapReference<Class> */ temp = obj->klass_
- GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kArrayCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
+
if (cls.IsRegister()) {
__ cmpl(temp, cls.AsRegister<Register>());
} else {
@@ -6666,6 +6707,13 @@
}
case TypeCheckKind::kAbstractClassCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
+
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
NearLabel loop;
@@ -6690,6 +6738,13 @@
}
case TypeCheckKind::kClassHierarchyCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
+
// Walk over the class hierarchy to find a match.
NearLabel loop;
__ Bind(&loop);
@@ -6714,6 +6769,13 @@
}
case TypeCheckKind::kArrayObjectCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kEmitCompilerReadBarrier);
+
// Do an exact check.
if (cls.IsRegister()) {
__ cmpl(temp, cls.AsRegister<Register>());
@@ -6738,10 +6800,7 @@
}
case TypeCheckKind::kUnresolvedCheck:
- case TypeCheckKind::kInterfaceCheck:
- // We always go into the type check slow path for the unresolved
- // and interface check cases.
- //
+ // We always go into the type check slow path for the unresolved check case.
// We cannot directly call the CheckCast runtime entry point
// without resorting to a type checking slow path here (i.e. by
// calling InvokeRuntime directly), as it would require to
@@ -6749,15 +6808,52 @@
// instruction (following the runtime calling convention), which
// might be cluttered by the potential first read barrier
// emission at the beginning of this method.
-      //
-      // TODO: Introduce a new runtime entry point taking the object
-      // to test (instead of its class) as argument, and let it deal
-      // with the read barrier issues. This will let us refactor this
-      // case of the `switch` code as it was previously (with a direct
-      // call to the runtime not using a type checking slow path).
-      // This should also be beneficial for the other cases above.
__ jmp(type_check_slow_path->GetEntryLabel());
break;
+
+  case TypeCheckKind::kInterfaceCheck: {
+    // Fast path for the interface check. Since we compare with a memory location in the inner
+    // loop we would need to have cls poisoned. However unpoisoning cls would reset the
+    // conditional flags and cause the conditional jump to be incorrect. Therefore we just jump
+    // to the slow path if we are running under poisoning.
+    if (!kPoisonHeapReferences) {
+      // Try to avoid read barriers to improve the fast path. We cannot get false positives by
+      // doing this.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        /*emit_read_barrier*/ false);
+
+ // /* HeapReference<Class> */ temp = temp->iftable_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ temp_loc,
+ iftable_offset,
+ /*emit_read_barrier*/ false);
+ NearLabel is_null;
+ // Null iftable means it is empty.
+ __ testl(temp, temp);
+ __ j(kZero, &is_null);
+
+ // Loop through the iftable and check if any class matches.
+ __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
+
+ NearLabel start_loop;
+ __ Bind(&start_loop);
+ __ cmpl(cls.AsRegister<Register>(), Address(temp, object_array_data_offset));
+ __ j(kEqual, &done); // Return if same class.
+ // Go to next interface.
+ __ addl(temp, Immediate(2 * kHeapReferenceSize));
+ __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
+ __ j(kNotZero, &start_loop);
+ __ Bind(&is_null);
+ }
+
+ __ jmp(type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
__ Bind(&done);
@@ -6949,10 +7052,12 @@
void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
Location out,
Location obj,
- uint32_t offset) {
+ uint32_t offset,
+ bool emit_read_barrier) {
Register out_reg = out.AsRegister<Register>();
Register obj_reg = obj.AsRegister<Register>();
- if (kEmitCompilerReadBarrier) {
+ if (emit_read_barrier) {
+ DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)