summaryrefslogtreecommitdiff
path: root/compiler/optimizing
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--compiler/optimizing/bounds_check_elimination.cc20
-rw-r--r--compiler/optimizing/code_generator.h2
-rw-r--r--compiler/optimizing/code_generator_arm64.cc77
-rw-r--r--compiler/optimizing/code_generator_arm64.h2
-rw-r--r--compiler/optimizing/code_generator_arm_vixl.cc129
-rw-r--r--compiler/optimizing/code_generator_arm_vixl.h3
-rw-r--r--compiler/optimizing/code_generator_mips.cc100
-rw-r--r--compiler/optimizing/code_generator_mips.h1
-rw-r--r--compiler/optimizing/code_generator_mips64.cc100
-rw-r--r--compiler/optimizing/code_generator_mips64.h1
-rw-r--r--compiler/optimizing/code_generator_vector_x86.cc10
-rw-r--r--compiler/optimizing/code_generator_x86.cc63
-rw-r--r--compiler/optimizing/code_generator_x86.h1
-rw-r--r--compiler/optimizing/code_generator_x86_64.cc72
-rw-r--r--compiler/optimizing/code_generator_x86_64.h1
-rw-r--r--compiler/optimizing/graph_checker.cc92
-rw-r--r--compiler/optimizing/graph_checker.h6
-rw-r--r--compiler/optimizing/graph_visualizer.cc45
-rw-r--r--compiler/optimizing/instruction_builder.cc107
-rw-r--r--compiler/optimizing/instruction_builder.h7
-rw-r--r--compiler/optimizing/instruction_simplifier.cc43
-rw-r--r--compiler/optimizing/intrinsics_arm64.cc8
-rw-r--r--compiler/optimizing/intrinsics_arm_vixl.cc8
-rw-r--r--compiler/optimizing/intrinsics_mips.cc9
-rw-r--r--compiler/optimizing/intrinsics_mips64.cc9
-rw-r--r--compiler/optimizing/intrinsics_x86.cc8
-rw-r--r--compiler/optimizing/intrinsics_x86_64.cc8
-rw-r--r--compiler/optimizing/nodes.cc11
-rw-r--r--compiler/optimizing/nodes.h241
-rw-r--r--compiler/optimizing/optimizing_compiler.cc42
-rw-r--r--compiler/optimizing/optimizing_compiler_stats.h1
-rw-r--r--compiler/optimizing/prepare_for_register_allocation.cc14
-rw-r--r--compiler/optimizing/prepare_for_register_allocation.h2
-rw-r--r--compiler/optimizing/reference_type_propagation.cc35
-rw-r--r--compiler/optimizing/sharpening.cc69
-rw-r--r--compiler/optimizing/sharpening.h27
-rw-r--r--compiler/optimizing/superblock_cloner.cc704
-rw-r--r--compiler/optimizing/superblock_cloner.h323
-rw-r--r--compiler/optimizing/superblock_cloner_test.cc121
39 files changed, 2268 insertions, 254 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 147df1e3e8..d893cc88c4 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -836,9 +836,23 @@ class BCEVisitor : public HGraphVisitor {
ValueRange array_range(&allocator_, lower, upper);
// Try index range obtained by dominator-based analysis.
ValueRange* index_range = LookupValueRange(index, block);
- if (index_range != nullptr && index_range->FitsIn(&array_range)) {
- ReplaceInstruction(bounds_check, index);
- return;
+ if (index_range != nullptr) {
+ if (index_range->FitsIn(&array_range)) {
+ ReplaceInstruction(bounds_check, index);
+ return;
+ } else if (index_range->IsConstantValueRange()) {
+ // If the non-constant index turns out to have a constant range,
+ // make one more attempt to get a constant in the array range.
+ ValueRange* existing_range = LookupValueRange(array_length, block);
+ if (existing_range != nullptr &&
+ existing_range->IsConstantValueRange()) {
+ ValueRange constant_array_range(&allocator_, lower, existing_range->GetLower());
+ if (index_range->FitsIn(&constant_array_range)) {
+ ReplaceInstruction(bounds_check, index);
+ return;
+ }
+ }
+ }
}
// Try index range obtained by induction variable analysis.
// Disables dynamic bce if OOB is certain.
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 3c5a37f958..2dafbf7f6d 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -438,6 +438,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
return false;
+ case TypeCheckKind::kBitstringCheck:
+ return true;
}
LOG(FATAL) << "Unreachable";
UNREACHABLE();
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 13bbffa1e3..b47a5cf3c4 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2112,6 +2112,26 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
+ HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
+ // Extract the bitstring bits.
+ __ Ubfx(temp, temp, 0, mask_bits);
+ }
+ // Compare the bitstring bits to `path_to_root`.
+ __ Cmp(temp, path_to_root);
+}
+
void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
BarrierType type = BarrierAll;
@@ -3840,6 +3860,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -3848,7 +3870,13 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The "out" register is used as a temporary, so it overlaps with the inputs.
// Note that TypeCheckSlowPathARM64 uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -3861,7 +3889,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = InputRegisterAt(instruction, 0);
- Register cls = InputRegisterAt(instruction, 1);
+ Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Register()
+ : InputRegisterAt(instruction, 1);
Location out_loc = locations->Out();
Register out = OutputRegister(instruction);
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -4047,6 +4077,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Cset(out, eq);
+ if (zero.IsLinked()) {
+ __ B(&done);
+ }
+ break;
+ }
}
if (zero.IsLinked()) {
@@ -4069,7 +4116,13 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -4079,7 +4132,9 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = InputRegisterAt(instruction, 0);
- Register cls = InputRegisterAt(instruction, 1);
+ Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Register()
+ : InputRegisterAt(instruction, 1);
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
DCHECK_GE(num_temps, 1u);
DCHECK_LE(num_temps, 3u);
@@ -4260,6 +4315,20 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
__ B(ne, &start_loop);
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ B(ne, type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
__ Bind(&done);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f92c94fda7..cc369de983 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -264,6 +264,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
private:
void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
vixl::aarch64::Register class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ vixl::aarch64::Register temp);
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* instr);
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 577fe00dcd..504c6479cc 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2490,8 +2490,12 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
}
if (!skip_overflow_check) {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- vixl32::Register temp = temps.Acquire();
+ // Using r4 instead of IP saves 2 bytes. Start by asserting that r4 is available here.
+ for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
+ DCHECK(!reg.Is(r4));
+ }
+ DCHECK(!kCoreCalleeSaves.Includes(r4));
+ vixl32::Register temp = r4;
__ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
// The load must immediately precede RecordPcInfo.
ExactAssemblyScope aas(GetVIXLAssembler(),
@@ -7191,6 +7195,67 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
+ HTypeCheckInstruction* check,
+ vixl32::Register temp,
+ vixl32::FlagsUpdate flags_update) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs
+ // the Z flag for BNE. This is indicated by the `flags_update` parameter.
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
+ // Check if the bitstring bits are equal to `path_to_root`.
+ if (flags_update == SetFlags) {
+ __ Cmp(temp, path_to_root);
+ } else {
+ __ Sub(temp, temp, path_to_root);
+ }
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
+ if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
+ // Compare the bitstring bits using SUB.
+ __ Sub(temp, temp, path_to_root);
+ // Shift out bits that do not contribute to the comparison.
+ __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+ } else if (IsUint<16>(path_to_root)) {
+ if (temp.IsLow()) {
+ // Note: Optimized for size but contains one more dependent instruction than necessary.
+ // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
+ // macro assembler would use the high reg IP for the constant by default.
+ // Compare the bitstring bits using SUB.
+ __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2
+ __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3
+ // Shift out bits that do not contribute to the comparison.
+ __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+ } else {
+ // Extract the bitstring bits.
+ __ Ubfx(temp, temp, 0, mask_bits);
+ // Check if the bitstring bits are equal to `path_to_root`.
+ if (flags_update == SetFlags) {
+ __ Cmp(temp, path_to_root);
+ } else {
+ __ Sub(temp, temp, path_to_root);
+ }
+ }
+ } else {
+ // Shift out bits that do not contribute to the comparison.
+ __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+ // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
+ if (flags_update == SetFlags) {
+ __ Cmp(temp, path_to_root << (32u - mask_bits));
+ } else {
+ __ Sub(temp, temp, path_to_root << (32u - mask_bits));
+ }
+ }
+ }
+}
+
HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
@@ -7382,6 +7447,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -7390,7 +7457,13 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The "out" register is used as a temporary, so it overlaps with the inputs.
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -7405,7 +7478,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
vixl32::Register obj = InputRegisterAt(instruction, 0);
- vixl32::Register cls = InputRegisterAt(instruction, 1);
+ vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? vixl32::Register()
+ : InputRegisterAt(instruction, 1);
Location out_loc = locations->Out();
vixl32::Register out = OutputRegister(instruction);
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -7645,6 +7720,26 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
__ B(slow_path->GetEntryLabel());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
+ // If `out` is a low reg and we would have another low reg temp, we could
+ // optimize this as RSBS+ADC, see GenerateConditionWithZero().
+ //
+ // Also, in some cases when `out` is a low reg and we're loading a constant to IP
+ // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
+ // would be the same and we would have fewer direct data dependencies.
+ codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR
+ break;
+ }
}
if (done.IsReferenced()) {
@@ -7662,7 +7757,13 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -7671,7 +7772,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
vixl32::Register obj = InputRegisterAt(instruction, 0);
- vixl32::Register cls = InputRegisterAt(instruction, 1);
+ vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? vixl32::Register()
+ : InputRegisterAt(instruction, 1);
Location temp_loc = locations->GetTemp(0);
vixl32::Register temp = RegisterFrom(temp_loc);
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -7856,6 +7959,20 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
__ B(ne, &start_loop, /* far_target */ false);
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
+ __ B(ne, type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
if (done.IsReferenced()) {
__ Bind(&done);
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 38570bb0fe..bd815f45b3 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -322,6 +322,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path,
vixl32::Register class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ vixl::aarch32::Register temp,
+ vixl::aarch32::FlagsUpdate flags_update);
void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5c8e46ed19..2ed0ab79a1 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1929,6 +1929,34 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ Register temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ LoadFromOffset(
+ kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ if (IsUint<16>(path_to_root)) {
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ __ LoadConst32(TMP, path_to_root);
+ __ Xor(temp, temp, TMP);
+ }
+ // Shift out bits that do not contribute to the comparison.
+ __ Sll(temp, temp, 32 - mask_bits);
+ }
+}
+
void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
__ Sync(0); // Only stype 0 is supported.
}
@@ -3289,12 +3317,20 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -3303,7 +3339,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
- Register cls = locations->InAt(1).AsRegister<Register>();
+ Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
Register temp = temp_loc.AsRegister<Register>();
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -3353,7 +3389,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// Jump to slow path for throwing the exception or doing a
// more involved array check.
- __ Bne(temp, cls, slow_path->GetEntryLabel());
+ __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel());
break;
}
@@ -3379,7 +3415,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
// exception.
__ Beqz(temp, slow_path->GetEntryLabel());
// Otherwise, compare the classes.
- __ Bne(temp, cls, &loop);
+ __ Bne(temp, cls.AsRegister<Register>(), &loop);
break;
}
@@ -3394,7 +3430,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
// Walk over the class hierarchy to find a match.
MipsLabel loop;
__ Bind(&loop);
- __ Beq(temp, cls, &done);
+ __ Beq(temp, cls.AsRegister<Register>(), &done);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
temp_loc,
@@ -3417,7 +3453,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
maybe_temp2_loc,
kWithoutReadBarrier);
// Do an exact check.
- __ Beq(temp, cls, &done);
+ __ Beq(temp, cls.AsRegister<Register>(), &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -3476,7 +3512,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
// Go to next interface.
__ Addiu(TMP, TMP, -2);
// Compare the classes and continue the loop if they do not match.
- __ Bne(AT, cls, &loop);
+ __ Bne(AT, cls.AsRegister<Register>(), &loop);
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ Bnez(temp, slow_path->GetEntryLabel());
break;
}
}
@@ -7207,6 +7257,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -7215,7 +7267,13 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The output does overlap inputs.
// Note that TypeCheckSlowPathMIPS uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -7227,7 +7285,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
- Register cls = locations->InAt(1).AsRegister<Register>();
+ Location cls = locations->InAt(1);
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -7257,7 +7315,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
maybe_temp_loc,
kCompilerReadBarrierOption);
// Classes must be equal for the instanceof to succeed.
- __ Xor(out, out, cls);
+ __ Xor(out, out, cls.AsRegister<Register>());
__ Sltiu(out, out, 1);
break;
}
@@ -7282,7 +7340,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqz(out, &done);
- __ Bne(out, cls, &loop);
+ __ Bne(out, cls.AsRegister<Register>(), &loop);
__ LoadConst32(out, 1);
break;
}
@@ -7298,7 +7356,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
// Walk over the class hierarchy to find a match.
MipsLabel loop, success;
__ Bind(&loop);
- __ Beq(out, cls, &success);
+ __ Beq(out, cls.AsRegister<Register>(), &success);
// /* HeapReference<Class> */ out = out->super_class_
GenerateReferenceLoadOneRegister(instruction,
out_loc,
@@ -7323,7 +7381,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
kCompilerReadBarrierOption);
// Do an exact check.
MipsLabel success;
- __ Beq(out, cls, &success);
+ __ Beq(out, cls.AsRegister<Register>(), &success);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ out = out->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -7355,7 +7413,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS(
instruction, /* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
- __ Bne(out, cls, slow_path->GetEntryLabel());
+ __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel());
__ LoadConst32(out, 1);
break;
}
@@ -7387,6 +7445,20 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
__ B(slow_path->GetEntryLabel());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Sltiu(out, out, 1);
+ break;
+ }
}
__ Bind(&done);
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 32b3e4221f..ffeb3b00a7 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -237,6 +237,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
private:
void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
void HandleBinaryOp(HBinaryOperation* operation);
void HandleCondition(HCondition* instruction);
void HandleShift(HBinaryOperation* operation);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index bcfe051c90..3ae8a30754 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1775,6 +1775,34 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ GpuRegister temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ LoadFromOffset(
+ kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ if (IsUint<16>(path_to_root)) {
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ __ LoadConst32(TMP, path_to_root);
+ __ Xor(temp, temp, TMP);
+ }
+ // Shift out bits that do not contribute to the comparison.
+ __ Sll(temp, temp, 32 - mask_bits);
+ }
+}
+
void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
__ Sync(0); // only stype 0 is supported
}
@@ -2844,12 +2872,20 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -2858,7 +2894,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
- GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+ Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
GpuRegister temp = temp_loc.AsRegister<GpuRegister>();
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -2908,7 +2944,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// Jump to slow path for throwing the exception or doing a
// more involved array check.
- __ Bnec(temp, cls, slow_path->GetEntryLabel());
+ __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
break;
}
@@ -2934,7 +2970,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
// exception.
__ Beqzc(temp, slow_path->GetEntryLabel());
// Otherwise, compare the classes.
- __ Bnec(temp, cls, &loop);
+ __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop);
break;
}
@@ -2949,7 +2985,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
// Walk over the class hierarchy to find a match.
Mips64Label loop;
__ Bind(&loop);
- __ Beqc(temp, cls, &done);
+ __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
temp_loc,
@@ -2972,7 +3008,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
maybe_temp2_loc,
kWithoutReadBarrier);
// Do an exact check.
- __ Beqc(temp, cls, &done);
+ __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -3031,7 +3067,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
__ Daddiu(temp, temp, 2 * kHeapReferenceSize);
__ Addiu(TMP, TMP, -2);
// Compare the classes and continue the loop if they do not match.
- __ Bnec(AT, cls, &loop);
+ __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop);
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ Bnezc(temp, slow_path->GetEntryLabel());
break;
}
}
@@ -5524,6 +5574,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -5532,7 +5584,13 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The output does overlap inputs.
// Note that TypeCheckSlowPathMIPS64 uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -5544,7 +5602,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
- GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+ Location cls = locations->InAt(1);
Location out_loc = locations->Out();
GpuRegister out = out_loc.AsRegister<GpuRegister>();
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -5574,7 +5632,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
maybe_temp_loc,
kCompilerReadBarrierOption);
// Classes must be equal for the instanceof to succeed.
- __ Xor(out, out, cls);
+ __ Xor(out, out, cls.AsRegister<GpuRegister>());
__ Sltiu(out, out, 1);
break;
}
@@ -5599,7 +5657,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqzc(out, &done);
- __ Bnec(out, cls, &loop);
+ __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop);
__ LoadConst32(out, 1);
break;
}
@@ -5615,7 +5673,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
// Walk over the class hierarchy to find a match.
Mips64Label loop, success;
__ Bind(&loop);
- __ Beqc(out, cls, &success);
+ __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);
// /* HeapReference<Class> */ out = out->super_class_
GenerateReferenceLoadOneRegister(instruction,
out_loc,
@@ -5640,7 +5698,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
kCompilerReadBarrierOption);
// Do an exact check.
Mips64Label success;
- __ Beqc(out, cls, &success);
+ __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ out = out->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -5672,7 +5730,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64(
instruction, /* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
- __ Bnec(out, cls, slow_path->GetEntryLabel());
+ __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
__ LoadConst32(out, 1);
break;
}
@@ -5704,6 +5762,20 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
__ Bc(slow_path->GetEntryLabel());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Sltiu(out, out, 1);
+ break;
+ }
}
__ Bind(&done);
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index d479410f07..87d5a9c15a 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -233,6 +233,7 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
private:
void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp);
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* operation);
void HandleCondition(HCondition* instruction);
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index ad8128a5b1..7b4b85d2fe 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -92,8 +92,8 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i
__ pshufd(dst, dst, Immediate(0));
break;
case DataType::Type::kInt64: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
DCHECK_EQ(2u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
__ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
__ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
__ punpckldq(dst, tmp);
@@ -101,13 +101,13 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i
break;
}
case DataType::Type::kFloat32:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
__ shufps(dst, dst, Immediate(0));
break;
case DataType::Type::kFloat64:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
__ shufpd(dst, dst, Immediate(0));
break;
default:
@@ -160,8 +160,8 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr
__ movd(locations->Out().AsRegister<Register>(), src);
break;
case DataType::Type::kInt64: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
DCHECK_EQ(2u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
__ movd(locations->Out().AsRegisterPairLow<Register>(), src);
__ pshufd(tmp, src, Immediate(1));
__ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
@@ -1022,8 +1022,8 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction
__ movd(dst, locations->InAt(0).AsRegister<Register>());
break;
case DataType::Type::kInt64: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
DCHECK_EQ(2u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
__ xorps(tmp, tmp);
__ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
__ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index cbe9e0a35c..e85f9001b1 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6234,6 +6234,27 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
// No need for memory fence, thanks to the X86 memory model.
}
+void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ Register temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if ((false) && mask_bits == 16u) {
+ // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552
+ // Compare the bitstring in memory.
+ __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
+ // Compare the bitstring bits using SUB.
+ __ subl(temp, Immediate(path_to_root));
+ // Shift out bits that do not contribute to the comparison.
+ __ shll(temp, Immediate(32u - mask_bits));
+ }
+}
+
HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
@@ -6426,6 +6447,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -6434,7 +6457,13 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::Any());
+ }
// Note that TypeCheckSlowPathX86 uses this "out" register too.
locations->SetOut(Location::RequiresRegister());
// When read barriers are enabled, we need a temporary register for some cases.
@@ -6655,6 +6684,21 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ j(kNotEqual, &zero);
+ __ movl(out, Immediate(1));
+ __ jmp(&done);
+ break;
+ }
}
if (zero.IsLinked()) {
@@ -6681,6 +6725,10 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
// Require a register for the interface check since there is a loop that compares the class to
// a memory address.
locations->SetInAt(1, Location::RequiresRegister());
+ } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
} else {
locations->SetInAt(1, Location::Any());
}
@@ -6900,6 +6948,19 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
__ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
__ Bind(&done);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 0082853184..2d14d4cdc2 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -211,6 +211,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
// the suspend call.
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
void HandleBitwiseOperation(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 510eec4f30..9f8b1bb038 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5440,6 +5440,27 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
// No need for memory fence, thanks to the x86-64 memory model.
}
+void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ CpuRegister temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if ((false) && mask_bits == 16u) {
+ // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552
+ // Compare the bitstring in memory.
+ __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
+ // Compare the bitstring bits using SUB.
+ __ subl(temp, Immediate(path_to_root));
+ // Shift out bits that do not contribute to the comparison.
+ __ shll(temp, Immediate(32u - mask_bits));
+ }
+}
+
HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
switch (desired_class_load_kind) {
@@ -5812,6 +5833,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -5820,7 +5843,13 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::Any());
+ }
// Note that TypeCheckSlowPathX86_64 uses this "out" register too.
locations->SetOut(Location::RequiresRegister());
// When read barriers are enabled, we need a temporary register for
@@ -6049,6 +6078,27 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ if (zero.IsLinked()) {
+ __ j(kNotEqual, &zero);
+ __ movl(out, Immediate(1));
+ __ jmp(&done);
+ } else {
+ __ setcc(kEqual, out);
+ // setcc only sets the low byte.
+ __ andl(out, Immediate(1));
+ }
+ break;
+ }
}
if (zero.IsLinked()) {
@@ -6075,6 +6125,10 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
// Require a register for the interface check since there is a loop that compares the class to
// a memory address.
locations->SetInAt(1, Location::RequiresRegister());
+ } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
} else {
locations->SetInAt(1, Location::Any());
}
@@ -6261,7 +6315,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
break;
}
- case TypeCheckKind::kInterfaceCheck:
+ case TypeCheckKind::kInterfaceCheck: {
// Fast path for the interface check. Try to avoid read barriers to improve the fast path.
// We can not get false positives by doing this.
// /* HeapReference<Class> */ temp = obj->klass_
@@ -6297,6 +6351,20 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// If `cls` was poisoned above, unpoison it.
__ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
if (done.IsLinked()) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index e86123ef01..97f8ec7ae0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -208,6 +208,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
// the suspend call.
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
void HandleBitwiseOperation(HBinaryOperation* operation);
void GenerateRemFP(HRem* rem);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index c88baa8610..fbcbe3608e 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -25,6 +25,11 @@
#include "base/bit_vector-inl.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "obj_ptr-inl.h"
+#include "scoped_thread_state_change-inl.h"
+#include "subtype_check.h"
namespace art {
@@ -548,30 +553,85 @@ void GraphChecker::VisitReturnVoid(HReturnVoid* ret) {
}
}
-void GraphChecker::VisitCheckCast(HCheckCast* check) {
- VisitInstruction(check);
- HInstruction* input = check->InputAt(1);
- if (!input->IsLoadClass()) {
- AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
+void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
+ size_t input_pos,
+ bool check_value,
+ uint32_t expected_value,
+ const char* name) {
+ if (!check->InputAt(input_pos)->IsIntConstant()) {
+ AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.",
check->DebugName(),
check->GetId(),
- input->DebugName(),
- input->GetId()));
+ input_pos,
+ name,
+ check->InputAt(2)->DebugName(),
+ check->InputAt(2)->GetId()));
+ } else if (check_value) {
+ uint32_t actual_value =
+ static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue());
+ if (actual_value != expected_value) {
+ AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.",
+ check->DebugName(),
+ check->GetId(),
+ name,
+ actual_value,
+ expected_value));
+ }
}
}
-void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
- VisitInstruction(instruction);
- HInstruction* input = instruction->InputAt(1);
- if (!input->IsLoadClass()) {
- AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
- instruction->DebugName(),
- instruction->GetId(),
- input->DebugName(),
- input->GetId()));
+void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
+ VisitInstruction(check);
+ HInstruction* input = check->InputAt(1);
+ if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ if (!input->IsNullConstant()) {
+ AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.",
+ check->DebugName(),
+ check->GetId(),
+ input->DebugName(),
+ input->GetId()));
+ }
+ bool check_values = false;
+ BitString::StorageType expected_path_to_root = 0u;
+ BitString::StorageType expected_mask = 0u;
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ ObjPtr<mirror::Class> klass = check->GetClass().Get();
+ MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+ SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+ if (state == SubtypeCheckInfo::kAssigned) {
+ expected_path_to_root =
+ SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass);
+ expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass);
+ check_values = true;
+ } else {
+ AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.",
+ check->DebugName(),
+ check->GetId()));
+ }
+ }
+ CheckTypeCheckBitstringInput(
+ check, /* input_pos */ 2, check_values, expected_path_to_root, "path_to_root");
+ CheckTypeCheckBitstringInput(check, /* input_pos */ 3, check_values, expected_mask, "mask");
+ } else {
+ if (!input->IsLoadClass()) {
+ AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.",
+ check->DebugName(),
+ check->GetId(),
+ input->DebugName(),
+ input->GetId()));
+ }
}
}
+void GraphChecker::VisitCheckCast(HCheckCast* check) {
+ HandleTypeCheckInstruction(check);
+}
+
+void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
+ HandleTypeCheckInstruction(instruction);
+}
+
void GraphChecker::HandleLoop(HBasicBlock* loop_header) {
int id = loop_header->GetBlockId();
HLoopInformation* loop_information = loop_header->GetLoopInformation();
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 0f0b49d240..dbedc40518 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -71,6 +71,12 @@ class GraphChecker : public HGraphDelegateVisitor {
void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+ void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
+ size_t input_pos,
+ bool check_value,
+ uint32_t expected_value,
+ const char* name);
+ void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction);
void HandleLoop(HBasicBlock* loop_header);
void HandleBooleanInput(HInstruction* instruction, size_t input_index);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 12c69889ab..55191214a6 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -389,16 +389,23 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("load_kind") << load_string->GetLoadKind();
}
- void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
- StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind();
+ void HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
+ StartAttributeStream("check_kind") << check->GetTypeCheckKind();
StartAttributeStream("must_do_null_check") << std::boolalpha
- << check_cast->MustDoNullCheck() << std::noboolalpha;
+ << check->MustDoNullCheck() << std::noboolalpha;
+ if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ StartAttributeStream("path_to_root") << std::hex
+ << "0x" << check->GetBitstringPathToRoot() << std::dec;
+ StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec;
+ }
+ }
+
+ void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
+ HandleTypeCheckInstruction(check_cast);
}
void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE {
- StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind();
- StartAttributeStream("must_do_null_check") << std::boolalpha
- << instance_of->MustDoNullCheck() << std::noboolalpha;
+ HandleTypeCheckInstruction(instance_of);
}
void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
@@ -648,20 +655,32 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
<< std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;
}
+ // For the builder and the inliner, we want to add extra information on HInstructions
+ // that have reference types, and also HInstanceOf/HCheckcast.
if ((IsPass(HGraphBuilder::kBuilderPassName)
|| IsPass(HInliner::kInlinerPassName))
- && (instruction->GetType() == DataType::Type::kReference)) {
- ReferenceTypeInfo info = instruction->IsLoadClass()
- ? instruction->AsLoadClass()->GetLoadedClassRTI()
- : instruction->GetReferenceTypeInfo();
+ && (instruction->GetType() == DataType::Type::kReference ||
+ instruction->IsInstanceOf() ||
+ instruction->IsCheckCast())) {
+ ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference)
+ ? instruction->IsLoadClass()
+ ? instruction->AsLoadClass()->GetLoadedClassRTI()
+ : instruction->GetReferenceTypeInfo()
+ : instruction->IsInstanceOf()
+ ? instruction->AsInstanceOf()->GetTargetClassRTI()
+ : instruction->AsCheckCast()->GetTargetClassRTI();
ScopedObjectAccess soa(Thread::Current());
if (info.IsValid()) {
StartAttributeStream("klass")
<< mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get());
- StartAttributeStream("can_be_null")
- << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+ if (instruction->GetType() == DataType::Type::kReference) {
+ StartAttributeStream("can_be_null")
+ << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+ }
StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
- } else if (instruction->IsLoadClass()) {
+ } else if (instruction->IsLoadClass() ||
+ instruction->IsInstanceOf() ||
+ instruction->IsCheckCast()) {
StartAttributeStream("klass") << "unresolved";
} else {
// The NullConstant may be added to the graph during other passes that happen between
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 64a1eccf60..0205c6a4d3 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1811,29 +1811,6 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object,
}
}
-static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- if (cls == nullptr) {
- return TypeCheckKind::kUnresolvedCheck;
- } else if (cls->IsInterface()) {
- return TypeCheckKind::kInterfaceCheck;
- } else if (cls->IsArrayClass()) {
- if (cls->GetComponentType()->IsObjectClass()) {
- return TypeCheckKind::kArrayObjectCheck;
- } else if (cls->CannotBeAssignedFromOtherTypes()) {
- return TypeCheckKind::kExactCheck;
- } else {
- return TypeCheckKind::kArrayCheck;
- }
- } else if (cls->IsFinal()) {
- return TypeCheckKind::kExactCheck;
- } else if (cls->IsAbstract()) {
- return TypeCheckKind::kAbstractClassCheck;
- } else {
- return TypeCheckKind::kClassHierarchyCheck;
- }
-}
-
void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) {
HLoadString* load_string =
new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc);
@@ -1848,22 +1825,8 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_
HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) {
ScopedObjectAccess soa(Thread::Current());
const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
- Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
- Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass(
- soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_));
-
- bool needs_access_check = true;
- if (klass != nullptr) {
- if (klass->IsPublic()) {
- needs_access_check = false;
- } else {
- ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
- if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) {
- needs_access_check = false;
- }
- }
- }
-
+ Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+ bool needs_access_check = LoadClassNeedsAccessCheck(klass);
return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
}
@@ -1908,25 +1871,83 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index,
return load_class;
}
+Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa,
+ dex::TypeIndex type_index) {
+ Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
+ ObjPtr<mirror::Class> klass = compiler_driver_->ResolveClass(
+ soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_);
+ // TODO: Avoid creating excessive handles if the method references the same class repeatedly.
+ // (Use a map on the local_allocator_.)
+ return handles_->NewHandle(klass);
+}
+
+bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) {
+ if (klass == nullptr) {
+ return true;
+ } else if (klass->IsPublic()) {
+ return false;
+ } else {
+ ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
+ return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get());
+ }
+}
+
void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
uint8_t destination,
uint8_t reference,
dex::TypeIndex type_index,
uint32_t dex_pc) {
HInstruction* object = LoadLocal(reference, DataType::Type::kReference);
- HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
ScopedObjectAccess soa(Thread::Current());
- TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass());
+ const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+ Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+ bool needs_access_check = LoadClassNeedsAccessCheck(klass);
+ TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind(
+ klass.Get(), code_generator_, compiler_driver_, needs_access_check);
+
+ HInstruction* class_or_null = nullptr;
+ HIntConstant* bitstring_path_to_root = nullptr;
+ HIntConstant* bitstring_mask = nullptr;
+ if (check_kind == TypeCheckKind::kBitstringCheck) {
+ // TODO: Allow using the bitstring check also if we need an access check.
+ DCHECK(!needs_access_check);
+ class_or_null = graph_->GetNullConstant(dex_pc);
+ MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+ uint32_t path_to_root =
+ SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get());
+ uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get());
+ bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc);
+ bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc);
+ } else {
+ class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
+ }
+ DCHECK(class_or_null != nullptr);
+
if (instruction.Opcode() == Instruction::INSTANCE_OF) {
- AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc));
+ AppendInstruction(new (allocator_) HInstanceOf(object,
+ class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator_,
+ bitstring_path_to_root,
+ bitstring_mask));
UpdateLocal(destination, current_block_->GetLastInstruction());
} else {
DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
// We emit a CheckCast followed by a BoundType. CheckCast is a statement
// which may throw. If it succeeds BoundType sets the new type of `object`
// for all subsequent uses.
- AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc));
+ AppendInstruction(
+ new (allocator_) HCheckCast(object,
+ class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator_,
+ bitstring_path_to_root,
+ bitstring_mask));
AppendInstruction(new (allocator_) HBoundType(object, dex_pc));
UpdateLocal(reference, current_block_->GetLastInstruction());
}
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 4428c53277..f78829232d 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -39,6 +39,7 @@ class DexCompilationUnit;
class HBasicBlockBuilder;
class Instruction;
class OptimizingCompilerStats;
+class ScopedObjectAccess;
class SsaBuilder;
class VariableSizedHandleScope;
@@ -232,6 +233,12 @@ class HInstructionBuilder : public ValueObject {
bool needs_access_check)
REQUIRES_SHARED(Locks::mutator_lock_);
+ Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
// Returns the outer-most compiling method's class.
ObjPtr<mirror::Class> GetOutermostCompilingClass() const;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index a42a85dc1d..2538fa38f1 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -576,7 +576,9 @@ bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInst
// Returns whether doing a type test between the class of `object` against `klass` has
// a statically known outcome. The result of the test is stored in `outcome`.
-static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) {
+static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti,
+ HInstruction* object,
+ /*out*/bool* outcome) {
DCHECK(!object->IsNullConstant()) << "Null constants should be special cased";
ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo();
ScopedObjectAccess soa(Thread::Current());
@@ -586,7 +588,6 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo
return false;
}
- ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI();
if (!class_rti.IsValid()) {
// Happens when the loaded class is unresolved.
return false;
@@ -611,8 +612,8 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo
void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
HInstruction* object = check_cast->InputAt(0);
- HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
- if (load_class->NeedsAccessCheck()) {
+ if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+ check_cast->GetTargetClass()->NeedsAccessCheck()) {
// If we need to perform an access check we cannot remove the instruction.
return;
}
@@ -630,15 +631,18 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
// Note: The `outcome` is initialized to please valgrind - the compiler can reorder
// the return value check with the `outcome` check, b/27651442 .
bool outcome = false;
- if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+ if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) {
if (outcome) {
check_cast->GetBlock()->RemoveInstruction(check_cast);
MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
- if (!load_class->HasUses()) {
- // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
- // However, here we know that it cannot because the checkcast was successfull, hence
- // the class was already loaded.
- load_class->GetBlock()->RemoveInstruction(load_class);
+ if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+ HLoadClass* load_class = check_cast->GetTargetClass();
+ if (!load_class->HasUses()) {
+ // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+ // However, here we know that it cannot because the checkcast was successfull, hence
+ // the class was already loaded.
+ load_class->GetBlock()->RemoveInstruction(load_class);
+ }
}
} else {
// Don't do anything for exceptional cases for now. Ideally we should remove
@@ -649,8 +653,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
HInstruction* object = instruction->InputAt(0);
- HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass();
- if (load_class->NeedsAccessCheck()) {
+ if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+ instruction->GetTargetClass()->NeedsAccessCheck()) {
// If we need to perform an access check we cannot remove the instruction.
return;
}
@@ -673,7 +677,7 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
// Note: The `outcome` is initialized to please valgrind - the compiler can reorder
// the return value check with the `outcome` check, b/27651442 .
bool outcome = false;
- if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+ if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) {
MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf);
if (outcome && can_be_null) {
// Type test will succeed, we just need a null test.
@@ -686,11 +690,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
}
RecordSimplification();
instruction->GetBlock()->RemoveInstruction(instruction);
- if (outcome && !load_class->HasUses()) {
- // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
- // However, here we know that it cannot because the instanceof check was successfull, hence
- // the class was already loaded.
- load_class->GetBlock()->RemoveInstruction(load_class);
+ if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+ HLoadClass* load_class = instruction->GetTargetClass();
+ if (!load_class->HasUses()) {
+ // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+ // However, here we know that it cannot because the instanceof check was successfull, hence
+ // the class was already loaded.
+ load_class->GetBlock()->RemoveInstruction(load_class);
+ }
}
}
}
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index ca1b451e6b..2f8e33f941 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2011,6 +2011,14 @@ void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 99b8b5df74..830d0403e4 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2811,6 +2811,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 113c9de5a2..cafa5228d9 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2835,6 +2835,15 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAtan2(HInvoke* invoke) {
GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+// static double java.lang.Math.pow(double y, double x)
+void IntrinsicLocationsBuilderMIPS::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathPow(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
// static double java.lang.Math.cbrt(double a)
void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) {
CreateFPToFPCallLocations(allocator_, invoke);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 521bad27e2..89f1818be2 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2416,6 +2416,15 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAtan2(HInvoke* invoke) {
GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+// static double java.lang.Math.pow(double y, double x)
+void IntrinsicLocationsBuilderMIPS64::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathPow(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
// static double java.lang.Math.cbrt(double a)
void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) {
CreateFPToFPCallLocations(allocator_, invoke);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index baa410b884..46b7f3f1ce 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1105,6 +1105,14 @@ void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 6dd8b8e1f5..6483b7cb2a 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -897,6 +897,14 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 727431a493..5587f87dae 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -865,6 +865,15 @@ void HLoopInformation::Populate() {
graph->SetHasLoops(true);
}
+void HLoopInformation::PopulateInnerLoopUpwards(HLoopInformation* inner_loop) {
+ DCHECK(inner_loop->GetPreHeader()->GetLoopInformation() == this);
+ blocks_.Union(&inner_loop->blocks_);
+ HLoopInformation* outer_loop = GetPreHeader()->GetLoopInformation();
+ if (outer_loop != nullptr) {
+ outer_loop->PopulateInnerLoopUpwards(this);
+ }
+}
+
HBasicBlock* HLoopInformation::GetPreHeader() const {
HBasicBlock* block = header_->GetPredecessors()[0];
DCHECK(irreducible_ || (block == header_->GetDominator()));
@@ -3096,6 +3105,8 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) {
return os << "array_object_check";
case TypeCheckKind::kArrayCheck:
return os << "array_check";
+ case TypeCheckKind::kBitstringCheck:
+ return os << "bitstring_check";
default:
LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs);
UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2047954207..b0657d6f1c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -826,6 +826,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
// Finds blocks that are part of this loop.
void Populate();
+ // Updates blocks population of the loop and all of its outer' ones recursively after the
+ // population of the inner loop is updated.
+ void PopulateInnerLoopUpwards(HLoopInformation* inner_loop);
+
// Returns whether this loop information contains `block`.
// Note that this loop information *must* be populated before entering this function.
bool Contains(const HBasicBlock& block) const;
@@ -856,6 +860,12 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
bool HasExitEdge() const;
+ // Resets back edge and blocks-in-loop data.
+ void ResetBasicBlockData() {
+ back_edges_.clear();
+ ClearAllBlocks();
+ }
+
private:
// Internal recursive implementation of `Populate`.
void PopulateRecursive(HBasicBlock* block);
@@ -998,6 +1008,18 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
loop_information_->AddBackEdge(back_edge);
}
+ // Registers a back edge; if the block was not a loop header before the call associates a newly
+ // created loop info with it.
+ //
+ // Used in SuperblockCloner to preserve LoopInformation object instead of reseting loop
+ // info for all blocks during back edges recalculation.
+ void AddBackEdgeWhileUpdating(HBasicBlock* back_edge) {
+ if (loop_information_ == nullptr || loop_information_->GetHeader() != this) {
+ loop_information_ = new (graph_->GetAllocator()) HLoopInformation(this, graph_);
+ }
+ loop_information_->AddBackEdge(back_edge);
+ }
+
HGraph* GetGraph() const { return graph_; }
void SetGraph(HGraph* graph) { graph_ = graph; }
@@ -5929,8 +5951,7 @@ class HLoadClass FINAL : public HInstruction {
special_input_(HUserRecord<HInstruction*>(current_method)),
type_index_(type_index),
dex_file_(dex_file),
- klass_(klass),
- loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
+ klass_(klass) {
// Referrers class should not need access check. We never inline unverified
// methods so we can't possibly end up in this situation.
DCHECK(!is_referrers_class || !needs_access_check);
@@ -5940,6 +5961,7 @@ class HLoadClass FINAL : public HInstruction {
SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
SetPackedFlag<kFlagIsInBootImage>(false);
SetPackedFlag<kFlagGenerateClInitCheck>(false);
+ SetPackedFlag<kFlagValidLoadedClassRTI>(false);
}
bool IsClonable() const OVERRIDE { return true; }
@@ -5988,13 +6010,18 @@ class HLoadClass FINAL : public HInstruction {
}
ReferenceTypeInfo GetLoadedClassRTI() {
- return loaded_class_rti_;
+ if (GetPackedFlag<kFlagValidLoadedClassRTI>()) {
+ // Note: The is_exact flag from the return value should not be used.
+ return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true);
+ } else {
+ return ReferenceTypeInfo::CreateInvalid();
+ }
}
- void SetLoadedClassRTI(ReferenceTypeInfo rti) {
- // Make sure we only set exact types (the loaded class should never be merged).
- DCHECK(rti.IsExact());
- loaded_class_rti_ = rti;
+ // Loaded class RTI is marked as valid by RTP if the klass_ is admissible.
+ void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(klass_ != nullptr);
+ SetPackedFlag<kFlagValidLoadedClassRTI>(true);
}
dex::TypeIndex GetTypeIndex() const { return type_index_; }
@@ -6047,7 +6074,8 @@ class HLoadClass FINAL : public HInstruction {
static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1;
static constexpr size_t kFieldLoadKindSize =
MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
- static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize;
+ static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize;
+ static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1;
static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields.");
using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
@@ -6075,8 +6103,6 @@ class HLoadClass FINAL : public HInstruction {
const DexFile& dex_file_;
Handle<mirror::Class> klass_;
-
- ReferenceTypeInfo loaded_class_rti_;
};
std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
@@ -6604,71 +6630,156 @@ enum class TypeCheckKind {
kInterfaceCheck, // No optimization yet when checking against an interface.
kArrayObjectCheck, // Can just check if the array is not primitive.
kArrayCheck, // No optimization yet when checking against a generic array.
+ kBitstringCheck, // Compare the type check bitstring.
kLast = kArrayCheck
};
std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs);
-class HInstanceOf FINAL : public HExpression<2> {
+// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an
+// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.)
+class HTypeCheckInstruction : public HVariableInputSizeInstruction {
public:
- HInstanceOf(HInstruction* object,
- HLoadClass* target_class,
- TypeCheckKind check_kind,
- uint32_t dex_pc)
- : HExpression(DataType::Type::kBool,
- SideEffectsForArchRuntimeCalls(check_kind),
- dex_pc) {
+ HTypeCheckInstruction(HInstruction* object,
+ HInstruction* target_class_or_null,
+ TypeCheckKind check_kind,
+ Handle<mirror::Class> klass,
+ uint32_t dex_pc,
+ ArenaAllocator* allocator,
+ HIntConstant* bitstring_path_to_root,
+ HIntConstant* bitstring_mask,
+ SideEffects side_effects)
+ : HVariableInputSizeInstruction(
+ side_effects,
+ dex_pc,
+ allocator,
+ /* number_of_inputs */ check_kind == TypeCheckKind::kBitstringCheck ? 4u : 2u,
+ kArenaAllocTypeCheckInputs),
+ klass_(klass) {
SetPackedField<TypeCheckKindField>(check_kind);
SetPackedFlag<kFlagMustDoNullCheck>(true);
+ SetPackedFlag<kFlagValidTargetClassRTI>(false);
SetRawInputAt(0, object);
- SetRawInputAt(1, target_class);
+ SetRawInputAt(1, target_class_or_null);
+ DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr);
+ DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr);
+ if (check_kind == TypeCheckKind::kBitstringCheck) {
+ DCHECK(target_class_or_null->IsNullConstant());
+ SetRawInputAt(2, bitstring_path_to_root);
+ SetRawInputAt(3, bitstring_mask);
+ } else {
+ DCHECK(target_class_or_null->IsLoadClass());
+ }
}
HLoadClass* GetTargetClass() const {
+ DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
HInstruction* load_class = InputAt(1);
DCHECK(load_class->IsLoadClass());
return load_class->AsLoadClass();
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
+ uint32_t GetBitstringPathToRoot() const {
+ DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+ HInstruction* path_to_root = InputAt(2);
+ DCHECK(path_to_root->IsIntConstant());
+ return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue());
+ }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
+ uint32_t GetBitstringMask() const {
+ DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+ HInstruction* mask = InputAt(3);
+ DCHECK(mask->IsIntConstant());
+ return static_cast<uint32_t>(mask->AsIntConstant()->GetValue());
}
- bool NeedsEnvironment() const OVERRIDE {
- return CanCallRuntime(GetTypeCheckKind());
+ bool IsClonable() const OVERRIDE { return true; }
+ bool CanBeMoved() const OVERRIDE { return true; }
+
+ bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName();
+ return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields();
}
- // Used only in code generation.
bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
- static bool CanCallRuntime(TypeCheckKind check_kind) {
- // Mips currently does runtime calls for any other checks.
- return check_kind != TypeCheckKind::kExactCheck;
+ ReferenceTypeInfo GetTargetClassRTI() {
+ if (GetPackedFlag<kFlagValidTargetClassRTI>()) {
+ // Note: The is_exact flag from the return value should not be used.
+ return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true);
+ } else {
+ return ReferenceTypeInfo::CreateInvalid();
+ }
}
- static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) {
- return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None();
+ // Target class RTI is marked as valid by RTP if the klass_ is admissible.
+ void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(klass_ != nullptr);
+ SetPackedFlag<kFlagValidTargetClassRTI>(true);
}
- DECLARE_INSTRUCTION(InstanceOf);
+ Handle<mirror::Class> GetClass() const {
+ return klass_;
+ }
protected:
- DEFAULT_COPY_CONSTRUCTOR(InstanceOf);
+ DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction);
private:
- static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
static constexpr size_t kFieldTypeCheckKindSize =
MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
- static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1;
+ static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1;
+ static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1;
static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
+
+ Handle<mirror::Class> klass_;
+};
+
+class HInstanceOf FINAL : public HTypeCheckInstruction {
+ public:
+ HInstanceOf(HInstruction* object,
+ HInstruction* target_class_or_null,
+ TypeCheckKind check_kind,
+ Handle<mirror::Class> klass,
+ uint32_t dex_pc,
+ ArenaAllocator* allocator,
+ HIntConstant* bitstring_path_to_root,
+ HIntConstant* bitstring_mask)
+ : HTypeCheckInstruction(object,
+ target_class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator,
+ bitstring_path_to_root,
+ bitstring_mask,
+ SideEffectsForArchRuntimeCalls(check_kind)) {}
+
+ DataType::Type GetType() const OVERRIDE { return DataType::Type::kBool; }
+
+ bool NeedsEnvironment() const OVERRIDE {
+ return CanCallRuntime(GetTypeCheckKind());
+ }
+
+ static bool CanCallRuntime(TypeCheckKind check_kind) {
+ // Mips currently does runtime calls for any other checks.
+ return check_kind != TypeCheckKind::kExactCheck;
+ }
+
+ static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) {
+ return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None();
+ }
+
+ DECLARE_INSTRUCTION(InstanceOf);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(InstanceOf);
};
class HBoundType FINAL : public HExpression<1> {
@@ -6718,31 +6829,25 @@ class HBoundType FINAL : public HExpression<1> {
ReferenceTypeInfo upper_bound_;
};
-class HCheckCast FINAL : public HTemplateInstruction<2> {
+class HCheckCast FINAL : public HTypeCheckInstruction {
public:
HCheckCast(HInstruction* object,
- HLoadClass* target_class,
+ HInstruction* target_class_or_null,
TypeCheckKind check_kind,
- uint32_t dex_pc)
- : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
- SetPackedField<TypeCheckKindField>(check_kind);
- SetPackedFlag<kFlagMustDoNullCheck>(true);
- SetRawInputAt(0, object);
- SetRawInputAt(1, target_class);
- }
-
- HLoadClass* GetTargetClass() const {
- HInstruction* load_class = InputAt(1);
- DCHECK(load_class->IsLoadClass());
- return load_class->AsLoadClass();
- }
-
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
-
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
- }
+ Handle<mirror::Class> klass,
+ uint32_t dex_pc,
+ ArenaAllocator* allocator,
+ HIntConstant* bitstring_path_to_root,
+ HIntConstant* bitstring_mask)
+ : HTypeCheckInstruction(object,
+ target_class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator,
+ bitstring_path_to_root,
+ bitstring_mask,
+ SideEffects::CanTriggerGC()) {}
bool NeedsEnvironment() const OVERRIDE {
// Instruction may throw a CheckCastError.
@@ -6751,24 +6856,10 @@ class HCheckCast FINAL : public HTemplateInstruction<2> {
bool CanThrow() const OVERRIDE { return true; }
- bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
- void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
- TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
- bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
-
DECLARE_INSTRUCTION(CheckCast);
protected:
DEFAULT_COPY_CONSTRUCTOR(CheckCast);
-
- private:
- static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
- static constexpr size_t kFieldTypeCheckKindSize =
- MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
- static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
- static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1;
- static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
- using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
};
/**
@@ -7309,19 +7400,19 @@ HInstruction* ReplaceInstrOrPhiByClone(HInstruction* instr);
class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor {
public:
explicit CloneAndReplaceInstructionVisitor(HGraph* graph)
- : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count(0) {}
+ : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {}
void VisitInstruction(HInstruction* instruction) OVERRIDE {
if (instruction->IsClonable()) {
ReplaceInstrOrPhiByClone(instruction);
- instr_replaced_by_clones_count++;
+ instr_replaced_by_clones_count_++;
}
}
- size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count; }
+ size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count_; }
private:
- size_t instr_replaced_by_clones_count;
+ size_t instr_replaced_by_clones_count_;
DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor);
};
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index a3b1f0c5af..c35c490118 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -382,6 +382,8 @@ class OptimizingCompiler FINAL : public Compiler {
PassObserver* pass_observer,
VariableSizedHandleScope* handles) const;
+ void GenerateJitDebugInfo(debug::MethodDebugInfo method_debug_info);
+
std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
std::unique_ptr<std::ostream> visualizer_output_;
@@ -1230,7 +1232,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
debug::MethodDebugInfo info = {};
- DCHECK(info.trampoline_name.empty());
+ DCHECK(info.custom_name.empty());
info.dex_file = dex_file;
info.class_def_index = class_def_idx;
info.dex_method_index = method_idx;
@@ -1246,14 +1248,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
info.code_info = nullptr;
info.cfi = jni_compiled_method.GetCfi();
- // If both flags are passed, generate full debug info.
- const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
- std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
- GetCompilerDriver()->GetInstructionSet(),
- GetCompilerDriver()->GetInstructionSetFeatures(),
- mini_debug_info,
- info);
- CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
+ GenerateJitDebugInfo(info);
}
Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
@@ -1361,7 +1356,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
debug::MethodDebugInfo info = {};
- DCHECK(info.trampoline_name.empty());
+ DCHECK(info.custom_name.empty());
info.dex_file = dex_file;
info.class_def_index = class_def_idx;
info.dex_method_index = method_idx;
@@ -1377,14 +1372,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
- // If both flags are passed, generate full debug info.
- const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
- std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
- GetCompilerDriver()->GetInstructionSet(),
- GetCompilerDriver()->GetInstructionSetFeatures(),
- mini_debug_info,
- info);
- CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
+ GenerateJitDebugInfo(info);
}
Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
@@ -1408,4 +1396,22 @@ bool OptimizingCompiler::JitCompile(Thread* self,
return true;
}
+void OptimizingCompiler::GenerateJitDebugInfo(debug::MethodDebugInfo info) {
+ const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
+ DCHECK(compiler_options.GenerateAnyDebugInfo());
+
+ // If both flags are passed, generate full debug info.
+ const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
+
+ // Create entry for the single method that we just compiled.
+ std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
+ GetCompilerDriver()->GetInstructionSet(),
+ GetCompilerDriver()->GetInstructionSetFeatures(),
+ mini_debug_info,
+ ArrayRef<const debug::MethodDebugInfo>(&info, 1));
+ MutexLock mu(Thread::Current(), g_jit_debug_mutex);
+ JITCodeEntry* entry = CreateJITCodeEntry(elf_file);
+ IncrementJITCodeEntryRefcount(entry, info.code_address);
+}
+
} // namespace art
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 0023265e50..a6a2f46d2e 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -99,6 +99,7 @@ enum class MethodCompilationStat {
kConstructorFenceRemovedLSE,
kConstructorFenceRemovedPFRA,
kConstructorFenceRemovedCFRE,
+ kBitstringTypeCheck,
kJitOutOfMemoryForCommit,
kLastStat
};
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index f843c008d8..59733397bf 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -34,6 +34,20 @@ void PrepareForRegisterAllocation::Run() {
}
}
+void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) {
+ // Record only those bitstring type checks that make it to the codegen stage.
+ if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+ }
+}
+
+void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) {
+ // Record only those bitstring type checks that make it to the codegen stage.
+ if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+ }
+}
+
void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) {
check->ReplaceWith(check->InputAt(0));
}
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 2c64f016c1..f6e4d3ef99 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -40,6 +40,8 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
"prepare_for_register_allocation";
private:
+ void VisitCheckCast(HCheckCast* check_cast) OVERRIDE;
+ void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE;
void VisitNullCheck(HNullCheck* check) OVERRIDE;
void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE;
void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 8bb124e066..178d7fd0f0 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -87,6 +87,7 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE;
void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
+ void VisitInstanceOf(HInstanceOf* load_class) OVERRIDE;
void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;
void VisitLoadString(HLoadString* instr) OVERRIDE;
void VisitLoadException(HLoadException* instr) OVERRIDE;
@@ -171,6 +172,12 @@ void ReferenceTypePropagation::ValidateTypes() {
<< "NullCheck " << instr->GetReferenceTypeInfo()
<< "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo();
}
+ } else if (instr->IsInstanceOf()) {
+ HInstanceOf* iof = instr->AsInstanceOf();
+ DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact());
+ } else if (instr->IsCheckCast()) {
+ HCheckCast* check = instr->AsCheckCast();
+ DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact());
}
}
}
@@ -499,8 +506,7 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock*
return;
}
- HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
- ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+ ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI();
if (!class_rti.IsValid()) {
// He have loaded an unresolved class. Don't bother bounding the type.
return;
@@ -644,15 +650,20 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet(
void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) {
ScopedObjectAccess soa(Thread::Current());
- Handle<mirror::Class> resolved_class = instr->GetClass();
- if (IsAdmissible(resolved_class.Get())) {
- instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(
- resolved_class, /* is_exact */ true));
+ if (IsAdmissible(instr->GetClass().Get())) {
+ instr->SetValidLoadedClassRTI();
}
instr->SetReferenceTypeInfo(
ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true));
}
+void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) {
+ ScopedObjectAccess soa(Thread::Current());
+ if (IsAdmissible(instr->GetClass().Get())) {
+ instr->SetValidTargetClassRTI();
+ }
+}
+
void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {
instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo());
}
@@ -720,8 +731,6 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) {
}
void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
- HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
- ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
HBoundType* bound_type = check_cast->GetNext()->AsBoundType();
if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) {
// The next instruction is not an uninitialized BoundType. This must be
@@ -730,12 +739,14 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast
}
DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0));
- if (class_rti.IsValid()) {
+ ScopedObjectAccess soa(Thread::Current());
+ Handle<mirror::Class> klass = check_cast->GetClass();
+ if (IsAdmissible(klass.Get())) {
DCHECK(is_first_run_);
- ScopedObjectAccess soa(Thread::Current());
+ check_cast->SetValidTargetClassRTI();
// This is the first run of RTP and class is resolved.
- bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes();
- bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact),
+ bool is_exact = klass->CannotBeAssignedFromOtherTypes();
+ bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact),
/* CheckCast succeeds for nulls. */ true);
} else {
// This is the first run of RTP and class is unresolved. Remove the binding.
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 1e49411c72..dffef17587 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -236,6 +236,75 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(
return load_kind;
}
+static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(!klass->IsProxyClass());
+ DCHECK(!klass->IsArrayClass());
+
+ if (Runtime::Current()->UseJitCompilation()) {
+ // If we're JITting, try to assign a type check bitstring (fall through).
+ } else if (codegen->GetCompilerOptions().IsBootImage()) {
+ const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex());
+ if (!compiler_driver->IsImageClass(descriptor)) {
+ return false;
+ }
+ // If the target is a boot image class, try to assign a type check bitstring (fall through).
+ // (If --force-determinism, this was already done; repeating is OK and yields the same result.)
+ } else {
+ // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring
+ // already assigned in the boot image.
+ return false;
+ }
+
+ // Try to assign a type check bitstring.
+ MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+ if ((false) && // FIXME: Inliner does not respect compiler_driver->IsClassToCompile()
+ // and we're hitting an unassigned bitstring in dex2oat_image_test. b/26687569
+ kIsDebugBuild &&
+ codegen->GetCompilerOptions().IsBootImage() &&
+ codegen->GetCompilerOptions().IsForceDeterminism()) {
+ SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+ CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed)
+ << klass->PrettyDescriptor() << "/" << old_state
+ << " in " << codegen->GetGraph()->PrettyMethod();
+ }
+ SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass);
+ return state == SubtypeCheckInfo::kAssigned;
+}
+
+TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ bool needs_access_check) {
+ if (klass == nullptr) {
+ return TypeCheckKind::kUnresolvedCheck;
+ } else if (klass->IsInterface()) {
+ return TypeCheckKind::kInterfaceCheck;
+ } else if (klass->IsArrayClass()) {
+ if (klass->GetComponentType()->IsObjectClass()) {
+ return TypeCheckKind::kArrayObjectCheck;
+ } else if (klass->CannotBeAssignedFromOtherTypes()) {
+ return TypeCheckKind::kExactCheck;
+ } else {
+ return TypeCheckKind::kArrayCheck;
+ }
+ } else if (klass->IsFinal()) { // TODO: Consider using bitstring for final classes.
+ return TypeCheckKind::kExactCheck;
+ } else if (kUseBitstringTypeCheck &&
+ !needs_access_check &&
+ CanUseTypeCheckBitstring(klass, codegen, compiler_driver)) {
+ // TODO: We should not need the `!needs_access_check` check but getting rid of that
+ // requires rewriting some optimizations in instruction simplifier.
+ return TypeCheckKind::kBitstringCheck;
+ } else if (klass->IsAbstract()) {
+ return TypeCheckKind::kAbstractClassCheck;
+ } else {
+ return TypeCheckKind::kClassHierarchyCheck;
+ }
+}
+
void HSharpening::ProcessLoadString(
HLoadString* load_string,
CodeGenerator* codegen,
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 6df7d6d91e..fa3e948eeb 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -44,12 +44,10 @@ class HSharpening : public HOptimization {
static constexpr const char* kSharpeningPassName = "sharpening";
- // Used by the builder.
- static void ProcessLoadString(HLoadString* load_string,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
- const DexCompilationUnit& dex_compilation_unit,
- VariableSizedHandleScope* handles);
+ // Used by Sharpening and InstructionSimplifier.
+ static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver);
// Used by the builder and the inliner.
static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
@@ -58,10 +56,19 @@ class HSharpening : public HOptimization {
const DexCompilationUnit& dex_compilation_unit)
REQUIRES_SHARED(Locks::mutator_lock_);
- // Used by Sharpening and InstructionSimplifier.
- static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver);
+ // Used by the builder.
+ static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ bool needs_access_check)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Used by the builder.
+ static void ProcessLoadString(HLoadString* load_string,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ VariableSizedHandleScope* handles);
private:
CodeGenerator* codegen_;
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
new file mode 100644
index 0000000000..a7c23bef7e
--- /dev/null
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -0,0 +1,704 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "superblock_cloner.h"
+
+#include "common_dominator.h"
+#include "graph_checker.h"
+
+#include <iostream>
+
+namespace art {
+
+using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
+using HInstructionMap = SuperblockCloner::HInstructionMap;
+using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
+using HEdgeSet = SuperblockCloner::HEdgeSet;
+
+void HEdge::Dump(std::ostream& stream) const {
+ stream << "(" << from_ << "->" << to_ << ")";
+}
+
+//
+// Static helper methods.
+//
+
+// Returns whether instruction has any uses (regular or environmental) outside the region,
+// defined by basic block set.
+static bool IsUsedOutsideRegion(const HInstruction* instr, const HBasicBlockSet& bb_set) {
+ auto& uses = instr->GetUses();
+ for (auto use_node = uses.begin(), e = uses.end(); use_node != e; ++use_node) {
+ HInstruction* user = use_node->GetUser();
+ if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
+ return true;
+ }
+ }
+
+ auto& env_uses = instr->GetEnvUses();
+ for (auto use_node = env_uses.begin(), e = env_uses.end(); use_node != e; ++use_node) {
+ HInstruction* user = use_node->GetUser()->GetHolder();
+ if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Returns whether the phi's inputs are the same HInstruction.
+static bool ArePhiInputsTheSame(const HPhi* phi) {
+ HInstruction* first_input = phi->InputAt(0);
+ for (size_t i = 1, e = phi->InputCount(); i < e; i++) {
+ if (phi->InputAt(i) != first_input) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
+// graph.
+static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
+ if (loop1 != nullptr || loop2 != nullptr) {
+ return nullptr;
+ }
+
+ if (loop1->IsIn(*loop2)) {
+ return loop2;
+ } else if (loop2->IsIn(*loop1)) {
+ return loop1;
+ }
+ HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader());
+ return block->GetLoopInformation();
+}
+
+// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph.
+static void OrderLoopsHeadersPredecessors(HGraph* graph) {
+ for (HBasicBlock* block : graph->GetPostOrder()) {
+ if (block->IsLoopHeader()) {
+ graph->OrderLoopHeaderPredecessors(block);
+ }
+ }
+}
+
+//
+// Helpers for CloneBasicBlock.
+//
+
+void SuperblockCloner::ReplaceInputsWithCopies(HInstruction* copy_instr) {
+ DCHECK(!copy_instr->IsPhi());
+ for (size_t i = 0, e = copy_instr->InputCount(); i < e; i++) {
+ // Copy instruction holds the same input as the original instruction holds.
+ HInstruction* orig_input = copy_instr->InputAt(i);
+ if (!IsInOrigBBSet(orig_input->GetBlock())) {
+ // Defined outside the subgraph.
+ continue;
+ }
+ HInstruction* copy_input = GetInstrCopy(orig_input);
+ // copy_instr will be registered as a user of copy_inputs after returning from this function:
+ // 'copy_block->AddInstruction(copy_instr)'.
+ copy_instr->SetRawInputAt(i, copy_input);
+ }
+}
+
+void SuperblockCloner::DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr,
+ const HEnvironment* orig_env) {
+ if (orig_env->GetParent() != nullptr) {
+ DeepCloneEnvironmentWithRemapping(copy_instr, orig_env->GetParent());
+ }
+ HEnvironment* copy_env = new (arena_) HEnvironment(arena_, *orig_env, copy_instr);
+
+ for (size_t i = 0; i < orig_env->Size(); i++) {
+ HInstruction* env_input = orig_env->GetInstructionAt(i);
+ if (env_input != nullptr && IsInOrigBBSet(env_input->GetBlock())) {
+ env_input = GetInstrCopy(env_input);
+ DCHECK(env_input != nullptr && env_input->GetBlock() != nullptr);
+ }
+ copy_env->SetRawEnvAt(i, env_input);
+ if (env_input != nullptr) {
+ env_input->AddEnvUseAt(copy_env, i);
+ }
+ }
+ // InsertRawEnvironment assumes that instruction already has an environment that's why we use
+ // SetRawEnvironment in the 'else' case.
+ // As this function calls itself recursively with the same copy_instr - this copy_instr may
+ // have partially copied chain of HEnvironments.
+ if (copy_instr->HasEnvironment()) {
+ copy_instr->InsertRawEnvironment(copy_env);
+ } else {
+ copy_instr->SetRawEnvironment(copy_env);
+ }
+}
+
+//
+// Helpers for RemapEdgesSuccessors.
+//
+
+void SuperblockCloner::RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block,
+ HBasicBlock* orig_succ) {
+ DCHECK(IsInOrigBBSet(orig_succ));
+ HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
+
+ size_t this_index = orig_succ->GetPredecessorIndexOf(orig_block);
+ size_t phi_input_count = 0;
+ // This flag reflects whether the original successor has at least one phi and this phi
+ // has been already processed in the loop. Used for validation purposes in DCHECK to check that
+ // in the end all of the phis in the copy successor have the same number of inputs - the number
+ // of copy successor's predecessors.
+ bool first_phi_met = false;
+ for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* orig_phi = it.Current()->AsPhi();
+ HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+ HInstruction* orig_phi_input = orig_phi->InputAt(this_index);
+ // Remove corresponding input for original phi.
+ orig_phi->RemoveInputAt(this_index);
+ // Copy phi doesn't yet have either orig_block as predecessor or the input that corresponds
+ // to orig_block, so add the input at the end of the list.
+ copy_phi->AddInput(orig_phi_input);
+ if (!first_phi_met) {
+ phi_input_count = copy_phi->InputCount();
+ first_phi_met = true;
+ } else {
+ DCHECK_EQ(phi_input_count, copy_phi->InputCount());
+ }
+ }
+ // orig_block will be put at the end of the copy_succ's predecessors list; that corresponds
+ // to the previously added phi inputs position.
+ orig_block->ReplaceSuccessor(orig_succ, copy_succ);
+ DCHECK(!first_phi_met || copy_succ->GetPredecessors().size() == phi_input_count);
+}
+
+void SuperblockCloner::AddCopyInternalEdge(HBasicBlock* orig_block,
+ HBasicBlock* orig_succ) {
+ DCHECK(IsInOrigBBSet(orig_succ));
+ HBasicBlock* copy_block = GetBlockCopy(orig_block);
+ HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
+ copy_block->AddSuccessor(copy_succ);
+
+ size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
+ for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* orig_phi = it.Current()->AsPhi();
+ HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+ HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
+ copy_phi->AddInput(orig_phi_input);
+ }
+}
+
+void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block,
+ HBasicBlock* orig_succ) {
+ DCHECK(IsInOrigBBSet(orig_succ));
+ HBasicBlock* copy_block = GetBlockCopy(orig_block);
+ copy_block->AddSuccessor(orig_succ);
+ DCHECK(copy_block->HasSuccessor(orig_succ));
+
+ size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
+ for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* orig_phi = it.Current()->AsPhi();
+ HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
+ orig_phi->AddInput(orig_phi_input);
+ }
+}
+
+//
+// Local versions of CF calculation/adjustment routines.
+//
+
+// TODO: merge with the original version in nodes.cc. The concern is that we don't want to affect
+// the performance of the base version by checking the local set.
+// TODO: this version works when updating the back edges info for natural loop-based local_set.
+// Check which exactly types of subgraphs can be analysed or rename it to
+// FindBackEdgesInTheNaturalLoop.
+void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set) {
+ ArenaBitVector visited(arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks.
+ DCHECK_EQ(visited.GetHighestBitSet(), -1);
+
+ // Nodes that we're currently visiting, indexed by block id.
+ ArenaBitVector visiting(arena_, graph_->GetBlocks().size(), false, kArenaAllocGraphBuilder);
+ // Number of successors visited from a given node, indexed by block id.
+ ArenaVector<size_t> successors_visited(graph_->GetBlocks().size(),
+ 0u,
+ arena_->Adapter(kArenaAllocGraphBuilder));
+ // Stack of nodes that we're currently visiting (same as marked in "visiting" above).
+ ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder));
+ constexpr size_t kDefaultWorklistSize = 8;
+ worklist.reserve(kDefaultWorklistSize);
+
+ visited.SetBit(entry_block->GetBlockId());
+ visiting.SetBit(entry_block->GetBlockId());
+ worklist.push_back(entry_block);
+
+ while (!worklist.empty()) {
+ HBasicBlock* current = worklist.back();
+ uint32_t current_id = current->GetBlockId();
+ if (successors_visited[current_id] == current->GetSuccessors().size()) {
+ visiting.ClearBit(current_id);
+ worklist.pop_back();
+ } else {
+ HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
+ uint32_t successor_id = successor->GetBlockId();
+ if (!local_set->IsBitSet(successor_id)) {
+ continue;
+ }
+
+ if (visiting.IsBitSet(successor_id)) {
+ DCHECK(ContainsElement(worklist, successor));
+ successor->AddBackEdgeWhileUpdating(current);
+ } else if (!visited.IsBitSet(successor_id)) {
+ visited.SetBit(successor_id);
+ visiting.SetBit(successor_id);
+ worklist.push_back(successor);
+ }
+ }
+ }
+}
+
+void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) {
+ // TODO: DCHECK that after the transformation the graph is connected.
+ HBasicBlock* block_entry = nullptr;
+
+ if (outer_loop_ == nullptr) {
+ for (auto block : graph_->GetBlocks()) {
+ if (block != nullptr) {
+ outer_loop_bb_set->SetBit(block->GetBlockId());
+ HLoopInformation* info = block->GetLoopInformation();
+ if (info != nullptr) {
+ info->ResetBasicBlockData();
+ }
+ }
+ }
+ block_entry = graph_->GetEntryBlock();
+ } else {
+ outer_loop_bb_set->Copy(&outer_loop_bb_set_);
+ block_entry = outer_loop_->GetHeader();
+
+ // Add newly created copy blocks.
+ for (auto entry : *bb_map_) {
+ outer_loop_bb_set->SetBit(entry.second->GetBlockId());
+ }
+
+ // Clear loop_info for the whole outer loop.
+ for (uint32_t idx : outer_loop_bb_set->Indexes()) {
+ HBasicBlock* block = GetBlockById(idx);
+ HLoopInformation* info = block->GetLoopInformation();
+ if (info != nullptr) {
+ info->ResetBasicBlockData();
+ }
+ }
+ }
+
+ FindBackEdgesLocal(block_entry, outer_loop_bb_set);
+
+ for (uint32_t idx : outer_loop_bb_set->Indexes()) {
+ HBasicBlock* block = GetBlockById(idx);
+ HLoopInformation* info = block->GetLoopInformation();
+ // Reset LoopInformation for regular blocks and old headers which are no longer loop headers.
+ if (info != nullptr &&
+ (info->GetHeader() != block || info->NumberOfBackEdges() == 0)) {
+ block->SetLoopInformation(nullptr);
+ }
+ }
+}
+
+// This is a modified version of HGraph::AnalyzeLoops.
+GraphAnalysisResult SuperblockCloner::AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set) {
+ // We iterate post order to ensure we visit inner loops before outer loops.
+ // `PopulateRecursive` needs this guarantee to know whether a natural loop
+ // contains an irreducible loop.
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
+ continue;
+ }
+ if (block->IsLoopHeader()) {
+ if (block->IsCatchBlock()) {
+ // TODO: Dealing with exceptional back edges could be tricky because
+ // they only approximate the real control flow. Bail out for now.
+ return kAnalysisFailThrowCatchLoop;
+ }
+ block->GetLoopInformation()->Populate();
+ }
+ }
+
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
+ continue;
+ }
+ if (block->IsLoopHeader()) {
+ HLoopInformation* cur_loop = block->GetLoopInformation();
+ HLoopInformation* outer_loop = cur_loop->GetPreHeader()->GetLoopInformation();
+ if (outer_loop != nullptr) {
+ outer_loop->PopulateInnerLoopUpwards(cur_loop);
+ }
+ }
+ }
+
+ return kAnalysisSuccess;
+}
+
+void SuperblockCloner::CleanUpControlFlow() {
+ // TODO: full control flow clean up for now, optimize it.
+ graph_->ClearDominanceInformation();
+
+ ArenaBitVector outer_loop_bb_set(
+ arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ RecalculateBackEdgesInfo(&outer_loop_bb_set);
+
+ // TODO: do it locally.
+ graph_->SimplifyCFG();
+ graph_->ComputeDominanceInformation();
+
+ // AnalyzeLoopsLocally requires a correct post-ordering information which was calculated just
+ // before in ComputeDominanceInformation.
+ GraphAnalysisResult result = AnalyzeLoopsLocally(&outer_loop_bb_set);
+ DCHECK_EQ(result, kAnalysisSuccess);
+
+ // TODO: do it locally
+ OrderLoopsHeadersPredecessors(graph_);
+
+ graph_->ComputeTryBlockInformation();
+}
+
+//
+// Helpers for ResolveDataFlow
+//
+
+void SuperblockCloner::ResolvePhi(HPhi* phi) {
+ HBasicBlock* phi_block = phi->GetBlock();
+ for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
+ HInstruction* input = phi->InputAt(i);
+ HBasicBlock* input_block = input->GetBlock();
+
+ // Originally defined outside the region.
+ if (!IsInOrigBBSet(input_block)) {
+ continue;
+ }
+ HBasicBlock* corresponding_block = phi_block->GetPredecessors()[i];
+ if (!IsInOrigBBSet(corresponding_block)) {
+ phi->ReplaceInput(GetInstrCopy(input), i);
+ }
+ }
+}
+
+//
+// Main algorithm methods.
+//
+
+void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) {
+ DCHECK(exits->empty());
+ for (uint32_t block_id : orig_bb_set_.Indexes()) {
+ HBasicBlock* block = GetBlockById(block_id);
+ for (HBasicBlock* succ : block->GetSuccessors()) {
+ if (!IsInOrigBBSet(succ)) {
+ exits->push_back(succ);
+ }
+ }
+ }
+}
+
+void SuperblockCloner::FindAndSetLocalAreaForAdjustments() {
+ DCHECK(outer_loop_ == nullptr);
+ ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+ SearchForSubgraphExits(&exits);
+
+ // For a reducible graph we need to update back-edges and dominance information only for
+ // the outermost loop which is affected by the transformation - it can be found by picking
+ // the common most outer loop of loops to which the subgraph exits blocks belong.
+ // Note: it can a loop or the whole graph (outer_loop_ will be nullptr in this case).
+ for (HBasicBlock* exit : exits) {
+ HLoopInformation* loop_exit_loop_info = exit->GetLoopInformation();
+ if (loop_exit_loop_info == nullptr) {
+ outer_loop_ = nullptr;
+ break;
+ }
+ outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info);
+ }
+
+ if (outer_loop_ != nullptr) {
+ // Save the loop population info as it will be changed later.
+ outer_loop_bb_set_.Copy(&outer_loop_->GetBlocks());
+ }
+}
+
+void SuperblockCloner::RemapEdgesSuccessors() {
+ // Redirect incoming edges.
+ for (HEdge e : *remap_incoming_) {
+ HBasicBlock* orig_block = GetBlockById(e.GetFrom());
+ HBasicBlock* orig_succ = GetBlockById(e.GetTo());
+ RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
+ }
+
+ // Redirect internal edges.
+ for (uint32_t orig_block_id : orig_bb_set_.Indexes()) {
+ HBasicBlock* orig_block = GetBlockById(orig_block_id);
+
+ for (HBasicBlock* orig_succ : orig_block->GetSuccessors()) {
+ uint32_t orig_succ_id = orig_succ->GetBlockId();
+
+ // Check for outgoing edge.
+ if (!IsInOrigBBSet(orig_succ)) {
+ HBasicBlock* copy_block = GetBlockCopy(orig_block);
+ copy_block->AddSuccessor(orig_succ);
+ continue;
+ }
+
+ auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+ auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+
+ // Due to construction all successors of copied block were set to original.
+ if (copy_redir != remap_copy_internal_->end()) {
+ RemapCopyInternalEdge(orig_block, orig_succ);
+ } else {
+ AddCopyInternalEdge(orig_block, orig_succ);
+ }
+
+ if (orig_redir != remap_orig_internal_->end()) {
+ RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
+ }
+ }
+ }
+}
+
+void SuperblockCloner::AdjustControlFlowInfo() {
+ ArenaBitVector outer_loop_bb_set(
+ arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ RecalculateBackEdgesInfo(&outer_loop_bb_set);
+
+ graph_->ClearDominanceInformation();
+ // TODO: Do it locally.
+ graph_->ComputeDominanceInformation();
+}
+
+// TODO: Current FastCase restriction guarantees that instructions' inputs are already mapped to
+// the valid values; only phis' inputs must be adjusted.
+void SuperblockCloner::ResolveDataFlow() {
+ for (auto entry : *bb_map_) {
+ HBasicBlock* orig_block = entry.first;
+
+ for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* orig_phi = it.Current()->AsPhi();
+ HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+ ResolvePhi(orig_phi);
+ ResolvePhi(copy_phi);
+ }
+ if (kIsDebugBuild) {
+ // Inputs of instruction copies must be already mapped to correspondent inputs copies.
+ for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
+ CheckInstructionInputsRemapping(it.Current());
+ }
+ }
+ }
+}
+
+//
+// Debug and logging methods.
+//
+
+void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) {
+ DCHECK(!orig_instr->IsPhi());
+ HInstruction* copy_instr = GetInstrCopy(orig_instr);
+ for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
+ HInstruction* orig_input = orig_instr->InputAt(i);
+ DCHECK(orig_input->GetBlock()->Dominates(orig_instr->GetBlock()));
+
+ // If original input is defined outside the region then it will remain for both original
+ // instruction and the copy after the transformation.
+ if (!IsInOrigBBSet(orig_input->GetBlock())) {
+ continue;
+ }
+ HInstruction* copy_input = GetInstrCopy(orig_input);
+ DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
+ }
+
+ // Resolve environment.
+ if (orig_instr->HasEnvironment()) {
+ HEnvironment* orig_env = orig_instr->GetEnvironment();
+
+ for (size_t i = 0, e = orig_env->Size(); i < e; ++i) {
+ HInstruction* orig_input = orig_env->GetInstructionAt(i);
+
+ // If original input is defined outside the region then it will remain for both original
+ // instruction and the copy after the transformation.
+ if (orig_input == nullptr || !IsInOrigBBSet(orig_input->GetBlock())) {
+ continue;
+ }
+
+ HInstruction* copy_input = GetInstrCopy(orig_input);
+ DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
+ }
+ }
+}
+
+//
+// Public methods.
+//
+
+SuperblockCloner::SuperblockCloner(HGraph* graph,
+ const HBasicBlockSet* orig_bb_set,
+ HBasicBlockMap* bb_map,
+ HInstructionMap* hir_map)
+ : graph_(graph),
+ arena_(graph->GetAllocator()),
+ orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
+ remap_orig_internal_(nullptr),
+ remap_copy_internal_(nullptr),
+ remap_incoming_(nullptr),
+ bb_map_(bb_map),
+ hir_map_(hir_map),
+ outer_loop_(nullptr),
+ outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
+ orig_bb_set_.Copy(orig_bb_set);
+}
+
+void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
+ const HEdgeSet* remap_copy_internal,
+ const HEdgeSet* remap_incoming) {
+ remap_orig_internal_ = remap_orig_internal;
+ remap_copy_internal_ = remap_copy_internal;
+ remap_incoming_ = remap_incoming;
+}
+
+bool SuperblockCloner::IsSubgraphClonable() const {
+ // TODO: Support irreducible graphs and graphs with try-catch.
+ if (graph_->HasIrreducibleLoops() || graph_->HasTryCatch()) {
+ return false;
+ }
+
+ // Check that there are no instructions defined in the subgraph and used outside.
+ // TODO: Improve this by accepting graph with such uses but only one exit.
+ for (uint32_t idx : orig_bb_set_.Indexes()) {
+ HBasicBlock* block = GetBlockById(idx);
+
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ if (!instr->IsClonable() ||
+ IsUsedOutsideRegion(instr, orig_bb_set_)) {
+ return false;
+ }
+ }
+
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ if (!instr->IsClonable() ||
+ IsUsedOutsideRegion(instr, orig_bb_set_)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+void SuperblockCloner::Run() {
+ DCHECK(bb_map_ != nullptr);
+ DCHECK(hir_map_ != nullptr);
+ DCHECK(remap_orig_internal_ != nullptr &&
+ remap_copy_internal_ != nullptr &&
+ remap_incoming_ != nullptr);
+ DCHECK(IsSubgraphClonable());
+
+ // Find an area in the graph for which control flow information should be adjusted.
+ FindAndSetLocalAreaForAdjustments();
+ // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
+ // adjusted.
+ CloneBasicBlocks();
+ // Connect the blocks together/remap successors and fix phis which are directly affected my the
+ // remapping.
+ RemapEdgesSuccessors();
+ // Recalculate dominance and backedge information which is required by the next stage.
+ AdjustControlFlowInfo();
+ // Fix data flow of the graph.
+ ResolveDataFlow();
+}
+
+void SuperblockCloner::CleanUp() {
+ CleanUpControlFlow();
+
+ // Remove phis which have all inputs being same.
+ // When a block has a single predecessor it must not have any phis. However after the
+ // transformation it could happen that there is such block with a phi with a single input.
+ // As this is needed to be processed we also simplify phis with multiple same inputs here.
+ for (auto entry : *bb_map_) {
+ HBasicBlock* orig_block = entry.first;
+ for (HInstructionIterator inst_it(orig_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+ HPhi* phi = inst_it.Current()->AsPhi();
+ if (ArePhiInputsTheSame(phi)) {
+ phi->ReplaceWith(phi->InputAt(0));
+ orig_block->RemovePhi(phi);
+ }
+ }
+
+ HBasicBlock* copy_block = GetBlockCopy(orig_block);
+ for (HInstructionIterator inst_it(copy_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+ HPhi* phi = inst_it.Current()->AsPhi();
+ if (ArePhiInputsTheSame(phi)) {
+ phi->ReplaceWith(phi->InputAt(0));
+ copy_block->RemovePhi(phi);
+ }
+ }
+ }
+}
+
+HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) {
+ HGraph* graph = orig_block->GetGraph();
+ HBasicBlock* copy_block = new (arena_) HBasicBlock(graph, orig_block->GetDexPc());
+ graph->AddBlock(copy_block);
+
+ // Clone all the phis and add them to the map.
+ for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+ HInstruction* orig_instr = it.Current();
+ HInstruction* copy_instr = orig_instr->Clone(arena_);
+ copy_block->AddPhi(copy_instr->AsPhi());
+ copy_instr->AsPhi()->RemoveAllInputs();
+ DCHECK(!orig_instr->HasEnvironment());
+ hir_map_->Put(orig_instr, copy_instr);
+ }
+
+ // Clone all the instructions and add them to the map.
+ for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* orig_instr = it.Current();
+ HInstruction* copy_instr = orig_instr->Clone(arena_);
+ ReplaceInputsWithCopies(copy_instr);
+ copy_block->AddInstruction(copy_instr);
+ if (orig_instr->HasEnvironment()) {
+ DeepCloneEnvironmentWithRemapping(copy_instr, orig_instr->GetEnvironment());
+ }
+ hir_map_->Put(orig_instr, copy_instr);
+ }
+
+ return copy_block;
+}
+
+void SuperblockCloner::CloneBasicBlocks() {
+ // By this time ReversePostOrder must be valid: in 'CloneBasicBlock' inputs of the copied
+ // instructions might be replaced by copies of the original inputs (depending where those inputs
+ // are defined). So the definitions of the original inputs must be visited before their original
+ // uses. The property of the reducible graphs "if 'A' dom 'B' then rpo_num('A') >= rpo_num('B')"
+ // guarantees that.
+ for (HBasicBlock* orig_block : graph_->GetReversePostOrder()) {
+ if (!IsInOrigBBSet(orig_block)) {
+ continue;
+ }
+ HBasicBlock* copy_block = CloneBasicBlock(orig_block);
+ bb_map_->Put(orig_block, copy_block);
+ if (kSuperblockClonerLogging) {
+ std::cout << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId() <<
+ std::endl;
+ }
+ }
+}
+
+} // namespace art
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
new file mode 100644
index 0000000000..23de692673
--- /dev/null
+++ b/compiler/optimizing/superblock_cloner.h
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
+#define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
+
+#include "base/arena_bit_vector.h"
+#include "base/arena_containers.h"
+#include "base/bit_vector-inl.h"
+#include "nodes.h"
+
+namespace art {
+
+static const bool kSuperblockClonerLogging = false;
+static const bool kSuperblockClonerVerify = false;
+
+// Represents an edge between two HBasicBlocks.
+//
+// Note: objects of this class are small - pass them by value.
+class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
+ public:
+ HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
+ DCHECK_NE(to_, kInvalidBlockId);
+ DCHECK_NE(from_, kInvalidBlockId);
+ }
+ HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
+ DCHECK_NE(to_, kInvalidBlockId);
+ DCHECK_NE(from_, kInvalidBlockId);
+ }
+ HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
+
+ uint32_t GetFrom() const { return from_; }
+ uint32_t GetTo() const { return to_; }
+
+ bool operator==(const HEdge& other) const {
+ return this->from_ == other.from_ && this->to_ == other.to_;
+ }
+
+ bool operator!=(const HEdge& other) const { return !operator==(other); }
+ void Dump(std::ostream& stream) const;
+
+ // Returns whether an edge represents a valid edge in CF graph: whether the from_ block
+ // has to_ block as a successor.
+ bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
+
+ private:
+ // Predecessor block id.
+ uint32_t from_;
+ // Successor block id.
+ uint32_t to_;
+};
+
+// Returns whether a HEdge edge corresponds to an existing edge in the graph.
+inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
+ if (!edge.IsValid()) {
+ return false;
+ }
+ uint32_t from = edge.GetFrom();
+ uint32_t to = edge.GetTo();
+ if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
+ return false;
+ }
+
+ HBasicBlock* block_from = graph->GetBlocks()[from];
+ HBasicBlock* block_to = graph->GetBlocks()[to];
+ if (block_from == nullptr || block_to == nullptr) {
+ return false;
+ }
+
+ return block_from->HasSuccessor(block_to, 0);
+}
+
+// SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without
+// fine grain manipulation with IR; data flow and graph properties are resolved/adjusted
+// automatically. The clone transformation is defined by specifying a set of basic blocks to copy
+// and a set of rules how to treat edges, remap their successors. By using this approach such
+// optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented.
+//
+// The idea of the transformation is based on "Superblock cloning" technique described in the book
+// "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
+// Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective
+// Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
+// J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
+//
+// There are two states of the IR graph: original graph (before the transformation) and
+// copy graph (after).
+//
+// Before the transformation:
+// Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original
+// graph into 4 categories/sets (use the following notation for edges: "(pred, succ)",
+// where pred, succ - basic blocks):
+// - internal - pred, succ are members of ‘orig_bb_set’.
+// - outside - pred, succ are not members of ‘orig_bb_set’.
+// - incoming - pred is not a member of ‘orig_bb_set’, succ is.
+// - outgoing - pred is a member of ‘orig_bb_set’, succ is not.
+//
+// Transformation:
+//
+// 1. Initial cloning:
+// 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks
+// form ‘copy_bb_set’.
+// 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the
+// copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge
+// set.
+// 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the
+// copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge
+// set.
+// 2. Successors remapping.
+// 2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should
+// be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
+// 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors
+// should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)).
+// 2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph
+// whose successors should be remapped to copies nodes: ((X, Y) will be transformed into
+// (X, Y_1)).
+// 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
+// 4. Fix/resolve data flow.
+// 5. Do cleanups (DCE, critical edges splitting, etc).
+//
+class SuperblockCloner : public ValueObject {
+ public:
+ // TODO: Investigate optimal types for the containers.
+ using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
+ using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
+ using HBasicBlockSet = ArenaBitVector;
+ using HEdgeSet = ArenaHashSet<HEdge>;
+
+ SuperblockCloner(HGraph* graph,
+ const HBasicBlockSet* orig_bb_set,
+ HBasicBlockMap* bb_map,
+ HInstructionMap* hir_map);
+
+ // Sets edge successor remapping info specified by corresponding edge sets.
+ void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
+ const HEdgeSet* remap_copy_internal,
+ const HEdgeSet* remap_incoming);
+
+ // Returns whether the specified subgraph is copyable.
+ // TODO: Start from small range of graph patterns then extend it.
+ bool IsSubgraphClonable() const;
+
+ // Runs the copy algorithm according to the description.
+ void Run();
+
+ // Cleans up the graph after transformation: splits critical edges, recalculates control flow
+ // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
+ void CleanUp();
+
+ // Returns a clone of a basic block (orig_block).
+ //
+ // - The copy block will have no successors/predecessors; they should be set up manually.
+ // - For each instruction in the orig_block a copy is created and inserted into the copy block;
+ // this correspondence is recorded in the map (old instruction, new instruction).
+ // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the
+ // same, as in the original block, PHIs do not reflect a correct correspondence between the
+ // value and predecessors (as the copy block has no predecessors by now), etc.
+ HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
+
+ // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_
+ // and hir_map_.
+ void CloneBasicBlocks();
+
+ HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
+ auto copy_input_iter = hir_map_->find(orig_instr);
+ DCHECK(copy_input_iter != hir_map_->end());
+ return copy_input_iter->second;
+ }
+
+ HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
+ HBasicBlock* block = bb_map_->Get(orig_block);
+ DCHECK(block != nullptr);
+ return block;
+ }
+
+ HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
+ for (auto it : *hir_map_) {
+ if (it.second == copy_instr) {
+ return it.first;
+ }
+ }
+ return nullptr;
+ }
+
+ bool IsInOrigBBSet(uint32_t block_id) const {
+ return orig_bb_set_.IsBitSet(block_id);
+ }
+
+ bool IsInOrigBBSet(const HBasicBlock* block) const {
+ return IsInOrigBBSet(block->GetBlockId());
+ }
+
+ private:
+ // Fills the 'exits' vector with the subgraph exits.
+ void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
+
+ // Finds and records information about the area in the graph for which control-flow (back edges,
+ // loops, dominators) needs to be adjusted.
+ void FindAndSetLocalAreaForAdjustments();
+
+ // Remaps edges' successors according to the info specified in the edges sets.
+ //
+ // Only edge successors/predecessors and phis' input records (to have a correspondence between
+ // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
+ // phis' nor instructions' inputs values are resolved.
+ void RemapEdgesSuccessors();
+
+ // Adjusts control-flow (back edges, loops, dominators) for the local area defined by
+ // FindAndSetLocalAreaForAdjustments.
+ void AdjustControlFlowInfo();
+
+ // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in
+ // the SSA form.
+ void ResolveDataFlow();
+
+ //
+ // Helpers for CloneBasicBlock.
+ //
+
+ // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the
+ // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original.
+ void ReplaceInputsWithCopies(HInstruction* copy_instr);
+
+ // Recursively clones the environment for the copy instruction. If the input of the original
+ // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise
+ // leaves it the same as original.
+ void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
+
+ //
+ // Helpers for RemapEdgesSuccessors.
+ //
+
+ // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
+ // copy_succ.
+ void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+ // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
+ void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+ // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
+ void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+ //
+ // Local versions of control flow calculation/adjustment routines.
+ //
+
+ void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
+ void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
+ GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
+ void CleanUpControlFlow();
+
+ //
+ // Helpers for ResolveDataFlow
+ //
+
+ // Resolves the inputs of the phi.
+ void ResolvePhi(HPhi* phi);
+
+ //
+ // Debug and logging methods.
+ //
+ void CheckInstructionInputsRemapping(HInstruction* orig_instr);
+
+ HBasicBlock* GetBlockById(uint32_t block_id) const {
+ DCHECK(block_id < graph_->GetBlocks().size());
+ HBasicBlock* block = graph_->GetBlocks()[block_id];
+ DCHECK(block != nullptr);
+ return block;
+ }
+
+ HGraph* const graph_;
+ ArenaAllocator* const arena_;
+
+ // Set of basic block in the original graph to be copied.
+ HBasicBlockSet orig_bb_set_;
+
+ // Sets of edges which require successors remapping.
+ const HEdgeSet* remap_orig_internal_;
+ const HEdgeSet* remap_copy_internal_;
+ const HEdgeSet* remap_incoming_;
+
+ // Correspondence map for blocks: (original block, copy block).
+ HBasicBlockMap* bb_map_;
+ // Correspondence map for instructions: (original HInstruction, copy HInstruction).
+ HInstructionMap* hir_map_;
+ // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted.
+ HLoopInformation* outer_loop_;
+ HBasicBlockSet outer_loop_bb_set_;
+
+ ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
+
+ DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
+};
+
+} // namespace art
+
+namespace std {
+
+template <>
+struct hash<art::HEdge> {
+ size_t operator()(art::HEdge const& x) const noexcept {
+ // Use Cantor pairing function as the hash function.
+ uint32_t a = x.GetFrom();
+ uint32_t b = x.GetTo();
+ return (a + b) * (a + b + 1) / 2 + b;
+ }
+};
+
+} // namespace std
+
+#endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index fd77eb81fc..f1b7bffdf5 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -17,11 +17,15 @@
#include "graph_checker.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
+#include "superblock_cloner.h"
#include "gtest/gtest.h"
namespace art {
+using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
+using HInstructionMap = SuperblockCloner::HInstructionMap;
+
// This class provides methods and helpers for testing various cloning and copying routines:
// individual instruction cloning and cloning of the more coarse-grain structures.
class SuperblockClonerTest : public OptimizingUnitTest {
@@ -182,4 +186,121 @@ TEST_F(SuperblockClonerTest, IndividualInstrCloner) {
EXPECT_NE(new_suspend_check, nullptr);
}
+// Tests SuperblockCloner::CloneBasicBlocks - check instruction cloning and initial remapping of
+// instructions' inputs.
+TEST_F(SuperblockClonerTest, CloneBasicBlocks) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+ ArenaAllocator* arena = graph_->GetAllocator();
+
+ CreateBasicLoopControlFlow(&header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+ ASSERT_TRUE(CheckGraph());
+
+ ArenaBitVector orig_bb_set(
+ arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+ HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ orig_bb_set.Union(&loop_info->GetBlocks());
+
+ SuperblockCloner cloner(graph_,
+ &orig_bb_set,
+ &bb_map,
+ &hir_map);
+ EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+ cloner.CloneBasicBlocks();
+
+ EXPECT_EQ(bb_map.size(), 2u);
+ EXPECT_EQ(hir_map.size(), 12u);
+
+ for (auto it : hir_map) {
+ HInstruction* orig_instr = it.first;
+ HInstruction* copy_instr = it.second;
+
+ EXPECT_EQ(cloner.GetBlockCopy(orig_instr->GetBlock()), copy_instr->GetBlock());
+ EXPECT_EQ(orig_instr->GetKind(), copy_instr->GetKind());
+ EXPECT_EQ(orig_instr->GetType(), copy_instr->GetType());
+
+ if (orig_instr->IsPhi()) {
+ continue;
+ }
+
+ EXPECT_EQ(orig_instr->InputCount(), copy_instr->InputCount());
+
+ // Check that inputs match.
+ for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
+ HInstruction* orig_input = orig_instr->InputAt(i);
+ HInstruction* copy_input = copy_instr->InputAt(i);
+ if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+ EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+ } else {
+ EXPECT_EQ(orig_input, copy_input);
+ }
+ }
+
+ EXPECT_EQ(orig_instr->HasEnvironment(), copy_instr->HasEnvironment());
+
+ // Check that environments match.
+ if (orig_instr->HasEnvironment()) {
+ HEnvironment* orig_env = orig_instr->GetEnvironment();
+ HEnvironment* copy_env = copy_instr->GetEnvironment();
+
+ EXPECT_EQ(copy_env->GetParent(), nullptr);
+ EXPECT_EQ(orig_env->Size(), copy_env->Size());
+
+ for (size_t i = 0, e = orig_env->Size(); i < e; i++) {
+ HInstruction* orig_input = orig_env->GetInstructionAt(i);
+ HInstruction* copy_input = copy_env->GetInstructionAt(i);
+ if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+ EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+ } else {
+ EXPECT_EQ(orig_input, copy_input);
+ }
+ }
+ }
+ }
+}
+
+// SuperblockCloner::CleanUpControlFlow - checks algorithms of local adjustments of the control
+// flow.
+TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+ ArenaAllocator* arena = graph_->GetAllocator();
+
+ CreateBasicLoopControlFlow(&header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+ ASSERT_TRUE(CheckGraph());
+
+ ArenaBitVector orig_bb_set(
+ arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ orig_bb_set.Union(&loop_info->GetBlocks());
+
+ SuperblockCloner cloner(graph_,
+ &orig_bb_set,
+ nullptr,
+ nullptr);
+ EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+ cloner.FindAndSetLocalAreaForAdjustments();
+ cloner.CleanUpControlFlow();
+
+ EXPECT_TRUE(CheckGraph());
+
+ EXPECT_TRUE(entry_block_->Dominates(header));
+ EXPECT_TRUE(entry_block_->Dominates(exit_block_));
+
+ EXPECT_EQ(header->GetLoopInformation(), loop_info);
+ EXPECT_EQ(loop_info->GetHeader(), header);
+ EXPECT_TRUE(loop_info->Contains(*loop_body));
+ EXPECT_TRUE(loop_info->IsBackEdge(*loop_body));
+}
+
} // namespace art