Diffstat (limited to 'compiler/optimizing')
39 files changed, 2268 insertions, 254 deletions
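Note: every backend touched below emits the same new fast path for TypeCheckKind::kBitstringCheck. As a minimal conceptual sketch (not ART's actual code; ClassStub and its layout are illustrative assumptions, the real encoding lives in runtime/subtype_check.h and is read via mirror::Class::StatusOffset()), each GenerateBitstringTypeCheckCompare() below computes:

    // C++ sketch of the bitstring fast path: load the 32-bit status word of
    // the object's class and compare its low bits against a constant
    // path-to-root. ClassStub is a hypothetical stand-in for mirror::Class.
    #include <cstdint>

    struct ClassStub {
      uint32_t status_;  // low bits hold the SubtypeCheck bitstring
    };

    // `path_to_root` and `mask` are compile-time constants baked into the
    // HInstanceOf/HCheckCast inputs; mask + 1 is a power of two, so `mask`
    // selects the low WhichPowerOf2(mask + 1) bits.
    inline bool BitstringTypeCheck(const ClassStub* klass,
                                   uint32_t path_to_root,
                                   uint32_t mask) {
      return (klass->status_ & mask) == path_to_root;
    }

Loading only a halfword of the status (Ldrh on arm64/arm, kLoadUnsignedHalfword on mips) is a size optimization for the common case where mask_bits == 16.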
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 147df1e3e8..d893cc88c4 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -836,9 +836,23 @@ class BCEVisitor : public HGraphVisitor { ValueRange array_range(&allocator_, lower, upper); // Try index range obtained by dominator-based analysis. ValueRange* index_range = LookupValueRange(index, block); - if (index_range != nullptr && index_range->FitsIn(&array_range)) { - ReplaceInstruction(bounds_check, index); - return; + if (index_range != nullptr) { + if (index_range->FitsIn(&array_range)) { + ReplaceInstruction(bounds_check, index); + return; + } else if (index_range->IsConstantValueRange()) { + // If the non-constant index turns out to have a constant range, + // make one more attempt to get a constant in the array range. + ValueRange* existing_range = LookupValueRange(array_length, block); + if (existing_range != nullptr && + existing_range->IsConstantValueRange()) { + ValueRange constant_array_range(&allocator_, lower, existing_range->GetLower()); + if (index_range->FitsIn(&constant_array_range)) { + ReplaceInstruction(bounds_check, index); + return; + } + } + } } // Try index range obtained by induction variable analysis. // Disables dynamic bce if OOB is certain. diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 3c5a37f958..2dafbf7f6d 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -438,6 +438,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: return false; + case TypeCheckKind::kBitstringCheck: + return true; } LOG(FATAL) << "Unreachable"; UNREACHABLE(); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 13bbffa1e3..b47a5cf3c4 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -2112,6 +2112,26 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, vixl::aarch64::Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + } + // Compare the bitstring bits to `path_to_root`. + __ Cmp(temp, path_to_root); +} + void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { BarrierType type = BarrierAll; @@ -3840,6 +3860,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -3848,7 +3870,13 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -3861,7 +3889,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -4047,6 +4077,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Cset(out, eq); + if (zero.IsLinked()) { + __ B(&done); + } + break; + } } if (zero.IsLinked()) { @@ -4069,7 +4116,13 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -4079,7 +4132,9 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? 
Register() + : InputRegisterAt(instruction, 1); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); DCHECK_GE(num_temps, 1u); DCHECK_LE(num_temps, 3u); @@ -4260,6 +4315,20 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ B(ne, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index f92c94fda7..cc369de983 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -264,6 +264,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::aarch64::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch64::Register temp); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 577fe00dcd..504c6479cc 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -2490,8 +2490,12 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { } if (!skip_overflow_check) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); + // Using r4 instead of IP saves 2 bytes. Start by asserting that r4 is available here. + for (vixl32::Register reg : kParameterCoreRegistersVIXL) { + DCHECK(!reg.Is(r4)); + } + DCHECK(!kCoreCalleeSaves.Includes(r4)); + vixl32::Register temp = r4; __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm))); // The load must immediately precede RecordPcInfo. ExactAssemblyScope aas(GetVIXLAssembler(), @@ -7191,6 +7195,67 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, + vixl32::Register temp, + vixl32::FlagsUpdate flags_update) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs + // the Z flag for BNE. This is indicated by the `flags_update` parameter. + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) { + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root); + // Shift out bits that do not contribute to the comparison. 
+ __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else if (IsUint<16>(path_to_root)) { + if (temp.IsLow()) { + // Note: Optimized for size but contains one more dependent instruction than necessary. + // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the + // macro assembler would use the high reg IP for the constant by default. + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2 + __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3 + // Shift out bits that do not contribute to the comparison. + __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else { + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } + } else { + // Shift out bits that do not contribute to the comparison. + __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root << (32u - mask_bits)); + } else { + __ Sub(temp, temp, path_to_root << (32u - mask_bits)); + } + } + } +} + HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { @@ -7382,6 +7447,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7390,7 +7457,13 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7405,7 +7478,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? 
vixl32::Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7645,6 +7720,26 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out, DontCare); + // If `out` is a low reg and we would have another low reg temp, we could + // optimize this as RSBS+ADC, see GenerateConditionWithZero(). + // + // Also, in some cases when `out` is a low reg and we're loading a constant to IP + // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size + // would be the same and we would have fewer direct data dependencies. + codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR + break; + } } if (done.IsReferenced()) { @@ -7662,7 +7757,13 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -7671,7 +7772,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? 
vixl32::Register() + : InputRegisterAt(instruction, 1); Location temp_loc = locations->GetTemp(0); vixl32::Register temp = RegisterFrom(temp_loc); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -7856,6 +7959,20 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop, /* far_target */ false); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags); + __ B(ne, type_check_slow_path->GetEntryLabel()); + break; + } } if (done.IsReferenced()) { __ Bind(&done); diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 38570bb0fe..bd815f45b3 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -322,6 +322,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch32::Register temp, + vixl::aarch32::FlagsUpdate flags_update); void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 5c8e46ed19..2ed0ab79a1 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1929,6 +1929,34 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. + __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // Only stype 0 is supported. 
} @@ -3289,12 +3317,20 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -3303,7 +3339,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -3353,7 +3389,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bne(temp, cls, slow_path->GetEntryLabel()); + __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); break; } @@ -3379,7 +3415,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqz(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bne(temp, cls, &loop); + __ Bne(temp, cls.AsRegister<Register>(), &loop); break; } @@ -3394,7 +3430,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop; __ Bind(&loop); - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -3417,7 +3453,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3476,7 +3512,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Go to next interface. __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. 
- __ Bne(AT, cls, &loop); + __ Bne(AT, cls.AsRegister<Register>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnez(temp, slow_path->GetEntryLabel()); break; } } @@ -7207,6 +7257,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7215,7 +7267,13 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7227,7 +7285,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7257,7 +7315,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kCompilerReadBarrierOption); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<Register>()); __ Sltiu(out, out, 1); break; } @@ -7282,7 +7340,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); - __ Bne(out, cls, &loop); + __ Bne(out, cls.AsRegister<Register>(), &loop); __ LoadConst32(out, 1); break; } @@ -7298,7 +7356,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -7323,7 +7381,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { kCompilerReadBarrierOption); // Do an exact check. MipsLabel success; - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -7355,7 +7413,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); - __ Bne(out, cls, slow_path->GetEntryLabel()); + __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -7387,6 +7445,20 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 32b3e4221f..ffeb3b00a7 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -237,6 +237,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index bcfe051c90..3ae8a30754 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1775,6 +1775,34 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + GpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. 
+ __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // only stype 0 is supported } @@ -2844,12 +2872,20 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -2858,7 +2894,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); GpuRegister temp = temp_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -2908,7 +2944,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bnec(temp, cls, slow_path->GetEntryLabel()); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); break; } @@ -2934,7 +2970,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqzc(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bnec(temp, cls, &loop); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop); break; } @@ -2949,7 +2985,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. Mips64Label loop; __ Bind(&loop); - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -2972,7 +3008,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3031,7 +3067,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { __ Daddiu(temp, temp, 2 * kHeapReferenceSize); __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. 
- __ Bnec(AT, cls, &loop); + __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnezc(temp, slow_path->GetEntryLabel()); break; } } @@ -5524,6 +5574,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5532,7 +5584,13 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -5544,7 +5602,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); GpuRegister out = out_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -5574,7 +5632,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kCompilerReadBarrierOption); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<GpuRegister>()); __ Sltiu(out, out, 1); break; } @@ -5599,7 +5657,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. __ Beqzc(out, &done); - __ Bnec(out, cls, &loop); + __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop); __ LoadConst32(out, 1); break; } @@ -5615,7 +5673,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. Mips64Label loop, success; __ Bind(&loop); - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -5640,7 +5698,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { kCompilerReadBarrierOption); // Do an exact check. Mips64Label success; - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -5672,7 +5730,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); - __ Bnec(out, cls, slow_path->GetEntryLabel()); + __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -5704,6 +5762,20 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { __ Bc(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index d479410f07..87d5a9c15a 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -233,6 +233,7 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index ad8128a5b1..7b4b85d2fe 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -92,8 +92,8 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i __ pshufd(dst, dst, Immediate(0)); break; case DataType::Type::kInt64: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); __ punpckldq(dst, tmp); @@ -101,13 +101,13 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i break; } case DataType::Type::kFloat32: - DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(locations->InAt(0).Equals(locations->Out())); __ shufps(dst, dst, Immediate(0)); break; case DataType::Type::kFloat64: - DCHECK(locations->InAt(0).Equals(locations->Out())); DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(locations->InAt(0).Equals(locations->Out())); __ shufpd(dst, dst, Immediate(0)); break; default: @@ -160,8 +160,8 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr __ movd(locations->Out().AsRegister<Register>(), src); break; case DataType::Type::kInt64: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ movd(locations->Out().AsRegisterPairLow<Register>(), src); __ pshufd(tmp, src, Immediate(1)); __ 
movd(locations->Out().AsRegisterPairHigh<Register>(), tmp); @@ -1022,8 +1022,8 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction __ movd(dst, locations->InAt(0).AsRegister<Register>()); break; case DataType::Type::kInt64: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ xorps(tmp, tmp); __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index cbe9e0a35c..e85f9001b1 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -6234,6 +6234,27 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( // No need for memory fence, thanks to the X86 memory model. } +void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if ((false) && mask_bits == 16u) { + // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552 + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. + __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { @@ -6426,6 +6447,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -6434,7 +6457,13 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for some cases. 
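The x86 helper above keeps the 16-bit cmpw path disabled (see the FIXME for b/71853552) and instead compares with SUB followed by SHL. A sketch of why that sequence is equivalent to a masked compare, assuming 0 < mask_bits < 32 and path_to_root < 2^mask_bits (both guaranteed by the builder):

    // After subl, the low mask_bits of temp are zero iff they equaled
    // path_to_root; shll then discards the unrelated high bits, so the
    // final ZF reflects exactly the masked comparison.
    #include <cstdint>

    inline bool MaskedCompareViaSubShl(uint32_t status,
                                       uint32_t path_to_root,
                                       uint32_t mask_bits) {
      uint32_t temp = status - path_to_root;  // subl
      temp <<= (32u - mask_bits);             // shll; sets ZF from the result
      return temp == 0u;  // same as (status & mask) == path_to_root
    }

This leaves the flags set for the following j(kNotEqual, ...) without needing a wide immediate compare.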
@@ -6655,6 +6684,21 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + break; + } } if (zero.IsLinked()) { @@ -6681,6 +6725,10 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } @@ -6900,6 +6948,19 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 0082853184..2d14d4cdc2 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -211,6 +211,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { // the suspend call. void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void DivRemOneOrMinusOne(HBinaryOperation* instruction); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 510eec4f30..9f8b1bb038 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -5440,6 +5440,27 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( // No need for memory fence, thanks to the x86-64 memory model. } +void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + CpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if ((false) && mask_bits == 16u) { + // FIXME: cmpw() erroneously emits the constant as 32 bits instead of 16 bits. b/71853552 + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. 
+ __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { switch (desired_class_load_kind) { @@ -5812,6 +5833,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5820,7 +5843,13 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86_64 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for @@ -6049,6 +6078,27 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + if (zero.IsLinked()) { + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + } else { + __ setcc(kEqual, out); + // setcc only sets the low byte. + __ andl(out, Immediate(1)); + } + break; + } } if (zero.IsLinked()) { @@ -6075,6 +6125,10 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } @@ -6261,7 +6315,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { break; } - case TypeCheckKind::kInterfaceCheck: + case TypeCheckKind::kInterfaceCheck: { // Fast path for the interface check. Try to avoid read barriers to improve the fast path. // We can not get false positives by doing this. // /* HeapReference<Class> */ temp = obj->klass_ @@ -6297,6 +6351,20 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // If `cls` was poisoned above, unpoison it. 
__ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } if (done.IsLinked()) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index e86123ef01..97f8ec7ae0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -208,6 +208,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { // the suspend call. void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateRemFP(HRem* rem); void DivRemOneOrMinusOne(HBinaryOperation* instruction); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index c88baa8610..fbcbe3608e 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -25,6 +25,11 @@ #include "base/bit_vector-inl.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" +#include "handle.h" +#include "mirror/class.h" +#include "obj_ptr-inl.h" +#include "scoped_thread_state_change-inl.h" +#include "subtype_check.h" namespace art { @@ -548,30 +553,85 @@ void GraphChecker::VisitReturnVoid(HReturnVoid* ret) { } } -void GraphChecker::VisitCheckCast(HCheckCast* check) { - VisitInstruction(check); - HInstruction* input = check->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", +void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, + size_t input_pos, + bool check_value, + uint32_t expected_value, + const char* name) { + if (!check->InputAt(input_pos)->IsIntConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.", check->DebugName(), check->GetId(), - input->DebugName(), - input->GetId())); + input_pos, + name, + check->InputAt(input_pos)->DebugName(), + check->InputAt(input_pos)->GetId())); + } else if (check_value) { + uint32_t actual_value = + static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue()); + if (actual_value != expected_value) { + AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.", + check->DebugName(), + check->GetId(), + name, + actual_value, + expected_value)); + } } } -void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { - VisitInstruction(instruction); - HInstruction* input = instruction->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", - instruction->DebugName(), - instruction->GetId(), - input->DebugName(), - input->GetId())); +void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) { + VisitInstruction(check); + HInstruction* input = check->InputAt(1); + if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + if (!input->IsNullConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.", +
check->DebugName(),
+                          check->GetId(),
+                          input->DebugName(),
+                          input->GetId()));
+    }
+    bool check_values = false;
+    BitString::StorageType expected_path_to_root = 0u;
+    BitString::StorageType expected_mask = 0u;
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      ObjPtr<mirror::Class> klass = check->GetClass().Get();
+      MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+      SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+      if (state == SubtypeCheckInfo::kAssigned) {
+        expected_path_to_root =
+            SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass);
+        expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass);
+        check_values = true;
+      } else {
+        AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.",
+                              check->DebugName(),
+                              check->GetId()));
+      }
+    }
+    CheckTypeCheckBitstringInput(
+        check, /* input_pos */ 2, check_values, expected_path_to_root, "path_to_root");
+    CheckTypeCheckBitstringInput(check, /* input_pos */ 3, check_values, expected_mask, "mask");
+  } else {
+    if (!input->IsLoadClass()) {
+      AddError(StringPrintf("%s:%d (classic) expects an HLoadClass as second input, not %s:%d.",
+                            check->DebugName(),
+                            check->GetId(),
+                            input->DebugName(),
+                            input->GetId()));
+    }
  }
}

+void GraphChecker::VisitCheckCast(HCheckCast* check) {
+  HandleTypeCheckInstruction(check);
+}
+
+void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
+  HandleTypeCheckInstruction(instruction);
+}
+
void GraphChecker::HandleLoop(HBasicBlock* loop_header) {
  int id = loop_header->GetBlockId();
  HLoopInformation* loop_information = loop_header->GetLoopInformation();
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 0f0b49d240..dbedc40518 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -71,6 +71,12 @@ class GraphChecker : public HGraphDelegateVisitor {
  void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;

+  void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
+                                    size_t input_pos,
+                                    bool check_value,
+                                    uint32_t expected_value,
+                                    const char* name);
+  void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction);
  void HandleLoop(HBasicBlock* loop_header);
  void HandleBooleanInput(HInstruction* instruction, size_t input_index);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 12c69889ab..55191214a6 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -389,16 +389,23 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
    StartAttributeStream("load_kind") << load_string->GetLoadKind();
  }

-  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
-    StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind();
+  void HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
+    StartAttributeStream("check_kind") << check->GetTypeCheckKind();
    StartAttributeStream("must_do_null_check") << std::boolalpha
-        << check_cast->MustDoNullCheck() << std::noboolalpha;
+        << check->MustDoNullCheck() << std::noboolalpha;
+    if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+      StartAttributeStream("path_to_root") << std::hex
+          << "0x" << check->GetBitstringPathToRoot() << std::dec;
+      StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec;
+    }
+  }
+
+  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
+    HandleTypeCheckInstruction(check_cast);
  }

  void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE {
-    StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind();
-    StartAttributeStream("must_do_null_check") << std::boolalpha
-        << instance_of->MustDoNullCheck() << std::noboolalpha;
+    HandleTypeCheckInstruction(instance_of);
  }

  void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
@@ -648,20 +655,32 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
        << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;
  }

+  // For the builder and the inliner, we want to add extra information on HInstructions
+  // that have reference types, and also HInstanceOf/HCheckCast.
  if ((IsPass(HGraphBuilder::kBuilderPassName) || IsPass(HInliner::kInlinerPassName))
-      && (instruction->GetType() == DataType::Type::kReference)) {
-    ReferenceTypeInfo info = instruction->IsLoadClass()
-        ? instruction->AsLoadClass()->GetLoadedClassRTI()
-        : instruction->GetReferenceTypeInfo();
+      && (instruction->GetType() == DataType::Type::kReference ||
+          instruction->IsInstanceOf() ||
+          instruction->IsCheckCast())) {
+    ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference)
+        ? instruction->IsLoadClass()
+            ? instruction->AsLoadClass()->GetLoadedClassRTI()
+            : instruction->GetReferenceTypeInfo()
+        : instruction->IsInstanceOf()
+            ? instruction->AsInstanceOf()->GetTargetClassRTI()
+            : instruction->AsCheckCast()->GetTargetClassRTI();
    ScopedObjectAccess soa(Thread::Current());
    if (info.IsValid()) {
      StartAttributeStream("klass")
          << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get());
-      StartAttributeStream("can_be_null")
-          << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+      if (instruction->GetType() == DataType::Type::kReference) {
+        StartAttributeStream("can_be_null")
+            << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+      }
      StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
-    } else if (instruction->IsLoadClass()) {
+    } else if (instruction->IsLoadClass() ||
+               instruction->IsInstanceOf() ||
+               instruction->IsCheckCast()) {
      StartAttributeStream("klass") << "unresolved";
    } else {
      // The NullConstant may be added to the graph during other passes that happen between
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 64a1eccf60..0205c6a4d3 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1811,29 +1811,6 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object,
  }
}

-static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  if (cls == nullptr) {
-    return TypeCheckKind::kUnresolvedCheck;
-  } else if (cls->IsInterface()) {
-    return TypeCheckKind::kInterfaceCheck;
-  } else if (cls->IsArrayClass()) {
-    if (cls->GetComponentType()->IsObjectClass()) {
-      return TypeCheckKind::kArrayObjectCheck;
-    } else if (cls->CannotBeAssignedFromOtherTypes()) {
-      return TypeCheckKind::kExactCheck;
-    } else {
-      return TypeCheckKind::kArrayCheck;
-    }
-  } else if (cls->IsFinal()) {
-    return TypeCheckKind::kExactCheck;
-  } else if (cls->IsAbstract()) {
-    return TypeCheckKind::kAbstractClassCheck;
-  } else {
-    return TypeCheckKind::kClassHierarchyCheck;
-  }
-}
-
void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) {
  HLoadString* load_string = new (allocator_)
      HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc);
@@ -1848,22 +1825,8 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_
HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) {
  ScopedObjectAccess soa(Thread::Current());
  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
-  Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
-  Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass(
-      soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_));
-
-  bool needs_access_check = true;
-  if (klass != nullptr) {
-    if (klass->IsPublic()) {
-      needs_access_check = false;
-    } else {
-      ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
-      if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) {
-        needs_access_check = false;
-      }
-    }
-  }
-
+  Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+  bool needs_access_check = LoadClassNeedsAccessCheck(klass);
  return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
}

@@ -1908,25 +1871,83 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index,
  return load_class;
}

+Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa,
+                                                        dex::TypeIndex type_index) {
+  Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
+  ObjPtr<mirror::Class> klass = compiler_driver_->ResolveClass(
+      soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_);
+  // TODO: Avoid creating excessive handles if the method references the same class repeatedly.
+  // (Use a map on the local_allocator_.)
+  return handles_->NewHandle(klass);
+}
+
+bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) {
+  if (klass == nullptr) {
+    return true;
+  } else if (klass->IsPublic()) {
+    return false;
+  } else {
+    ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
+    return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get());
+  }
+}
+
void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
                                         uint8_t destination,
                                         uint8_t reference,
                                         dex::TypeIndex type_index,
                                         uint32_t dex_pc) {
  HInstruction* object = LoadLocal(reference, DataType::Type::kReference);
-  HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
  ScopedObjectAccess soa(Thread::Current());
-  TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass());
+  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+  Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+  bool needs_access_check = LoadClassNeedsAccessCheck(klass);
+  TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind(
+      klass.Get(), code_generator_, compiler_driver_, needs_access_check);
+
+  HInstruction* class_or_null = nullptr;
+  HIntConstant* bitstring_path_to_root = nullptr;
+  HIntConstant* bitstring_mask = nullptr;
+  if (check_kind == TypeCheckKind::kBitstringCheck) {
+    // TODO: Allow using the bitstring check also if we need an access check.
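+    // (Illustrative aside, not part of this change: the two constants created
+    // below feed the masked compare that the backends emit for kBitstringCheck,
+    // conceptually `(target->status_bits & mask) == path_to_root`; the actual
+    // field and bit layout live in the runtime's SubtypeCheck machinery.)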
+    DCHECK(!needs_access_check);
+    class_or_null = graph_->GetNullConstant(dex_pc);
+    MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+    uint32_t path_to_root =
+        SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get());
+    uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get());
+    bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc);
+    bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc);
+  } else {
+    class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
+  }
+  DCHECK(class_or_null != nullptr);
+
  if (instruction.Opcode() == Instruction::INSTANCE_OF) {
-    AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc));
+    AppendInstruction(new (allocator_) HInstanceOf(object,
+                                                   class_or_null,
+                                                   check_kind,
+                                                   klass,
+                                                   dex_pc,
+                                                   allocator_,
+                                                   bitstring_path_to_root,
+                                                   bitstring_mask));
    UpdateLocal(destination, current_block_->GetLastInstruction());
  } else {
    DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
    // We emit a CheckCast followed by a BoundType. CheckCast is a statement
    // which may throw. If it succeeds BoundType sets the new type of `object`
    // for all subsequent uses.
-    AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc));
+    AppendInstruction(
+        new (allocator_) HCheckCast(object,
+                                    class_or_null,
+                                    check_kind,
+                                    klass,
+                                    dex_pc,
+                                    allocator_,
+                                    bitstring_path_to_root,
+                                    bitstring_mask));
    AppendInstruction(new (allocator_) HBoundType(object, dex_pc));
    UpdateLocal(reference, current_block_->GetLastInstruction());
  }
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 4428c53277..f78829232d 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -39,6 +39,7 @@ class DexCompilationUnit;
class HBasicBlockBuilder;
class Instruction;
class OptimizingCompilerStats;
+class ScopedObjectAccess;
class SsaBuilder;
class VariableSizedHandleScope;
@@ -232,6 +233,12 @@ class HInstructionBuilder : public ValueObject {
                             bool needs_access_check)
      REQUIRES_SHARED(Locks::mutator_lock_);

+  Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  // Returns the outer-most compiling method's class.
  ObjPtr<mirror::Class> GetOutermostCompilingClass() const;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index a42a85dc1d..2538fa38f1 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -576,7 +576,9 @@ bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInst

// Returns whether a type test of the class of `object` against `klass` has
// a statically known outcome. The result of the test is stored in `outcome`.
-static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) {
+static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti,
+                                     HInstruction* object,
+                                     /*out*/bool* outcome) {
  DCHECK(!object->IsNullConstant()) << "Null constants should be special cased";
  ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo();
  ScopedObjectAccess soa(Thread::Current());
@@ -586,7 +588,6 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo
    return false;
  }

-  ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI();
  if (!class_rti.IsValid()) {
    // Happens when the loaded class is unresolved.
    return false;
@@ -611,8 +612,8 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo
void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
  HInstruction* object = check_cast->InputAt(0);
-  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
-  if (load_class->NeedsAccessCheck()) {
+  if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+      check_cast->GetTargetClass()->NeedsAccessCheck()) {
    // If we need to perform an access check we cannot remove the instruction.
    return;
  }
@@ -630,15 +631,18 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
  // Note: The `outcome` is initialized to please valgrind - the compiler can reorder
  // the return value check with the `outcome` check, b/27651442 .
  bool outcome = false;
-  if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+  if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) {
    if (outcome) {
      check_cast->GetBlock()->RemoveInstruction(check_cast);
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
+      if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+        HLoadClass* load_class = check_cast->GetTargetClass();
+        if (!load_class->HasUses()) {
+          // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+          // However, here we know that it cannot because the checkcast was successful, hence
+          // the class was already loaded.
+          load_class->GetBlock()->RemoveInstruction(load_class);
+        }
      }
    } else {
      // Don't do anything for exceptional cases for now. Ideally we should remove
@@ -649,8 +653,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
  HInstruction* object = instruction->InputAt(0);
-  HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass();
-  if (load_class->NeedsAccessCheck()) {
+  if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+      instruction->GetTargetClass()->NeedsAccessCheck()) {
    // If we need to perform an access check we cannot remove the instruction.
    return;
  }
@@ -673,7 +677,7 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
  // Note: The `outcome` is initialized to please valgrind - the compiler can reorder
  // the return value check with the `outcome` check, b/27651442 .
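  // (Example of a known outcome, for illustration: if `A` is a final class and RTP
  //  proved `object` to be exactly `A`, then `object instanceof A` has a statically
  //  known result and the check below folds the instruction away, leaving at most
  //  a null test.)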
  bool outcome = false;
-  if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+  if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) {
    MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf);
    if (outcome && can_be_null) {
      // Type test will succeed, we just need a null test.
@@ -686,11 +690,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
    }
    RecordSimplification();
    instruction->GetBlock()->RemoveInstruction(instruction);
-    if (outcome && !load_class->HasUses()) {
-      // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
-      // However, here we know that it cannot because the instanceof check was successful, hence
-      // the class was already loaded.
-      load_class->GetBlock()->RemoveInstruction(load_class);
+    if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+      HLoadClass* load_class = instruction->GetTargetClass();
+      if (!load_class->HasUses()) {
+        // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+        // However, here we know that it cannot because the instanceof check was successful, hence
+        // the class was already loaded.
+        load_class->GetBlock()->RemoveInstruction(load_class);
+      }
    }
  }
}
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index ca1b451e6b..2f8e33f941 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2011,6 +2011,14 @@ void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

+void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 99b8b5df74..830d0403e4 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2811,6 +2811,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
}

+void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 113c9de5a2..cafa5228d9 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2835,6 +2835,15 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAtan2(HInvoke* invoke) {
  GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
}

+// static double java.lang.Math.pow(double a, double b)
+void IntrinsicLocationsBuilderMIPS::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathPow(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
// static double java.lang.Math.cbrt(double a)
void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 521bad27e2..89f1818be2 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2416,6 +2416,15 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAtan2(HInvoke* invoke) {
  GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
}

+// static double java.lang.Math.pow(double a, double b)
+void IntrinsicLocationsBuilderMIPS64::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathPow(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
// static double java.lang.Math.cbrt(double a)
void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index baa410b884..46b7f3f1ce 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1105,6 +1105,14 @@ void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

+void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 6dd8b8e1f5..6483b7cb2a 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -897,6 +897,14 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

+void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
+  GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 727431a493..5587f87dae 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -865,6 +865,15 @@ void HLoopInformation::Populate() {
  graph->SetHasLoops(true);
}

+void HLoopInformation::PopulateInnerLoopUpwards(HLoopInformation* inner_loop) {
+  DCHECK(inner_loop->GetPreHeader()->GetLoopInformation() == this);
+  blocks_.Union(&inner_loop->blocks_);
+  HLoopInformation* outer_loop = GetPreHeader()->GetLoopInformation();
+  if (outer_loop != nullptr) {
+    outer_loop->PopulateInnerLoopUpwards(this);
+  }
+}
+
HBasicBlock* HLoopInformation::GetPreHeader() const {
  HBasicBlock* block = header_->GetPredecessors()[0];
  DCHECK(irreducible_ || (block == header_->GetDominator()));
@@ -3096,6 +3105,8 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) {
      return os << "array_object_check";
    case TypeCheckKind::kArrayCheck:
      return os << "array_check";
+    case TypeCheckKind::kBitstringCheck:
+      return os << "bitstring_check";
    default:
      LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs);
      UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2047954207..b0657d6f1c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -826,6 +826,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
  // Finds blocks that are part of this loop.
  void Populate();

+  // Updates the blocks population of the loop and all of its outer loops recursively
+  // after the population of the inner loop is updated.
+  void PopulateInnerLoopUpwards(HLoopInformation* inner_loop);
+
  // Returns whether this loop information contains `block`.
  // Note that this loop information *must* be populated before entering this function.
  bool Contains(const HBasicBlock& block) const;
@@ -856,6 +860,12 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {

  bool HasExitEdge() const;

+  // Resets back edge and blocks-in-loop data.
+  void ResetBasicBlockData() {
+    back_edges_.clear();
+    ClearAllBlocks();
+  }
+
 private:
  // Internal recursive implementation of `Populate`.
  void PopulateRecursive(HBasicBlock* block);
@@ -998,6 +1008,18 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
    loop_information_->AddBackEdge(back_edge);
  }

+  // Registers a back edge; if the block was not a loop header before the call,
+  // associates a newly created loop info with it.
+  //
+  // Used in SuperblockCloner to preserve the LoopInformation object instead of
+  // resetting loop info for all blocks during back edge recalculation.
+  void AddBackEdgeWhileUpdating(HBasicBlock* back_edge) {
+    if (loop_information_ == nullptr || loop_information_->GetHeader() != this) {
+      loop_information_ = new (graph_->GetAllocator()) HLoopInformation(this, graph_);
+    }
+    loop_information_->AddBackEdge(back_edge);
+  }
+
  HGraph* GetGraph() const { return graph_; }
  void SetGraph(HGraph* graph) { graph_ = graph; }
@@ -5929,8 +5951,7 @@ class HLoadClass FINAL : public HInstruction {
        special_input_(HUserRecord<HInstruction*>(current_method)),
        type_index_(type_index),
        dex_file_(dex_file),
-        klass_(klass),
-        loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
+        klass_(klass) {
    // Referrers class should not need access check. We never inline unverified
    // methods so we can't possibly end up in this situation.
    DCHECK(!is_referrers_class || !needs_access_check);
@@ -5940,6 +5961,7 @@ class HLoadClass FINAL : public HInstruction {
    SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
    SetPackedFlag<kFlagIsInBootImage>(false);
    SetPackedFlag<kFlagGenerateClInitCheck>(false);
+    SetPackedFlag<kFlagValidLoadedClassRTI>(false);
  }

  bool IsClonable() const OVERRIDE { return true; }
@@ -5988,13 +6010,18 @@ class HLoadClass FINAL : public HInstruction {
  }

  ReferenceTypeInfo GetLoadedClassRTI() {
-    return loaded_class_rti_;
+    if (GetPackedFlag<kFlagValidLoadedClassRTI>()) {
+      // Note: The is_exact flag from the return value should not be used.
+      return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true);
+    } else {
+      return ReferenceTypeInfo::CreateInvalid();
+    }
  }

-  void SetLoadedClassRTI(ReferenceTypeInfo rti) {
-    // Make sure we only set exact types (the loaded class should never be merged).
-    DCHECK(rti.IsExact());
-    loaded_class_rti_ = rti;
+  // Loaded class RTI is marked as valid by RTP if the klass_ is admissible.
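+  // (Sketch of the intended flow, for illustration:
+  //    if (IsAdmissible(load_class->GetClass().Get())) {
+  //      load_class->SetValidLoadedClassRTI();
+  //    }
+  //  GetLoadedClassRTI() above then rebuilds the RTI from klass_ on demand,
+  //  so the instruction no longer stores a ReferenceTypeInfo field.)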
+  void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(klass_ != nullptr);
+    SetPackedFlag<kFlagValidLoadedClassRTI>(true);
  }

  dex::TypeIndex GetTypeIndex() const { return type_index_; }
@@ -6047,7 +6074,8 @@ static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1;
  static constexpr size_t kFieldLoadKindSize =
      MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
-  static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize;
+  static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize;
+  static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1;
  static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields.");
  using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
@@ -6075,8 +6103,6 @@ class HLoadClass FINAL : public HInstruction {
  const DexFile& dex_file_;

  Handle<mirror::Class> klass_;
-
-  ReferenceTypeInfo loaded_class_rti_;
};

std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
@@ -6604,71 +6630,156 @@ enum class TypeCheckKind {
  kInterfaceCheck,       // No optimization yet when checking against an interface.
  kArrayObjectCheck,     // Can just check if the array is not primitive.
  kArrayCheck,           // No optimization yet when checking against a generic array.
+  kBitstringCheck,      // Compare the type check bitstring.
  kLast = kArrayCheck
};

std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs);

-class HInstanceOf FINAL : public HExpression<2> {
+// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an
+// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.)
+class HTypeCheckInstruction : public HVariableInputSizeInstruction {
 public:
-  HInstanceOf(HInstruction* object,
-              HLoadClass* target_class,
-              TypeCheckKind check_kind,
-              uint32_t dex_pc)
-      : HExpression(DataType::Type::kBool,
-                    SideEffectsForArchRuntimeCalls(check_kind),
-                    dex_pc) {
+  HTypeCheckInstruction(HInstruction* object,
+                        HInstruction* target_class_or_null,
+                        TypeCheckKind check_kind,
+                        Handle<mirror::Class> klass,
+                        uint32_t dex_pc,
+                        ArenaAllocator* allocator,
+                        HIntConstant* bitstring_path_to_root,
+                        HIntConstant* bitstring_mask,
+                        SideEffects side_effects)
+      : HVariableInputSizeInstruction(
+            side_effects,
+            dex_pc,
+            allocator,
+            /* number_of_inputs */ check_kind == TypeCheckKind::kBitstringCheck ? 4u : 2u,
+            kArenaAllocTypeCheckInputs),
+        klass_(klass) {
    SetPackedField<TypeCheckKindField>(check_kind);
    SetPackedFlag<kFlagMustDoNullCheck>(true);
+    SetPackedFlag<kFlagValidTargetClassRTI>(false);
    SetRawInputAt(0, object);
-    SetRawInputAt(1, target_class);
+    SetRawInputAt(1, target_class_or_null);
+    DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr);
+    DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr);
+    if (check_kind == TypeCheckKind::kBitstringCheck) {
+      DCHECK(target_class_or_null->IsNullConstant());
+      SetRawInputAt(2, bitstring_path_to_root);
+      SetRawInputAt(3, bitstring_mask);
+    } else {
+      DCHECK(target_class_or_null->IsLoadClass());
+    }
  }

  HLoadClass* GetTargetClass() const {
+    DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
    HInstruction* load_class = InputAt(1);
    DCHECK(load_class->IsLoadClass());
    return load_class->AsLoadClass();
  }

-  bool IsClonable() const OVERRIDE { return true; }
-  bool CanBeMoved() const OVERRIDE { return true; }
+  uint32_t GetBitstringPathToRoot() const {
+    DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+    HInstruction* path_to_root = InputAt(2);
+    DCHECK(path_to_root->IsIntConstant());
+    return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue());
+  }

-  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
-    return true;
+  uint32_t GetBitstringMask() const {
+    DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+    HInstruction* mask = InputAt(3);
+    DCHECK(mask->IsIntConstant());
+    return static_cast<uint32_t>(mask->AsIntConstant()->GetValue());
  }

-  bool NeedsEnvironment() const OVERRIDE {
-    return CanCallRuntime(GetTypeCheckKind());
+  bool IsClonable() const OVERRIDE { return true; }
+  bool CanBeMoved() const OVERRIDE { return true; }
+
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName();
+    return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields();
  }

-  // Used only in code generation.
  bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
  void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
  TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
  bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }

-  static bool CanCallRuntime(TypeCheckKind check_kind) {
-    // Mips currently does runtime calls for any other checks.
-    return check_kind != TypeCheckKind::kExactCheck;
+  ReferenceTypeInfo GetTargetClassRTI() {
+    if (GetPackedFlag<kFlagValidTargetClassRTI>()) {
+      // Note: The is_exact flag from the return value should not be used.
+      return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true);
+    } else {
+      return ReferenceTypeInfo::CreateInvalid();
+    }
  }

-  static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) {
-    return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None();
+  // Target class RTI is marked as valid by RTP if the klass_ is admissible.
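+  // (E.g., RTP's VisitInstanceOf/VisitCheckCast call the setter below after checking
+  //  that the resolved class is admissible; see reference_type_propagation.cc above.)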
+  void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(klass_ != nullptr);
+    SetPackedFlag<kFlagValidTargetClassRTI>(true);
  }

-  DECLARE_INSTRUCTION(InstanceOf);
+  Handle<mirror::Class> GetClass() const {
+    return klass_;
+  }

 protected:
-  DEFAULT_COPY_CONSTRUCTOR(InstanceOf);
+  DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction);

 private:
-  static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
  static constexpr size_t kFieldTypeCheckKindSize =
      MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
  static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
-  static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1;
+  static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1;
+  static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1;
  static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
  using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
+
+  Handle<mirror::Class> klass_;
+};
+
+class HInstanceOf FINAL : public HTypeCheckInstruction {
+ public:
+  HInstanceOf(HInstruction* object,
+              HInstruction* target_class_or_null,
+              TypeCheckKind check_kind,
+              Handle<mirror::Class> klass,
+              uint32_t dex_pc,
+              ArenaAllocator* allocator,
+              HIntConstant* bitstring_path_to_root,
+              HIntConstant* bitstring_mask)
+      : HTypeCheckInstruction(object,
+                              target_class_or_null,
+                              check_kind,
+                              klass,
+                              dex_pc,
+                              allocator,
+                              bitstring_path_to_root,
+                              bitstring_mask,
+                              SideEffectsForArchRuntimeCalls(check_kind)) {}
+
+  DataType::Type GetType() const OVERRIDE { return DataType::Type::kBool; }
+
+  bool NeedsEnvironment() const OVERRIDE {
+    return CanCallRuntime(GetTypeCheckKind());
+  }
+
+  static bool CanCallRuntime(TypeCheckKind check_kind) {
+    // Mips currently does runtime calls for any other checks.
+    return check_kind != TypeCheckKind::kExactCheck;
+  }
+
+  static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) {
+    return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None();
+  }
+
+  DECLARE_INSTRUCTION(InstanceOf);
+
+ protected:
+  DEFAULT_COPY_CONSTRUCTOR(InstanceOf);
};

class HBoundType FINAL : public HExpression<1> {
@@ -6718,31 +6829,25 @@ class HBoundType FINAL : public HExpression<1> {
  ReferenceTypeInfo upper_bound_;
};

-class HCheckCast FINAL : public HTemplateInstruction<2> {
+class HCheckCast FINAL : public HTypeCheckInstruction {
 public:
  HCheckCast(HInstruction* object,
-             HLoadClass* target_class,
+             HInstruction* target_class_or_null,
             TypeCheckKind check_kind,
-             uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
-    SetPackedField<TypeCheckKindField>(check_kind);
-    SetPackedFlag<kFlagMustDoNullCheck>(true);
-    SetRawInputAt(0, object);
-    SetRawInputAt(1, target_class);
-  }
-
-  HLoadClass* GetTargetClass() const {
-    HInstruction* load_class = InputAt(1);
-    DCHECK(load_class->IsLoadClass());
-    return load_class->AsLoadClass();
-  }
-
-  bool IsClonable() const OVERRIDE { return true; }
-  bool CanBeMoved() const OVERRIDE { return true; }
-
-  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
-    return true;
-  }
+             Handle<mirror::Class> klass,
+             uint32_t dex_pc,
+             ArenaAllocator* allocator,
+             HIntConstant* bitstring_path_to_root,
+             HIntConstant* bitstring_mask)
+      : HTypeCheckInstruction(object,
+                              target_class_or_null,
+                              check_kind,
+                              klass,
+                              dex_pc,
+                              allocator,
+                              bitstring_path_to_root,
+                              bitstring_mask,
+                              SideEffects::CanTriggerGC()) {}

  bool NeedsEnvironment() const OVERRIDE {
    // Instruction may throw a CheckCastError.
@@ -6751,24 +6856,10 @@ class HCheckCast FINAL : public HTemplateInstruction<2> {

  bool CanThrow() const OVERRIDE { return true; }

-  bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
-  void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
-  TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
-  bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
-
  DECLARE_INSTRUCTION(CheckCast);

 protected:
  DEFAULT_COPY_CONSTRUCTOR(CheckCast);
-
- private:
-  static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
-  static constexpr size_t kFieldTypeCheckKindSize =
-      MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
-  static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
-  static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1;
-  static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
-  using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
};

/**
@@ -7309,19 +7400,19 @@ HInstruction* ReplaceInstrOrPhiByClone(HInstruction* instr);
class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor {
 public:
  explicit CloneAndReplaceInstructionVisitor(HGraph* graph)
-      : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count(0) {}
+      : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {}

  void VisitInstruction(HInstruction* instruction) OVERRIDE {
    if (instruction->IsClonable()) {
      ReplaceInstrOrPhiByClone(instruction);
-      instr_replaced_by_clones_count++;
+      instr_replaced_by_clones_count_++;
    }
  }

-  size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count; }
+  size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count_; }

 private:
-  size_t instr_replaced_by_clones_count;
+  size_t instr_replaced_by_clones_count_;

  DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor);
};
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index a3b1f0c5af..c35c490118 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -382,6 +382,8 @@ class OptimizingCompiler FINAL : public Compiler {
                    PassObserver* pass_observer,
                    VariableSizedHandleScope* handles) const;

+  void GenerateJitDebugInfo(debug::MethodDebugInfo method_debug_info);
+
  std::unique_ptr<OptimizingCompilerStats> compilation_stats_;

  std::unique_ptr<std::ostream> visualizer_output_;
@@ -1230,7 +1232,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
      const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
      const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
      debug::MethodDebugInfo info = {};
-      DCHECK(info.trampoline_name.empty());
+      DCHECK(info.custom_name.empty());
      info.dex_file = dex_file;
      info.class_def_index = class_def_idx;
      info.dex_method_index = method_idx;
@@ -1246,14 +1248,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
      info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
      info.code_info = nullptr;
      info.cfi = jni_compiled_method.GetCfi();
-      // If both flags are passed, generate full debug info.
-      const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
-      std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
-          GetCompilerDriver()->GetInstructionSet(),
-          GetCompilerDriver()->GetInstructionSetFeatures(),
-          mini_debug_info,
-          info);
-      CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
+      GenerateJitDebugInfo(info);
    }

    Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
@@ -1361,7 +1356,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
    const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
    const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
    debug::MethodDebugInfo info = {};
-    DCHECK(info.trampoline_name.empty());
+    DCHECK(info.custom_name.empty());
    info.dex_file = dex_file;
    info.class_def_index = class_def_idx;
    info.dex_method_index = method_idx;
@@ -1377,14 +1372,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
    info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
    info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
    info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
-    // If both flags are passed, generate full debug info.
-    const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
-    std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
-        GetCompilerDriver()->GetInstructionSet(),
-        GetCompilerDriver()->GetInstructionSetFeatures(),
-        mini_debug_info,
-        info);
-    CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
+    GenerateJitDebugInfo(info);
  }

  Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
@@ -1408,4 +1396,22 @@ bool OptimizingCompiler::JitCompile(Thread* self,
  return true;
}

+void OptimizingCompiler::GenerateJitDebugInfo(debug::MethodDebugInfo info) {
+  const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
+  DCHECK(compiler_options.GenerateAnyDebugInfo());
+
+  // If both flags are passed, generate full debug info.
+  const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
+
+  // Create entry for the single method that we just compiled.
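+  // (E.g.: if --generate-debug-info was passed, this is a full ELF debug entry;
+  //  if only --generate-mini-debug-info was passed, mini_debug_info above is true
+  //  and a compressed entry with just symbols and unwind information is created.)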
+  std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
+      GetCompilerDriver()->GetInstructionSet(),
+      GetCompilerDriver()->GetInstructionSetFeatures(),
+      mini_debug_info,
+      ArrayRef<const debug::MethodDebugInfo>(&info, 1));
+  MutexLock mu(Thread::Current(), g_jit_debug_mutex);
+  JITCodeEntry* entry = CreateJITCodeEntry(elf_file);
+  IncrementJITCodeEntryRefcount(entry, info.code_address);
+}
+
}  // namespace art
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 0023265e50..a6a2f46d2e 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -99,6 +99,7 @@ enum class MethodCompilationStat {
  kConstructorFenceRemovedLSE,
  kConstructorFenceRemovedPFRA,
  kConstructorFenceRemovedCFRE,
+  kBitstringTypeCheck,
  kJitOutOfMemoryForCommit,
  kLastStat
};
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index f843c008d8..59733397bf 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -34,6 +34,20 @@ void PrepareForRegisterAllocation::Run() {
  }
}

+void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) {
+  // Record only those bitstring type checks that make it to the codegen stage.
+  if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+    MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+  }
+}
+
+void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) {
+  // Record only those bitstring type checks that make it to the codegen stage.
+  if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+    MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+  }
+}
+
void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) {
  check->ReplaceWith(check->InputAt(0));
}
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 2c64f016c1..f6e4d3ef99 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -40,6 +40,8 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
      "prepare_for_register_allocation";

 private:
+  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE;
+  void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE;
  void VisitNullCheck(HNullCheck* check) OVERRIDE;
  void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE;
  void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 8bb124e066..178d7fd0f0 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -87,6 +87,7 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
  void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE;
  void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
  void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
+  void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE;
  void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;
  void VisitLoadString(HLoadString* instr) OVERRIDE;
  void VisitLoadException(HLoadException* instr) OVERRIDE;
@@ -171,6 +172,12 @@ void ReferenceTypePropagation::ValidateTypes() {
            << "NullCheck " << instr->GetReferenceTypeInfo()
            << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo();
      }
+    } else if (instr->IsInstanceOf()) {
+      HInstanceOf* iof = instr->AsInstanceOf();
+      DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact());
+    } else if (instr->IsCheckCast()) {
+      HCheckCast* check = instr->AsCheckCast();
+      DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact());
    }
  }
}
@@ -499,8 +506,7 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock*
    return;
  }

-  HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
-  ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+  ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI();
  if (!class_rti.IsValid()) {
    // We have loaded an unresolved class. Don't bother bounding the type.
    return;
  }
@@ -644,15 +650,20 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet(
void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) {
  ScopedObjectAccess soa(Thread::Current());
-  Handle<mirror::Class> resolved_class = instr->GetClass();
-  if (IsAdmissible(resolved_class.Get())) {
-    instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(
-        resolved_class, /* is_exact */ true));
+  if (IsAdmissible(instr->GetClass().Get())) {
+    instr->SetValidLoadedClassRTI();
  }
  instr->SetReferenceTypeInfo(
      ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true));
}

+void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) {
+  ScopedObjectAccess soa(Thread::Current());
+  if (IsAdmissible(instr->GetClass().Get())) {
+    instr->SetValidTargetClassRTI();
+  }
+}
+
void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {
  instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo());
}
@@ -720,8 +731,6 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) {
}

void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
-  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
-  ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
  HBoundType* bound_type = check_cast->GetNext()->AsBoundType();
  if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) {
    // The next instruction is not an uninitialized BoundType. This must be
@@ -730,12 +739,14 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast
  }
  DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0));

-  if (class_rti.IsValid()) {
+  ScopedObjectAccess soa(Thread::Current());
+  Handle<mirror::Class> klass = check_cast->GetClass();
+  if (IsAdmissible(klass.Get())) {
    DCHECK(is_first_run_);
-    ScopedObjectAccess soa(Thread::Current());
+    check_cast->SetValidTargetClassRTI();
    // This is the first run of RTP and class is resolved.
-    bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes();
-    bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact),
+    bool is_exact = klass->CannotBeAssignedFromOtherTypes();
+    bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact),
                              /* CheckCast succeeds for nulls. */ true);
  } else {
    // This is the first run of RTP and class is unresolved. Remove the binding.
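The sharpening changes below are where a type check actually gets switched over to
TypeCheckKind::kBitstringCheck, so a toy model of the encoding may help. The values
and layout in this sketch are invented purely for illustration (the real encoding
lives in the runtime's SubtypeCheck/BitString code), but the compare is the one the
backends emit:

  #include <cassert>
  #include <cstdint>

  // Toy stand-in for the per-class type check bitstring (illustrative only).
  struct ToyClass {
    uint32_t path_to_root;  // Bits assigned along the superclass chain.
    uint32_t mask;          // Covers exactly the bits this class' path uses.
  };

  // The masked compare emitted for TypeCheckKind::kBitstringCheck.
  bool MatchesBitstringTarget(const ToyClass& klass, const ToyClass& target) {
    return (klass.path_to_root & target.mask) == target.path_to_root;
  }

  int main() {
    // Object <- A <- B, using 4 bits per hierarchy level in this sketch.
    ToyClass object{0x00, 0x00};
    ToyClass a{0x01, 0x0F};  // First child of Object.
    ToyClass b{0x21, 0xFF};  // Second child of A: A's bits stay in the low nibble.
    assert(MatchesBitstringTarget(b, a));       // B is a subtype of A.
    assert(MatchesBitstringTarget(b, object));  // Trivially true: empty mask.
    assert(!MatchesBitstringTarget(a, b));      // A is not a subtype of B.
    return 0;
  }

Under a scheme like this, one AND plus one compare decides the subtype question,
which is why BuildTypeCheck above can pass the null constant and two HIntConstants
instead of an HLoadClass.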
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 1e49411c72..dffef17587 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -236,6 +236,75 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(
  return load_kind;
}

+static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass,
+                                            CodeGenerator* codegen,
+                                            CompilerDriver* compiler_driver)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(!klass->IsProxyClass());
+  DCHECK(!klass->IsArrayClass());
+
+  if (Runtime::Current()->UseJitCompilation()) {
+    // If we're JITting, try to assign a type check bitstring (fall through).
+  } else if (codegen->GetCompilerOptions().IsBootImage()) {
+    const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex());
+    if (!compiler_driver->IsImageClass(descriptor)) {
+      return false;
+    }
+    // If the target is a boot image class, try to assign a type check bitstring (fall through).
+    // (If --force-determinism, this was already done; repeating is OK and yields the same result.)
+  } else {
+    // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring
+    // already assigned in the boot image.
+    return false;
+  }
+
+  // Try to assign a type check bitstring.
+  MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+  if ((false) &&  // FIXME: Inliner does not respect compiler_driver->IsClassToCompile()
+                  // and we're hitting an unassigned bitstring in dex2oat_image_test. b/26687569
+      kIsDebugBuild &&
+      codegen->GetCompilerOptions().IsBootImage() &&
+      codegen->GetCompilerOptions().IsForceDeterminism()) {
+    SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+    CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed)
+        << klass->PrettyDescriptor() << "/" << old_state
+        << " in " << codegen->GetGraph()->PrettyMethod();
+  }
+  SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass);
+  return state == SubtypeCheckInfo::kAssigned;
+}
+
+TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+                                                CodeGenerator* codegen,
+                                                CompilerDriver* compiler_driver,
+                                                bool needs_access_check) {
+  if (klass == nullptr) {
+    return TypeCheckKind::kUnresolvedCheck;
+  } else if (klass->IsInterface()) {
+    return TypeCheckKind::kInterfaceCheck;
+  } else if (klass->IsArrayClass()) {
+    if (klass->GetComponentType()->IsObjectClass()) {
+      return TypeCheckKind::kArrayObjectCheck;
+    } else if (klass->CannotBeAssignedFromOtherTypes()) {
+      return TypeCheckKind::kExactCheck;
+    } else {
+      return TypeCheckKind::kArrayCheck;
+    }
+  } else if (klass->IsFinal()) {  // TODO: Consider using bitstring for final classes.
+    return TypeCheckKind::kExactCheck;
+  } else if (kUseBitstringTypeCheck &&
+             !needs_access_check &&
+             CanUseTypeCheckBitstring(klass, codegen, compiler_driver)) {
+    // TODO: We should not need the `!needs_access_check` check but getting rid of that
+    // requires rewriting some optimizations in instruction simplifier.
+    return TypeCheckKind::kBitstringCheck;
+  } else if (klass->IsAbstract()) {
+    return TypeCheckKind::kAbstractClassCheck;
+  } else {
+    return TypeCheckKind::kClassHierarchyCheck;
+  }
+}
+
void HSharpening::ProcessLoadString(
    HLoadString* load_string,
    CodeGenerator* codegen,
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 6df7d6d91e..fa3e948eeb 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -44,12 +44,10 @@ class HSharpening : public HOptimization {
  static constexpr const char* kSharpeningPassName = "sharpening";

-  // Used by the builder.
-  static void ProcessLoadString(HLoadString* load_string,
-                                CodeGenerator* codegen,
-                                CompilerDriver* compiler_driver,
-                                const DexCompilationUnit& dex_compilation_unit,
-                                VariableSizedHandleScope* handles);
+  // Used by Sharpening and InstructionSimplifier.
+  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+                                          CodeGenerator* codegen,
+                                          CompilerDriver* compiler_driver);

  // Used by the builder and the inliner.
  static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
@@ -58,10 +56,19 @@ class HSharpening : public HOptimization {
                                                   const DexCompilationUnit& dex_compilation_unit)
      REQUIRES_SHARED(Locks::mutator_lock_);

-  // Used by Sharpening and InstructionSimplifier.
-  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
-                                          CodeGenerator* codegen,
-                                          CompilerDriver* compiler_driver);
+  // Used by the builder.
+  static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+                                            CodeGenerator* codegen,
+                                            CompilerDriver* compiler_driver,
+                                            bool needs_access_check)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Used by the builder.
+  static void ProcessLoadString(HLoadString* load_string,
+                                CodeGenerator* codegen,
+                                CompilerDriver* compiler_driver,
+                                const DexCompilationUnit& dex_compilation_unit,
+                                VariableSizedHandleScope* handles);

 private:
  CodeGenerator* codegen_;
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
new file mode 100644
index 0000000000..a7c23bef7e
--- /dev/null
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -0,0 +1,704 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "superblock_cloner.h"
+
+#include "common_dominator.h"
+#include "graph_checker.h"
+
+#include <iostream>
+
+namespace art {
+
+using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
+using HInstructionMap = SuperblockCloner::HInstructionMap;
+using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
+using HEdgeSet = SuperblockCloner::HEdgeSet;
+
+void HEdge::Dump(std::ostream& stream) const {
+  stream << "(" << from_ << "->" << to_ << ")";
+}
+
+//
+// Static helper methods.
+//
+
+// Returns whether the instruction has any uses (regular or environmental) outside the region
+// defined by the basic block set.
+static bool IsUsedOutsideRegion(const HInstruction* instr, const HBasicBlockSet& bb_set) {
+  auto& uses = instr->GetUses();
+  for (auto use_node = uses.begin(), e = uses.end(); use_node != e; ++use_node) {
+    HInstruction* user = use_node->GetUser();
+    if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
+      return true;
+    }
+  }
+
+  auto& env_uses = instr->GetEnvUses();
+  for (auto use_node = env_uses.begin(), e = env_uses.end(); use_node != e; ++use_node) {
+    HInstruction* user = use_node->GetUser()->GetHolder();
+    if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Returns whether the phi's inputs are the same HInstruction.
+static bool ArePhiInputsTheSame(const HPhi* phi) {
+  HInstruction* first_input = phi->InputAt(0);
+  for (size_t i = 1, e = phi->InputCount(); i < e; i++) {
+    if (phi->InputAt(i) != first_input) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
+// graph.
+static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
+  if (loop1 == nullptr || loop2 == nullptr) {
+    return nullptr;
+  }
+
+  if (loop1->IsIn(*loop2)) {
+    return loop2;
+  } else if (loop2->IsIn(*loop1)) {
+    return loop1;
+  }
+  HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader());
+  return block->GetLoopInformation();
+}
+
+// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph.
+static void OrderLoopsHeadersPredecessors(HGraph* graph) {
+  for (HBasicBlock* block : graph->GetPostOrder()) {
+    if (block->IsLoopHeader()) {
+      graph->OrderLoopHeaderPredecessors(block);
+    }
+  }
+}
+
+//
+// Helpers for CloneBasicBlock.
+//
+
+void SuperblockCloner::ReplaceInputsWithCopies(HInstruction* copy_instr) {
+  DCHECK(!copy_instr->IsPhi());
+  for (size_t i = 0, e = copy_instr->InputCount(); i < e; i++) {
+    // Copy instruction holds the same input as the original instruction holds.
+    HInstruction* orig_input = copy_instr->InputAt(i);
+    if (!IsInOrigBBSet(orig_input->GetBlock())) {
+      // Defined outside the subgraph.
+      continue;
+    }
+    HInstruction* copy_input = GetInstrCopy(orig_input);
+    // copy_instr will be registered as a user of copy_inputs after returning from this function:
+    // 'copy_block->AddInstruction(copy_instr)'.
+    copy_instr->SetRawInputAt(i, copy_input);
+  }
+}
+
+void SuperblockCloner::DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr,
+                                                         const HEnvironment* orig_env) {
+  if (orig_env->GetParent() != nullptr) {
+    DeepCloneEnvironmentWithRemapping(copy_instr, orig_env->GetParent());
+  }
+  HEnvironment* copy_env = new (arena_) HEnvironment(arena_, *orig_env, copy_instr);
+
+  for (size_t i = 0; i < orig_env->Size(); i++) {
+    HInstruction* env_input = orig_env->GetInstructionAt(i);
+    if (env_input != nullptr && IsInOrigBBSet(env_input->GetBlock())) {
+      env_input = GetInstrCopy(env_input);
+      DCHECK(env_input != nullptr && env_input->GetBlock() != nullptr);
+    }
+    copy_env->SetRawEnvAt(i, env_input);
+    if (env_input != nullptr) {
+      env_input->AddEnvUseAt(copy_env, i);
+    }
+  }
+  // InsertRawEnvironment assumes that the instruction already has an environment;
+  // that's why we use SetRawEnvironment in the 'else' case.
+  // Since this function calls itself recursively with the same copy_instr, copy_instr
+  // may have a partially copied chain of HEnvironments.
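+  // (Worked example, for illustration: with an environment chain env0 -> env1, where
+  //  env1 is the parent, the recursion copies env1 first via the SetRawEnvironment
+  //  branch; the subsequent copy of env0 then takes the InsertRawEnvironment branch
+  //  and is linked in front of it.)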
+  if (copy_instr->HasEnvironment()) {
+    copy_instr->InsertRawEnvironment(copy_env);
+  } else {
+    copy_instr->SetRawEnvironment(copy_env);
+  }
+}
+
+//
+// Helpers for RemapEdgesSuccessors.
+//
+
+void SuperblockCloner::RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block,
+                                                       HBasicBlock* orig_succ) {
+  DCHECK(IsInOrigBBSet(orig_succ));
+  HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
+
+  size_t this_index = orig_succ->GetPredecessorIndexOf(orig_block);
+  size_t phi_input_count = 0;
+  // This flag reflects whether the original successor has at least one phi and this phi
+  // has already been processed in the loop. Used for validation purposes in DCHECK to check that
+  // in the end all of the phis in the copy successor have the same number of inputs - the number
+  // of copy successor's predecessors.
+  bool first_phi_met = false;
+  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* orig_phi = it.Current()->AsPhi();
+    HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+    HInstruction* orig_phi_input = orig_phi->InputAt(this_index);
+    // Remove corresponding input for original phi.
+    orig_phi->RemoveInputAt(this_index);
+    // Copy phi doesn't yet have either orig_block as predecessor or the input that corresponds
+    // to orig_block, so add the input at the end of the list.
+    copy_phi->AddInput(orig_phi_input);
+    if (!first_phi_met) {
+      phi_input_count = copy_phi->InputCount();
+      first_phi_met = true;
+    } else {
+      DCHECK_EQ(phi_input_count, copy_phi->InputCount());
+    }
+  }
+  // orig_block will be put at the end of the copy_succ's predecessors list; that corresponds
+  // to the previously added phi inputs position.
+  orig_block->ReplaceSuccessor(orig_succ, copy_succ);
+  DCHECK(!first_phi_met || copy_succ->GetPredecessors().size() == phi_input_count);
+}
+
+void SuperblockCloner::AddCopyInternalEdge(HBasicBlock* orig_block,
+                                           HBasicBlock* orig_succ) {
+  DCHECK(IsInOrigBBSet(orig_succ));
+  HBasicBlock* copy_block = GetBlockCopy(orig_block);
+  HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
+  copy_block->AddSuccessor(copy_succ);
+
+  size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
+  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* orig_phi = it.Current()->AsPhi();
+    HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+    HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
+    copy_phi->AddInput(orig_phi_input);
+  }
+}
+
+void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block,
+                                             HBasicBlock* orig_succ) {
+  DCHECK(IsInOrigBBSet(orig_succ));
+  HBasicBlock* copy_block = GetBlockCopy(orig_block);
+  copy_block->AddSuccessor(orig_succ);
+  DCHECK(copy_block->HasSuccessor(orig_succ));
+
+  size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
+  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* orig_phi = it.Current()->AsPhi();
+    HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
+    orig_phi->AddInput(orig_phi_input);
+  }
+}
+
+//
+// Local versions of CF calculation/adjustment routines.
+//
+
+// TODO: merge with the original version in nodes.cc. The concern is that we don't want to affect
+// the performance of the base version by checking the local set.
+// TODO: this version works when updating the back edges info for natural loop-based local_set.
+// Check exactly which types of subgraphs can be analysed or rename it to
+// FindBackEdgesInTheNaturalLoop.
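+// (Descriptive note: the function below is an iterative DFS where "visiting" marks
+//  blocks on the current DFS stack and "visited" marks blocks ever reached; an edge
+//  into a block that is still in "visiting" is, by definition, a back edge.)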
+void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set) { + ArenaBitVector visited(arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks. + DCHECK_EQ(visited.GetHighestBitSet(), -1); + + // Nodes that we're currently visiting, indexed by block id. + ArenaBitVector visiting(arena_, graph_->GetBlocks().size(), false, kArenaAllocGraphBuilder); + // Number of successors visited from a given node, indexed by block id. + ArenaVector<size_t> successors_visited(graph_->GetBlocks().size(), + 0u, + arena_->Adapter(kArenaAllocGraphBuilder)); + // Stack of nodes that we're currently visiting (same as marked in "visiting" above). + ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder)); + constexpr size_t kDefaultWorklistSize = 8; + worklist.reserve(kDefaultWorklistSize); + + visited.SetBit(entry_block->GetBlockId()); + visiting.SetBit(entry_block->GetBlockId()); + worklist.push_back(entry_block); + + while (!worklist.empty()) { + HBasicBlock* current = worklist.back(); + uint32_t current_id = current->GetBlockId(); + if (successors_visited[current_id] == current->GetSuccessors().size()) { + visiting.ClearBit(current_id); + worklist.pop_back(); + } else { + HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++]; + uint32_t successor_id = successor->GetBlockId(); + if (!local_set->IsBitSet(successor_id)) { + continue; + } + + if (visiting.IsBitSet(successor_id)) { + DCHECK(ContainsElement(worklist, successor)); + successor->AddBackEdgeWhileUpdating(current); + } else if (!visited.IsBitSet(successor_id)) { + visited.SetBit(successor_id); + visiting.SetBit(successor_id); + worklist.push_back(successor); + } + } + } +} + +void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) { + // TODO: DCHECK that after the transformation the graph is connected. + HBasicBlock* block_entry = nullptr; + + if (outer_loop_ == nullptr) { + for (auto block : graph_->GetBlocks()) { + if (block != nullptr) { + outer_loop_bb_set->SetBit(block->GetBlockId()); + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr) { + info->ResetBasicBlockData(); + } + } + } + block_entry = graph_->GetEntryBlock(); + } else { + outer_loop_bb_set->Copy(&outer_loop_bb_set_); + block_entry = outer_loop_->GetHeader(); + + // Add newly created copy blocks. + for (auto entry : *bb_map_) { + outer_loop_bb_set->SetBit(entry.second->GetBlockId()); + } + + // Clear loop_info for the whole outer loop. + for (uint32_t idx : outer_loop_bb_set->Indexes()) { + HBasicBlock* block = GetBlockById(idx); + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr) { + info->ResetBasicBlockData(); + } + } + } + + FindBackEdgesLocal(block_entry, outer_loop_bb_set); + + for (uint32_t idx : outer_loop_bb_set->Indexes()) { + HBasicBlock* block = GetBlockById(idx); + HLoopInformation* info = block->GetLoopInformation(); + // Reset LoopInformation for regular blocks and old headers which are no longer loop headers. + if (info != nullptr && + (info->GetHeader() != block || info->NumberOfBackEdges() == 0)) { + block->SetLoopInformation(nullptr); + } + } +} + +// This is a modified version of HGraph::AnalyzeLoops. +GraphAnalysisResult SuperblockCloner::AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set) { + // We iterate post order to ensure we visit inner loops before outer loops. 
+  // `PopulateRecursive` needs this guarantee to know whether a natural loop
+  // contains an irreducible loop.
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
+    if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
+      continue;
+    }
+    if (block->IsLoopHeader()) {
+      if (block->IsCatchBlock()) {
+        // TODO: Dealing with exceptional back edges could be tricky because
+        //       they only approximate the real control flow. Bail out for now.
+        return kAnalysisFailThrowCatchLoop;
+      }
+      block->GetLoopInformation()->Populate();
+    }
+  }
+
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
+    if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
+      continue;
+    }
+    if (block->IsLoopHeader()) {
+      HLoopInformation* cur_loop = block->GetLoopInformation();
+      HLoopInformation* outer_loop = cur_loop->GetPreHeader()->GetLoopInformation();
+      if (outer_loop != nullptr) {
+        outer_loop->PopulateInnerLoopUpwards(cur_loop);
+      }
+    }
+  }
+
+  return kAnalysisSuccess;
+}
+
+void SuperblockCloner::CleanUpControlFlow() {
+  // TODO: Full control flow clean up for now; optimize it.
+  graph_->ClearDominanceInformation();
+
+  ArenaBitVector outer_loop_bb_set(
+      arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  RecalculateBackEdgesInfo(&outer_loop_bb_set);
+
+  // TODO: Do it locally.
+  graph_->SimplifyCFG();
+  graph_->ComputeDominanceInformation();
+
+  // AnalyzeLoopsLocally requires correct post-order information, which was just calculated by
+  // ComputeDominanceInformation.
+  GraphAnalysisResult result = AnalyzeLoopsLocally(&outer_loop_bb_set);
+  DCHECK_EQ(result, kAnalysisSuccess);
+
+  // TODO: Do it locally.
+  OrderLoopsHeadersPredecessors(graph_);
+
+  graph_->ComputeTryBlockInformation();
+}
+
+//
+// Helpers for ResolveDataFlow.
+//
+
+void SuperblockCloner::ResolvePhi(HPhi* phi) {
+  HBasicBlock* phi_block = phi->GetBlock();
+  for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
+    HInstruction* input = phi->InputAt(i);
+    HBasicBlock* input_block = input->GetBlock();
+
+    // Originally defined outside the region.
+    if (!IsInOrigBBSet(input_block)) {
+      continue;
+    }
+    HBasicBlock* corresponding_block = phi_block->GetPredecessors()[i];
+    if (!IsInOrigBBSet(corresponding_block)) {
+      phi->ReplaceInput(GetInstrCopy(input), i);
+    }
+  }
+}
+
+//
+// Main algorithm methods.
+//
+
+void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) {
+  DCHECK(exits->empty());
+  for (uint32_t block_id : orig_bb_set_.Indexes()) {
+    HBasicBlock* block = GetBlockById(block_id);
+    for (HBasicBlock* succ : block->GetSuccessors()) {
+      if (!IsInOrigBBSet(succ)) {
+        exits->push_back(succ);
+      }
+    }
+  }
+}
+
+void SuperblockCloner::FindAndSetLocalAreaForAdjustments() {
+  DCHECK(outer_loop_ == nullptr);
+  ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+  SearchForSubgraphExits(&exits);
+
+  // For a reducible graph we need to update back-edges and dominance information only for
+  // the outermost loop which is affected by the transformation - it can be found by picking
+  // the outermost common loop of the loops to which the subgraph's exit blocks belong.
+  // Note: it can be a loop or the whole graph (outer_loop_ will be nullptr in this case).
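+  // For instance (hypothetical loop names): if the subgraph is the body of an inner loop L2
+  // nested in L1 and its exits lead to blocks in L2 and in L1, the fold below resolves
+  // outer_loop_ to L1; if any exit leads to a block outside all loops, loop_exit_loop_info is
+  // null and the whole graph becomes the area to adjust.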
+  for (HBasicBlock* exit : exits) {
+    HLoopInformation* loop_exit_loop_info = exit->GetLoopInformation();
+    if (loop_exit_loop_info == nullptr) {
+      outer_loop_ = nullptr;
+      break;
+    }
+    outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info);
+  }
+
+  if (outer_loop_ != nullptr) {
+    // Save the loop population info as it will be changed later.
+    outer_loop_bb_set_.Copy(&outer_loop_->GetBlocks());
+  }
+}
+
+void SuperblockCloner::RemapEdgesSuccessors() {
+  // Redirect incoming edges.
+  for (HEdge e : *remap_incoming_) {
+    HBasicBlock* orig_block = GetBlockById(e.GetFrom());
+    HBasicBlock* orig_succ = GetBlockById(e.GetTo());
+    RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
+  }
+
+  // Redirect internal edges.
+  for (uint32_t orig_block_id : orig_bb_set_.Indexes()) {
+    HBasicBlock* orig_block = GetBlockById(orig_block_id);
+
+    for (HBasicBlock* orig_succ : orig_block->GetSuccessors()) {
+      uint32_t orig_succ_id = orig_succ->GetBlockId();
+
+      // Check for outgoing edge.
+      if (!IsInOrigBBSet(orig_succ)) {
+        HBasicBlock* copy_block = GetBlockCopy(orig_block);
+        copy_block->AddSuccessor(orig_succ);
+        continue;
+      }
+
+      auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+      auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+
+      // By construction, all successors of a copied block were initially set to the originals.
+      if (copy_redir != remap_copy_internal_->end()) {
+        RemapCopyInternalEdge(orig_block, orig_succ);
+      } else {
+        AddCopyInternalEdge(orig_block, orig_succ);
+      }
+
+      if (orig_redir != remap_orig_internal_->end()) {
+        RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
+      }
+    }
+  }
+}
+
+void SuperblockCloner::AdjustControlFlowInfo() {
+  ArenaBitVector outer_loop_bb_set(
+      arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  RecalculateBackEdgesInfo(&outer_loop_bb_set);
+
+  graph_->ClearDominanceInformation();
+  // TODO: Do it locally.
+  graph_->ComputeDominanceInformation();
+}
+
+// TODO: The current FastCase restriction guarantees that instructions' inputs are already mapped
+//       to the valid values; only phis' inputs must be adjusted.
+void SuperblockCloner::ResolveDataFlow() {
+  for (auto entry : *bb_map_) {
+    HBasicBlock* orig_block = entry.first;
+
+    for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* orig_phi = it.Current()->AsPhi();
+      HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+      ResolvePhi(orig_phi);
+      ResolvePhi(copy_phi);
+    }
+    if (kIsDebugBuild) {
+      // Inputs of instruction copies must already be mapped to the corresponding input copies.
+      for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
+        CheckInstructionInputsRemapping(it.Current());
+      }
+    }
+  }
+}
+
+//
+// Debug and logging methods.
+//
+
+void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) {
+  DCHECK(!orig_instr->IsPhi());
+  HInstruction* copy_instr = GetInstrCopy(orig_instr);
+  for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
+    HInstruction* orig_input = orig_instr->InputAt(i);
+    DCHECK(orig_input->GetBlock()->Dominates(orig_instr->GetBlock()));
+
+    // If the original input is defined outside the region then it will remain the same for
+    // both the original instruction and the copy after the transformation.
+    if (!IsInOrigBBSet(orig_input->GetBlock())) {
+      continue;
+    }
+    HInstruction* copy_input = GetInstrCopy(orig_input);
+    DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
+  }
+
+  // Check the environment.
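+  // (An HEnvironment records the values live at a potential deoptimization point, so a copied
+  //  instruction must reference the copied values whenever those are defined inside the cloned
+  //  region; the checks below mirror the regular-input checks above.)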
+  if (orig_instr->HasEnvironment()) {
+    HEnvironment* orig_env = orig_instr->GetEnvironment();
+
+    for (size_t i = 0, e = orig_env->Size(); i < e; ++i) {
+      HInstruction* orig_input = orig_env->GetInstructionAt(i);
+
+      // If the original input is defined outside the region then it will remain the same for
+      // both the original instruction and the copy after the transformation.
+      if (orig_input == nullptr || !IsInOrigBBSet(orig_input->GetBlock())) {
+        continue;
+      }
+
+      HInstruction* copy_input = GetInstrCopy(orig_input);
+      DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
+    }
+  }
+}
+
+//
+// Public methods.
+//
+
+SuperblockCloner::SuperblockCloner(HGraph* graph,
+                                   const HBasicBlockSet* orig_bb_set,
+                                   HBasicBlockMap* bb_map,
+                                   HInstructionMap* hir_map)
+  : graph_(graph),
+    arena_(graph->GetAllocator()),
+    orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
+    remap_orig_internal_(nullptr),
+    remap_copy_internal_(nullptr),
+    remap_incoming_(nullptr),
+    bb_map_(bb_map),
+    hir_map_(hir_map),
+    outer_loop_(nullptr),
+    outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
+  orig_bb_set_.Copy(orig_bb_set);
+}
+
+void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
+                                                 const HEdgeSet* remap_copy_internal,
+                                                 const HEdgeSet* remap_incoming) {
+  remap_orig_internal_ = remap_orig_internal;
+  remap_copy_internal_ = remap_copy_internal;
+  remap_incoming_ = remap_incoming;
+}
+
+bool SuperblockCloner::IsSubgraphClonable() const {
+  // TODO: Support irreducible graphs and graphs with try-catch.
+  if (graph_->HasIrreducibleLoops() || graph_->HasTryCatch()) {
+    return false;
+  }
+
+  // Check that there are no instructions defined in the subgraph and used outside.
+  // TODO: Improve this by accepting graphs with such uses but only one exit.
+  for (uint32_t idx : orig_bb_set_.Indexes()) {
+    HBasicBlock* block = GetBlockById(idx);
+
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (!instr->IsClonable() ||
+          IsUsedOutsideRegion(instr, orig_bb_set_)) {
+        return false;
+      }
+    }
+
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (!instr->IsClonable() ||
+          IsUsedOutsideRegion(instr, orig_bb_set_)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+void SuperblockCloner::Run() {
+  DCHECK(bb_map_ != nullptr);
+  DCHECK(hir_map_ != nullptr);
+  DCHECK(remap_orig_internal_ != nullptr &&
+         remap_copy_internal_ != nullptr &&
+         remap_incoming_ != nullptr);
+  DCHECK(IsSubgraphClonable());
+
+  // Find an area in the graph for which control flow information should be adjusted.
+  FindAndSetLocalAreaForAdjustments();
+  // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
+  // adjusted.
+  CloneBasicBlocks();
+  // Connect the blocks together/remap successors and fix the phis which are directly affected by
+  // the remapping.
+  RemapEdgesSuccessors();
+  // Recalculate dominance and back-edge information, which is required by the next stage.
+  AdjustControlFlowInfo();
+  // Fix the data flow of the graph.
+  ResolveDataFlow();
+}
+
+void SuperblockCloner::CleanUp() {
+  CleanUpControlFlow();
+
+  // Remove phis whose inputs are all the same.
+  // When a block has a single predecessor it must not have any phis. However, after the
+  // transformation it can happen that such a block has a phi with a single input.
+  // Since those phis have to be processed anyway, we also simplify phis with multiple
+  // identical inputs here.
+  for (auto entry : *bb_map_) {
+    HBasicBlock* orig_block = entry.first;
+    for (HInstructionIterator inst_it(orig_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+      HPhi* phi = inst_it.Current()->AsPhi();
+      if (ArePhiInputsTheSame(phi)) {
+        phi->ReplaceWith(phi->InputAt(0));
+        orig_block->RemovePhi(phi);
+      }
+    }
+
+    HBasicBlock* copy_block = GetBlockCopy(orig_block);
+    for (HInstructionIterator inst_it(copy_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+      HPhi* phi = inst_it.Current()->AsPhi();
+      if (ArePhiInputsTheSame(phi)) {
+        phi->ReplaceWith(phi->InputAt(0));
+        copy_block->RemovePhi(phi);
+      }
+    }
+  }
+}
+
+HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) {
+  HGraph* graph = orig_block->GetGraph();
+  HBasicBlock* copy_block = new (arena_) HBasicBlock(graph, orig_block->GetDexPc());
+  graph->AddBlock(copy_block);
+
+  // Clone all the phis and add them to the map.
+  for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+    HInstruction* orig_instr = it.Current();
+    HInstruction* copy_instr = orig_instr->Clone(arena_);
+    copy_block->AddPhi(copy_instr->AsPhi());
+    copy_instr->AsPhi()->RemoveAllInputs();
+    DCHECK(!orig_instr->HasEnvironment());
+    hir_map_->Put(orig_instr, copy_instr);
+  }
+
+  // Clone all the instructions and add them to the map.
+  for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
+    HInstruction* orig_instr = it.Current();
+    HInstruction* copy_instr = orig_instr->Clone(arena_);
+    ReplaceInputsWithCopies(copy_instr);
+    copy_block->AddInstruction(copy_instr);
+    if (orig_instr->HasEnvironment()) {
+      DeepCloneEnvironmentWithRemapping(copy_instr, orig_instr->GetEnvironment());
+    }
+    hir_map_->Put(orig_instr, copy_instr);
+  }
+
+  return copy_block;
+}
+
+void SuperblockCloner::CloneBasicBlocks() {
+  // By this time ReversePostOrder must be valid: in 'CloneBasicBlock' inputs of the copied
+  // instructions might be replaced by copies of the original inputs (depending on where those
+  // inputs are defined). So the definitions of the original inputs must be visited before their
+  // original uses. The property of reducible graphs "if 'A' dom 'B' then
+  // rpo_num('A') <= rpo_num('B')" guarantees that.
+  for (HBasicBlock* orig_block : graph_->GetReversePostOrder()) {
+    if (!IsInOrigBBSet(orig_block)) {
+      continue;
+    }
+    HBasicBlock* copy_block = CloneBasicBlock(orig_block);
+    bb_map_->Put(orig_block, copy_block);
+    if (kSuperblockClonerLogging) {
+      std::cout << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId() <<
+                   std::endl;
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
new file mode 100644
index 0000000000..23de692673
--- /dev/null
+++ b/compiler/optimizing/superblock_cloner.h
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
+#define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
+
+#include "base/arena_bit_vector.h"
+#include "base/arena_containers.h"
+#include "base/bit_vector-inl.h"
+#include "nodes.h"
+
+namespace art {
+
+static const bool kSuperblockClonerLogging = false;
+static const bool kSuperblockClonerVerify = false;
+
+// Represents an edge between two HBasicBlocks.
+//
+// Note: objects of this class are small - pass them by value.
+class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
+ public:
+  HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
+    DCHECK_NE(to_, kInvalidBlockId);
+    DCHECK_NE(from_, kInvalidBlockId);
+  }
+  HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
+    DCHECK_NE(to_, kInvalidBlockId);
+    DCHECK_NE(from_, kInvalidBlockId);
+  }
+  HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
+
+  uint32_t GetFrom() const { return from_; }
+  uint32_t GetTo() const { return to_; }
+
+  bool operator==(const HEdge& other) const {
+    return this->from_ == other.from_ && this->to_ == other.to_;
+  }
+
+  bool operator!=(const HEdge& other) const { return !operator==(other); }
+  void Dump(std::ostream& stream) const;
+
+  // Returns whether this edge has valid block ids; IsEdgeValid() below additionally checks
+  // that the edge actually exists in a given graph.
+  bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
+
+ private:
+  // Predecessor block id.
+  uint32_t from_;
+  // Successor block id.
+  uint32_t to_;
+};
+
+// Returns whether a HEdge corresponds to an existing edge in the graph.
+inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
+  if (!edge.IsValid()) {
+    return false;
+  }
+  uint32_t from = edge.GetFrom();
+  uint32_t to = edge.GetTo();
+  if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
+    return false;
+  }
+
+  HBasicBlock* block_from = graph->GetBlocks()[from];
+  HBasicBlock* block_to = graph->GetBlocks()[to];
+  if (block_from == nullptr || block_to == nullptr) {
+    return false;
+  }
+
+  return block_from->HasSuccessor(block_to, 0);
+}
+
+// SuperblockCloner provides the ability to clone subgraphs in a smart, high-level way without
+// fine-grained manipulation of the IR; data flow and graph properties are resolved/adjusted
+// automatically. The clone transformation is defined by specifying a set of basic blocks to copy
+// and a set of rules for how to treat edges and remap their successors. Using this approach,
+// optimizations such as Branch Target Expansion, Loop Peeling and Loop Unrolling can be
+// implemented.
+//
+// The idea of the transformation is based on the "superblock cloning" technique described in the
+// book "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
+// Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An
+// Effective Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A.,
+// Chen, W.Y. et al. J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
+//
+// There are two states of the IR graph: original graph (before the transformation) and
+// copy graph (after).
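+//
+// As a small running example (hypothetical block names), consider a natural loop
+//   E -> H, H -> B, B -> H, H -> X
+// with orig_bb_set = {H, B}: (E, H) is an incoming edge, (H, B) and (B, H) are internal
+// edges, and (H, X) is an outgoing edge in the terms defined below.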
+//
+// Before the transformation:
+// Defining a set of basic blocks to copy (orig_bb_set) partitions all of the edges in the
+// original graph into 4 categories/sets (using the notation "(pred, succ)" for an edge, where
+// pred and succ are basic blocks):
+//   - internal - pred, succ are members of 'orig_bb_set'.
+//   - outside  - pred, succ are not members of 'orig_bb_set'.
+//   - incoming - pred is not a member of 'orig_bb_set', succ is.
+//   - outgoing - pred is a member of 'orig_bb_set', succ is not.
+//
+// Transformation:
+//
+// 1. Initial cloning:
+//   1.1. For each 'orig_block' in orig_bb_set create a copy 'copy_block'; these new blocks
+//        form 'copy_bb_set'.
+//   1.2. For each edge (X, Y) from the internal set create an edge (X_1, Y_1) where X_1, Y_1 are
+//        the copies of the X, Y basic blocks correspondingly; these new edges form the
+//        'copy_internal' edge set.
+//   1.3. For each edge (X, Y) from the outgoing set create an edge (X_1, Y) where X_1 is the
+//        copy of X; these new edges form the 'copy_outgoing' edge set.
+// 2. Successor remapping.
+//   2.1. 'remap_orig_internal' - set of edges (X, Y) from 'orig_bb_set' whose successors should
+//        be remapped to copy nodes: (X, Y) will be transformed into (X, Y_1).
+//   2.2. 'remap_copy_internal' - set of edges (X_1, Y_1) from 'copy_bb_set' whose successors
+//        should be remapped to original nodes: (X_1, Y_1) will be transformed into (X_1, Y).
+//   2.3. 'remap_incoming' - set of edges (X, Y) from the 'incoming' edge set in the original
+//        graph whose successors should be remapped to copy nodes: (X, Y) will be transformed
+//        into (X, Y_1).
+// 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
+// 4. Fix/resolve data flow.
+// 5. Do cleanups (DCE, critical edge splitting, etc).
+//
+class SuperblockCloner : public ValueObject {
+ public:
+  // TODO: Investigate optimal types for the containers.
+  using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
+  using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
+  using HBasicBlockSet = ArenaBitVector;
+  using HEdgeSet = ArenaHashSet<HEdge>;
+
+  SuperblockCloner(HGraph* graph,
+                   const HBasicBlockSet* orig_bb_set,
+                   HBasicBlockMap* bb_map,
+                   HInstructionMap* hir_map);
+
+  // Sets edge successor remapping info specified by the corresponding edge sets.
+  void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
+                                 const HEdgeSet* remap_copy_internal,
+                                 const HEdgeSet* remap_incoming);
+
+  // Returns whether the specified subgraph is copyable.
+  // TODO: Start from a small range of graph patterns, then extend it.
+  bool IsSubgraphClonable() const;
+
+  // Runs the copy algorithm according to the description.
+  void Run();
+
+  // Cleans up the graph after transformation: splits critical edges, recalculates control flow
+  // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
+  void CleanUp();
+
+  // Returns a clone of a basic block (orig_block).
+  //
+  // - The copy block will have no successors/predecessors; they should be set up manually.
+  // - For each instruction in the orig_block a copy is created and inserted into the copy block;
+  //   this correspondence is recorded in the map (old instruction, new instruction).
+  // - Graph HIR is not valid after this transformation: the copied HIRs still have the same
+  //   inputs as in the original block, PHIs do not reflect a correct correspondence between
+  //   values and predecessors (as the copy block has no predecessors yet), etc.
+  HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
+
+  // Creates a clone for each basic block in orig_bb_set, adding corresponding entries to bb_map_
+  // and hir_map_.
+  void CloneBasicBlocks();
+
+  HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
+    auto copy_input_iter = hir_map_->find(orig_instr);
+    DCHECK(copy_input_iter != hir_map_->end());
+    return copy_input_iter->second;
+  }
+
+  HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
+    HBasicBlock* block = bb_map_->Get(orig_block);
+    DCHECK(block != nullptr);
+    return block;
+  }
+
+  HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
+    for (auto it : *hir_map_) {
+      if (it.second == copy_instr) {
+        return it.first;
+      }
+    }
+    return nullptr;
+  }
+
+  bool IsInOrigBBSet(uint32_t block_id) const {
+    return orig_bb_set_.IsBitSet(block_id);
+  }
+
+  bool IsInOrigBBSet(const HBasicBlock* block) const {
+    return IsInOrigBBSet(block->GetBlockId());
+  }
+
+ private:
+  // Fills the 'exits' vector with the subgraph exits.
+  void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
+
+  // Finds and records information about the area in the graph for which control flow (back edges,
+  // loops, dominators) needs to be adjusted.
+  void FindAndSetLocalAreaForAdjustments();
+
+  // Remaps edges' successors according to the info specified in the edge sets.
+  //
+  // Only edge successors/predecessors and phis' input records (to have a correspondence between
+  // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
+  // phis' nor instructions' input values are resolved.
+  void RemapEdgesSuccessors();
+
+  // Adjusts control flow (back edges, loops, dominators) for the local area defined by
+  // FindAndSetLocalAreaForAdjustments.
+  void AdjustControlFlowInfo();
+
+  // Resolves data flow - adjusts phis' and instructions' inputs in order to have a valid graph in
+  // SSA form.
+  void ResolveDataFlow();
+
+  //
+  // Helpers for CloneBasicBlock.
+  //
+
+  // Adjusts the copy instruction's inputs: if an input of the original instruction is defined in
+  // the orig_bb_set, replaces it with the corresponding copy; otherwise leaves it the same as in
+  // the original.
+  void ReplaceInputsWithCopies(HInstruction* copy_instr);
+
+  // Recursively clones the environment for the copy instruction. If an input of the original
+  // environment is defined in the orig_bb_set, replaces it with the corresponding copy; otherwise
+  // leaves it the same as in the original.
+  void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
+
+  //
+  // Helpers for RemapEdgesSuccessors.
+  //
+
+  // Remaps an incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ
+  // and copy_succ.
+  void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  // Adds a copy internal edge (from copy_block to copy_succ), updates the phis in copy_succ.
+  void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  // Remaps a copy internal edge to its origin, adjusts the phi inputs in orig_succ.
+  void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  //
+  // Local versions of control flow calculation/adjustment routines.
+  //
+
+  void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
+  void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
+  GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
+  void CleanUpControlFlow();
+
+  //
+  // Helpers for ResolveDataFlow.
+  //
+
+  // Resolves the inputs of the phi.
+  void ResolvePhi(HPhi* phi);
+
+  //
+  // Debug and logging methods.
+  //
+  void CheckInstructionInputsRemapping(HInstruction* orig_instr);
+
+  HBasicBlock* GetBlockById(uint32_t block_id) const {
+    DCHECK(block_id < graph_->GetBlocks().size());
+    HBasicBlock* block = graph_->GetBlocks()[block_id];
+    DCHECK(block != nullptr);
+    return block;
+  }
+
+  HGraph* const graph_;
+  ArenaAllocator* const arena_;
+
+  // Set of basic blocks in the original graph to be copied.
+  HBasicBlockSet orig_bb_set_;
+
+  // Sets of edges which require successor remapping.
+  const HEdgeSet* remap_orig_internal_;
+  const HEdgeSet* remap_copy_internal_;
+  const HEdgeSet* remap_incoming_;
+
+  // Correspondence map for blocks: (original block, copy block).
+  HBasicBlockMap* bb_map_;
+  // Correspondence map for instructions: (original HInstruction, copy HInstruction).
+  HInstructionMap* hir_map_;
+  // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted.
+  HLoopInformation* outer_loop_;
+  HBasicBlockSet outer_loop_bb_set_;
+
+  ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
+
+  DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
+};
+
+}  // namespace art
+
+namespace std {
+
+template <>
+struct hash<art::HEdge> {
+  size_t operator()(art::HEdge const& x) const noexcept {
+    // Use the Cantor pairing function as the hash function.
+    uint32_t a = x.GetFrom();
+    uint32_t b = x.GetTo();
+    return (a + b) * (a + b + 1) / 2 + b;
+  }
+};
+
+}  // namespace std
+
+#endif  // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index fd77eb81fc..f1b7bffdf5 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -17,11 +17,15 @@
 #include "graph_checker.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
+#include "superblock_cloner.h"
 
 #include "gtest/gtest.h"
 
 namespace art {
 
+using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
+using HInstructionMap = SuperblockCloner::HInstructionMap;
+
 // This class provides methods and helpers for testing various cloning and copying routines:
 // individual instruction cloning and cloning of the more coarse-grain structures.
 class SuperblockClonerTest : public OptimizingUnitTest {
@@ -182,4 +186,121 @@ TEST_F(SuperblockClonerTest, IndividualInstrCloner) {
   EXPECT_NE(new_suspend_check, nullptr);
 }
 
+// Tests SuperblockCloner::CloneBasicBlocks - checks instruction cloning and initial remapping of
+// instructions' inputs.
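+// Note: CreateBasicLoopControlFlow/CreateBasicLoopDataFlow are assumed to build a single
+// natural loop with a fixed set of instructions; the exact expectations below
+// (e.g. hir_map.size() == 12u) are tied to those helpers.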
+TEST_F(SuperblockClonerTest, CloneBasicBlocks) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+  ArenaAllocator* arena = graph_->GetAllocator();
+
+  CreateBasicLoopControlFlow(&header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+  ASSERT_TRUE(CheckGraph());
+
+  ArenaBitVector orig_bb_set(
+      arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+  HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+  orig_bb_set.Union(&loop_info->GetBlocks());
+
+  SuperblockCloner cloner(graph_,
+                          &orig_bb_set,
+                          &bb_map,
+                          &hir_map);
+  EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+  cloner.CloneBasicBlocks();
+
+  EXPECT_EQ(bb_map.size(), 2u);
+  EXPECT_EQ(hir_map.size(), 12u);
+
+  for (auto it : hir_map) {
+    HInstruction* orig_instr = it.first;
+    HInstruction* copy_instr = it.second;
+
+    EXPECT_EQ(cloner.GetBlockCopy(orig_instr->GetBlock()), copy_instr->GetBlock());
+    EXPECT_EQ(orig_instr->GetKind(), copy_instr->GetKind());
+    EXPECT_EQ(orig_instr->GetType(), copy_instr->GetType());
+
+    if (orig_instr->IsPhi()) {
+      continue;
+    }
+
+    EXPECT_EQ(orig_instr->InputCount(), copy_instr->InputCount());
+
+    // Check that the inputs match.
+    for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
+      HInstruction* orig_input = orig_instr->InputAt(i);
+      HInstruction* copy_input = copy_instr->InputAt(i);
+      if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+        EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+      } else {
+        EXPECT_EQ(orig_input, copy_input);
+      }
+    }
+
+    EXPECT_EQ(orig_instr->HasEnvironment(), copy_instr->HasEnvironment());
+
+    // Check that the environments match.
+    if (orig_instr->HasEnvironment()) {
+      HEnvironment* orig_env = orig_instr->GetEnvironment();
+      HEnvironment* copy_env = copy_instr->GetEnvironment();
+
+      EXPECT_EQ(copy_env->GetParent(), nullptr);
+      EXPECT_EQ(orig_env->Size(), copy_env->Size());
+
+      for (size_t i = 0, e = orig_env->Size(); i < e; i++) {
+        HInstruction* orig_input = orig_env->GetInstructionAt(i);
+        HInstruction* copy_input = copy_env->GetInstructionAt(i);
+        if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+          EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+        } else {
+          EXPECT_EQ(orig_input, copy_input);
+        }
+      }
+    }
+  }
+}
+
+// Tests SuperblockCloner::CleanUpControlFlow - checks the algorithms for local adjustment of the
+// control flow.
+TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+  ArenaAllocator* arena = graph_->GetAllocator();
+
+  CreateBasicLoopControlFlow(&header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+  ASSERT_TRUE(CheckGraph());
+
+  ArenaBitVector orig_bb_set(
+      arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+  orig_bb_set.Union(&loop_info->GetBlocks());
+
+  SuperblockCloner cloner(graph_,
+                          &orig_bb_set,
+                          nullptr,
+                          nullptr);
+  EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+  cloner.FindAndSetLocalAreaForAdjustments();
+  cloner.CleanUpControlFlow();
+
+  EXPECT_TRUE(CheckGraph());
+
+  EXPECT_TRUE(entry_block_->Dominates(header));
+  EXPECT_TRUE(entry_block_->Dominates(exit_block_));
+
+  EXPECT_EQ(header->GetLoopInformation(), loop_info);
+  EXPECT_EQ(loop_info->GetHeader(), header);
+  EXPECT_TRUE(loop_info->Contains(*loop_body));
+  EXPECT_TRUE(loop_info->IsBackEdge(*loop_body));
+}
+
 }  // namespace art
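For orientation, here is a minimal sketch (not part of the patch) of how a client optimization might drive the new API end to end. It assumes a valid SSA-form HGraph and a natural loop; the remap-set contents are illustrative (redirecting every incoming edge to the copies effectively replaces the loop with its clone, leaving the original blocks unreachable for later cleanup, rather than implementing peeling or unrolling), and the ArenaHashSet adapter/Insert usage is assumed to match the containers used above.

    // Hypothetical driver, shown only to illustrate the intended call sequence.
    static bool CloneWholeLoop(HGraph* graph, HLoopInformation* loop) {
      ArenaAllocator* arena = graph->GetAllocator();

      // 1. Collect the blocks to copy.
      ArenaBitVector orig_bb_set(
          arena, graph->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
      orig_bb_set.Union(&loop->GetBlocks());

      // 2. Prepare the correspondence maps that the cloner fills in.
      SuperblockCloner::HBasicBlockMap bb_map(
          std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
      SuperblockCloner::HInstructionMap hir_map(
          std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));

      SuperblockCloner cloner(graph, &orig_bb_set, &bb_map, &hir_map);
      if (!cloner.IsSubgraphClonable()) {
        return false;
      }

      // 3. Describe the desired edge treatment: internal edges keep their default handling,
      //    while every incoming edge (pred outside the loop, header inside) is redirected to
      //    the copy of the header.
      SuperblockCloner::HEdgeSet remap_orig_internal(
          arena->Adapter(kArenaAllocSuperblockCloner));
      SuperblockCloner::HEdgeSet remap_copy_internal(
          arena->Adapter(kArenaAllocSuperblockCloner));
      SuperblockCloner::HEdgeSet remap_incoming(
          arena->Adapter(kArenaAllocSuperblockCloner));
      HBasicBlock* header = loop->GetHeader();
      for (HBasicBlock* pred : header->GetPredecessors()) {
        if (!loop->Contains(*pred)) {
          remap_incoming.Insert(HEdge(pred, header));
        }
      }
      cloner.SetSuccessorRemappingInfo(&remap_orig_internal,
                                       &remap_copy_internal,
                                       &remap_incoming);

      // 4. Clone, fix control flow and data flow, then clean up.
      cloner.Run();
      cloner.CleanUp();
      return true;
    }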