Diffstat (limited to 'compiler/optimizing')
35 files changed, 1475 insertions, 356 deletions
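The hunks below teach each backend (ARM64, ARM, MIPS, MIPS64, x86, x86-64) plus the graph checker and visualizer about the new TypeCheckKind::kBitstringCheck. As a reading aid, here is a minimal C++ sketch (not ART code) of the fast subtype test the new GenerateBitstringTypeCheckCompare() helpers emit: the low bits of the mirror::Class status word hold an encoded path-to-root, and the check reduces to a masked compare against two compile-time constants carried as extra inputs of the HTypeCheckInstruction.

    #include <cstdint>

    // Illustrative only. `status` stands in for the 32-bit class status word
    // loaded from mirror::Class::StatusOffset(); `path_to_root` and `mask`
    // are the constants from the instruction (inputs 2 and 3). The codegen
    // asserts IsPowerOfTwo(mask + 1), so `mask` keeps exactly the low
    // `mask_bits` bits that encode the path to the root class.
    inline bool BitstringTypeCheck(uint32_t status,
                                   uint32_t path_to_root,
                                   uint32_t mask) {
      return (status & mask) == path_to_root;
    }

The per-architecture helpers realize this same compare with whatever is cheapest locally (LDRH + CMP, UBFX, XORI + SLL, or a 16-bit CMPW against memory on x86).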
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index a4873202b2..3bd5e14539 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -438,6 +438,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {        case TypeCheckKind::kArrayCheck:        case TypeCheckKind::kUnresolvedCheck:          return false; +      case TypeCheckKind::kBitstringCheck: +        return true;      }      LOG(FATAL) << "Unreachable";      UNREACHABLE(); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index a024df8537..273346ab4a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -2129,6 +2129,26 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod    __ Bind(slow_path->GetExitLabel());  } +void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare( +    HTypeCheckInstruction* check, vixl::aarch64::Register temp) { +  uint32_t path_to_root = check->GetBitstringPathToRoot(); +  uint32_t mask = check->GetBitstringMask(); +  DCHECK(IsPowerOfTwo(mask + 1)); +  size_t mask_bits = WhichPowerOf2(mask + 1); + +  if (mask_bits == 16u) { +    // Load only the bitstring part of the status word. +    __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset())); +  } else { +    // /* uint32_t */ temp = temp->status_ +    __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset())); +    // Extract the bitstring bits. +    __ Ubfx(temp, temp, 0, mask_bits); +  } +  // Compare the bitstring bits to `path_to_root`. +  __ Cmp(temp, path_to_root); +} +  void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {    BarrierType type = BarrierAll; @@ -3866,6 +3886,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {      case TypeCheckKind::kInterfaceCheck:        call_kind = LocationSummary::kCallOnSlowPath;        break; +    case TypeCheckKind::kBitstringCheck: +      break;    }    LocationSummary* locations = @@ -3874,7 +3896,13 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {      locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.    }    locations->SetInAt(0, Location::RequiresRegister()); -  locations->SetInAt(1, Location::RequiresRegister()); +  if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); +  } else { +    locations->SetInAt(1, Location::RequiresRegister()); +  }    // The "out" register is used as a temporary, so it overlaps with the inputs.    // Note that TypeCheckSlowPathARM64 uses this register too.    locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -3887,7 +3915,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {    LocationSummary* locations = instruction->GetLocations();    Location obj_loc = locations->InAt(0);    Register obj = InputRegisterAt(instruction, 0); -  Register cls = InputRegisterAt(instruction, 1); +  Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) +      ? 
Register() +      : InputRegisterAt(instruction, 1);    Location out_loc = locations->Out();    Register out = OutputRegister(instruction);    const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -4073,6 +4103,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {        }        break;      } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        out_loc, +                                        obj_loc, +                                        class_offset, +                                        maybe_temp_loc, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, out); +      __ Cset(out, eq); +      if (zero.IsLinked()) { +        __ B(&done); +      } +      break; +    }    }    if (zero.IsLinked()) { @@ -4095,7 +4142,13 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {    LocationSummary* locations =        new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);    locations->SetInAt(0, Location::RequiresRegister()); -  locations->SetInAt(1, Location::RequiresRegister()); +  if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); +  } else { +    locations->SetInAt(1, Location::RequiresRegister()); +  }    // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.    locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));  } @@ -4105,7 +4158,9 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {    LocationSummary* locations = instruction->GetLocations();    Location obj_loc = locations->InAt(0);    Register obj = InputRegisterAt(instruction, 0); -  Register cls = InputRegisterAt(instruction, 1); +  Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) +      ? 
Register() +      : InputRegisterAt(instruction, 1);    const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);    DCHECK_GE(num_temps, 1u);    DCHECK_LE(num_temps, 3u); @@ -4286,6 +4341,20 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {        __ B(ne, &start_loop);        break;      } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        temp_loc, +                                        obj_loc, +                                        class_offset, +                                        maybe_temp2_loc, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, temp); +      __ B(ne, type_check_slow_path->GetEntryLabel()); +      break; +    }    }    __ Bind(&done); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index a8a9802f9a..6a52eecbd3 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -264,6 +264,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {   private:    void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,                                          vixl::aarch64::Register class_reg); +  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, +                                         vixl::aarch64::Register temp);    void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);    void HandleBinaryOp(HBinaryOperation* instr); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 6ebcc67b49..b38a006305 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -7523,6 +7523,67 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(    __ Bind(slow_path->GetExitLabel());  } +void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare( +    HTypeCheckInstruction* check, +    vixl32::Register temp, +    vixl32::FlagsUpdate flags_update) { +  uint32_t path_to_root = check->GetBitstringPathToRoot(); +  uint32_t mask = check->GetBitstringMask(); +  DCHECK(IsPowerOfTwo(mask + 1)); +  size_t mask_bits = WhichPowerOf2(mask + 1); + +  // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs +  // the Z flag for BNE. This is indicated by the `flags_update` parameter. +  if (mask_bits == 16u) { +    // Load only the bitstring part of the status word. +    __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); +    // Check if the bitstring bits are equal to `path_to_root`. +    if (flags_update == SetFlags) { +      __ Cmp(temp, path_to_root); +    } else { +      __ Sub(temp, temp, path_to_root); +    } +  } else { +    // /* uint32_t */ temp = temp->status_ +    __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); +    if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) { +      // Compare the bitstring bits using SUB. +      __ Sub(temp, temp, path_to_root); +      // Shift out bits that do not contribute to the comparison. 
+      __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); +    } else if (IsUint<16>(path_to_root)) { +      if (temp.IsLow()) { +        // Note: Optimized for size but contains one more dependent instruction than necessary. +        //       MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the +        //       macro assembler would use the high reg IP for the constant by default. +        // Compare the bitstring bits using SUB. +        __ Sub(temp, temp, path_to_root & 0x00ffu);  // 16-bit SUB (immediate) T2 +        __ Sub(temp, temp, path_to_root & 0xff00u);  // 32-bit SUB (immediate) T3 +        // Shift out bits that do not contribute to the comparison. +        __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); +      } else { +        // Extract the bitstring bits. +        __ Ubfx(temp, temp, 0, mask_bits); +        // Check if the bitstring bits are equal to `path_to_root`. +        if (flags_update == SetFlags) { +          __ Cmp(temp, path_to_root); +        } else { +          __ Sub(temp, temp, path_to_root); +        } +      } +    } else { +      // Shift out bits that do not contribute to the comparison. +      __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); +      // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`. +      if (flags_update == SetFlags) { +        __ Cmp(temp, path_to_root << (32u - mask_bits)); +      } else { +        __ Sub(temp, temp, path_to_root << (32u - mask_bits)); +      } +    } +  } +} +  HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(      HLoadString::LoadKind desired_string_load_kind) {    switch (desired_string_load_kind) { @@ -7714,6 +7775,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {      case TypeCheckKind::kInterfaceCheck:        call_kind = LocationSummary::kCallOnSlowPath;        break; +    case TypeCheckKind::kBitstringCheck: +      break;    }    LocationSummary* locations = @@ -7722,7 +7785,13 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {      locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.    }    locations->SetInAt(0, Location::RequiresRegister()); -  locations->SetInAt(1, Location::RequiresRegister()); +  if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); +  } else { +    locations->SetInAt(1, Location::RequiresRegister()); +  }    // The "out" register is used as a temporary, so it overlaps with the inputs.    // Note that TypeCheckSlowPathARM uses this register too.    locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7737,7 +7806,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)    LocationSummary* locations = instruction->GetLocations();    Location obj_loc = locations->InAt(0);    vixl32::Register obj = InputRegisterAt(instruction, 0); -  vixl32::Register cls = InputRegisterAt(instruction, 1); +  vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) +      ? 
vixl32::Register() +      : InputRegisterAt(instruction, 1);    Location out_loc = locations->Out();    vixl32::Register out = OutputRegister(instruction);    const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7977,6 +8048,26 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)        __ B(slow_path->GetEntryLabel());        break;      } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        out_loc, +                                        obj_loc, +                                        class_offset, +                                        maybe_temp_loc, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, out, DontCare); +      // If `out` is a low reg and we would have another low reg temp, we could +      // optimize this as RSBS+ADC, see GenerateConditionWithZero(). +      // +      // Also, in some cases when `out` is a low reg and we're loading a constant to IP +      // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size +      // would be the same and we would have fewer direct data dependencies. +      codegen_->GenerateConditionWithZero(kCondEQ, out, out);  // CLZ+LSR +      break; +    }    }    if (done.IsReferenced()) { @@ -7994,7 +8085,13 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {    LocationSummary* locations =        new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);    locations->SetInAt(0, Location::RequiresRegister()); -  locations->SetInAt(1, Location::RequiresRegister()); +  if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); +  } else { +    locations->SetInAt(1, Location::RequiresRegister()); +  }    locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));  } @@ -8003,7 +8100,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {    LocationSummary* locations = instruction->GetLocations();    Location obj_loc = locations->InAt(0);    vixl32::Register obj = InputRegisterAt(instruction, 0); -  vixl32::Register cls = InputRegisterAt(instruction, 1); +  vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) +      ? 
vixl32::Register() +      : InputRegisterAt(instruction, 1);    Location temp_loc = locations->GetTemp(0);    vixl32::Register temp = RegisterFrom(temp_loc);    const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -8188,6 +8287,20 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {        __ B(ne, &start_loop, /* far_target */ false);        break;      } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        temp_loc, +                                        obj_loc, +                                        class_offset, +                                        maybe_temp2_loc, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags); +      __ B(ne, type_check_slow_path->GetEntryLabel()); +      break; +    }    }    if (done.IsReferenced()) {      __ Bind(&done); diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 6a07e36022..2114ea1ba1 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -322,6 +322,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {    void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);    void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path,                                          vixl32::Register class_reg); +  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, +                                         vixl::aarch32::Register temp, +                                         vixl::aarch32::FlagsUpdate flags_update);    void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);    void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);    void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index be9ff48a6b..25e2eddbfa 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1950,6 +1950,34 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode    __ Bind(slow_path->GetExitLabel());  } +void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, +                                                                     Register temp) { +  uint32_t path_to_root = check->GetBitstringPathToRoot(); +  uint32_t mask = check->GetBitstringMask(); +  DCHECK(IsPowerOfTwo(mask + 1)); +  size_t mask_bits = WhichPowerOf2(mask + 1); + +  if (mask_bits == 16u) { +    // Load only the bitstring part of the status word. +    __ LoadFromOffset( +        kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); +    // Compare the bitstring bits using XOR. +    __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); +  } else { +    // /* uint32_t */ temp = temp->status_ +    __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); +    // Compare the bitstring bits using XOR. 
+    if (IsUint<16>(path_to_root)) { +      __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); +    } else { +      __ LoadConst32(TMP, path_to_root); +      __ Xor(temp, temp, TMP); +    } +    // Shift out bits that do not contribute to the comparison. +    __ Sll(temp, temp, 32 - mask_bits); +  } +} +  void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {    __ Sync(0);  // Only stype 0 is supported.  } @@ -3301,7 +3329,13 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {    LocationSummary* locations =        new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);    locations->SetInAt(0, Location::RequiresRegister()); -  locations->SetInAt(1, Location::RequiresRegister()); +  if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); +  } else { +    locations->SetInAt(1, Location::RequiresRegister()); +  }    locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));  } @@ -3310,7 +3344,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {    LocationSummary* locations = instruction->GetLocations();    Location obj_loc = locations->InAt(0);    Register obj = obj_loc.AsRegister<Register>(); -  Register cls = locations->InAt(1).AsRegister<Register>(); +  Location cls = locations->InAt(1);    Location temp_loc = locations->GetTemp(0);    Register temp = temp_loc.AsRegister<Register>();    const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -3349,7 +3383,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {                                          kWithoutReadBarrier);        // Jump to slow path for throwing the exception or doing a        // more involved array check. -      __ Bne(temp, cls, slow_path->GetEntryLabel()); +      __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel());        break;      } @@ -3375,7 +3409,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {        // exception.        __ Beqz(temp, slow_path->GetEntryLabel());        // Otherwise, compare the classes. -      __ Bne(temp, cls, &loop); +      __ Bne(temp, cls.AsRegister<Register>(), &loop);        break;      } @@ -3390,7 +3424,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {        // Walk over the class hierarchy to find a match.        MipsLabel loop;        __ Bind(&loop); -      __ Beq(temp, cls, &done); +      __ Beq(temp, cls.AsRegister<Register>(), &done);        // /* HeapReference<Class> */ temp = temp->super_class_        GenerateReferenceLoadOneRegister(instruction,                                         temp_loc, @@ -3413,7 +3447,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {                                          maybe_temp2_loc,                                          kWithoutReadBarrier);        // Do an exact check. -      __ Beq(temp, cls, &done); +      __ Beq(temp, cls.AsRegister<Register>(), &done);        // Otherwise, we need to check that the object's class is a non-primitive array.        
// /* HeapReference<Class> */ temp = temp->component_type_        GenerateReferenceLoadOneRegister(instruction, @@ -3472,7 +3506,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {        // Go to next interface.        __ Addiu(TMP, TMP, -2);        // Compare the classes and continue the loop if they do not match. -      __ Bne(AT, cls, &loop); +      __ Bne(AT, cls.AsRegister<Register>(), &loop); +      break; +    } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        temp_loc, +                                        obj_loc, +                                        class_offset, +                                        maybe_temp2_loc, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, temp); +      __ Bnez(temp, slow_path->GetEntryLabel());        break;      }    } @@ -7415,6 +7463,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {      case TypeCheckKind::kInterfaceCheck:        call_kind = LocationSummary::kCallOnSlowPath;        break; +    case TypeCheckKind::kBitstringCheck: +      break;    }    LocationSummary* locations = @@ -7423,7 +7473,13 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {      locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.    }    locations->SetInAt(0, Location::RequiresRegister()); -  locations->SetInAt(1, Location::RequiresRegister()); +  if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); +  } else { +    locations->SetInAt(1, Location::RequiresRegister()); +  }    // The output does overlap inputs.    // Note that TypeCheckSlowPathMIPS uses this register too.    locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7435,7 +7491,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {    LocationSummary* locations = instruction->GetLocations();    Location obj_loc = locations->InAt(0);    Register obj = obj_loc.AsRegister<Register>(); -  Register cls = locations->InAt(1).AsRegister<Register>(); +  Location cls = locations->InAt(1);    Location out_loc = locations->Out();    Register out = out_loc.AsRegister<Register>();    const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7467,7 +7523,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {                                          maybe_temp_loc,                                          read_barrier_option);        // Classes must be equal for the instanceof to succeed. -      __ Xor(out, out, cls); +      __ Xor(out, out, cls.AsRegister<Register>());        __ Sltiu(out, out, 1);        break;      } @@ -7494,7 +7550,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {                                         read_barrier_option);        // If `out` is null, we use it for the result, and jump to `done`.        
__ Beqz(out, &done); -      __ Bne(out, cls, &loop); +      __ Bne(out, cls.AsRegister<Register>(), &loop);        __ LoadConst32(out, 1);        break;      } @@ -7512,7 +7568,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {        // Walk over the class hierarchy to find a match.        MipsLabel loop, success;        __ Bind(&loop); -      __ Beq(out, cls, &success); +      __ Beq(out, cls.AsRegister<Register>(), &success);        // /* HeapReference<Class> */ out = out->super_class_        GenerateReferenceLoadOneRegister(instruction,                                         out_loc, @@ -7539,7 +7595,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {                                          read_barrier_option);        // Do an exact check.        MipsLabel success; -      __ Beq(out, cls, &success); +      __ Beq(out, cls.AsRegister<Register>(), &success);        // Otherwise, we need to check that the object's class is a non-primitive array.        // /* HeapReference<Class> */ out = out->component_type_        GenerateReferenceLoadOneRegister(instruction, @@ -7571,7 +7627,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {        slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS(            instruction, /* is_fatal */ false);        codegen_->AddSlowPath(slow_path); -      __ Bne(out, cls, slow_path->GetEntryLabel()); +      __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel());        __ LoadConst32(out, 1);        break;      } @@ -7603,6 +7659,20 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {        __ B(slow_path->GetEntryLabel());        break;      } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        out_loc, +                                        obj_loc, +                                        class_offset, +                                        maybe_temp_loc, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, out); +      __ Sltiu(out, out, 1); +      break; +    }    }    __ Bind(&done); diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 1f1743ff9e..2e7c736dbd 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -237,6 +237,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {   private:    void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);    void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); +  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);    void HandleBinaryOp(HBinaryOperation* operation);    void HandleCondition(HCondition* instruction);    void HandleShift(HBinaryOperation* operation); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index f8851b4eea..5b07b55cbb 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1794,6 +1794,34 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo    __ Bind(slow_path->GetExitLabel());  } +void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* 
check, +                                                                       GpuRegister temp) { +  uint32_t path_to_root = check->GetBitstringPathToRoot(); +  uint32_t mask = check->GetBitstringMask(); +  DCHECK(IsPowerOfTwo(mask + 1)); +  size_t mask_bits = WhichPowerOf2(mask + 1); + +  if (mask_bits == 16u) { +    // Load only the bitstring part of the status word. +    __ LoadFromOffset( +        kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); +    // Compare the bitstring bits using XOR. +    __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); +  } else { +    // /* uint32_t */ temp = temp->status_ +    __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); +    // Compare the bitstring bits using XOR. +    if (IsUint<16>(path_to_root)) { +      __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); +    } else { +      __ LoadConst32(TMP, path_to_root); +      __ Xor(temp, temp, TMP); +    } +    // Shift out bits that do not contribute to the comparison. +    __ Sll(temp, temp, 32 - mask_bits); +  } +} +  void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {    __ Sync(0);  // only stype 0 is supported  } @@ -2854,7 +2882,13 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {    LocationSummary* locations =        new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);    locations->SetInAt(0, Location::RequiresRegister()); -  locations->SetInAt(1, Location::RequiresRegister()); +  if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); +  } else { +    locations->SetInAt(1, Location::RequiresRegister()); +  }    locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));  } @@ -2863,7 +2897,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {    LocationSummary* locations = instruction->GetLocations();    Location obj_loc = locations->InAt(0);    GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); -  GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); +  Location cls = locations->InAt(1);    Location temp_loc = locations->GetTemp(0);    GpuRegister temp = temp_loc.AsRegister<GpuRegister>();    const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -2902,7 +2936,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {                                          kWithoutReadBarrier);        // Jump to slow path for throwing the exception or doing a        // more involved array check. -      __ Bnec(temp, cls, slow_path->GetEntryLabel()); +      __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());        break;      } @@ -2928,7 +2962,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {        // exception.        __ Beqzc(temp, slow_path->GetEntryLabel());        // Otherwise, compare the classes. -      __ Bnec(temp, cls, &loop); +      __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop);        break;      } @@ -2943,7 +2977,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {        // Walk over the class hierarchy to find a match.        
Mips64Label loop;        __ Bind(&loop); -      __ Beqc(temp, cls, &done); +      __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);        // /* HeapReference<Class> */ temp = temp->super_class_        GenerateReferenceLoadOneRegister(instruction,                                         temp_loc, @@ -2966,7 +3000,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {                                          maybe_temp2_loc,                                          kWithoutReadBarrier);        // Do an exact check. -      __ Beqc(temp, cls, &done); +      __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);        // Otherwise, we need to check that the object's class is a non-primitive array.        // /* HeapReference<Class> */ temp = temp->component_type_        GenerateReferenceLoadOneRegister(instruction, @@ -3025,7 +3059,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {        __ Daddiu(temp, temp, 2 * kHeapReferenceSize);        __ Addiu(TMP, TMP, -2);        // Compare the classes and continue the loop if they do not match. -      __ Bnec(AT, cls, &loop); +      __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop); +      break; +    } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        temp_loc, +                                        obj_loc, +                                        class_offset, +                                        maybe_temp2_loc, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, temp); +      __ Bnezc(temp, slow_path->GetEntryLabel());        break;      }    } @@ -5529,6 +5577,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {      case TypeCheckKind::kInterfaceCheck:        call_kind = LocationSummary::kCallOnSlowPath;        break; +    case TypeCheckKind::kBitstringCheck: +      break;    }    LocationSummary* locations = @@ -5537,7 +5587,13 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {      locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.    }    locations->SetInAt(0, Location::RequiresRegister()); -  locations->SetInAt(1, Location::RequiresRegister()); +  if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); +  } else { +    locations->SetInAt(1, Location::RequiresRegister()); +  }    // The output does overlap inputs.    // Note that TypeCheckSlowPathMIPS64 uses this register too.    
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -5549,7 +5605,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {    LocationSummary* locations = instruction->GetLocations();    Location obj_loc = locations->InAt(0);    GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); -  GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); +  Location cls = locations->InAt(1);    Location out_loc = locations->Out();    GpuRegister out = out_loc.AsRegister<GpuRegister>();    const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -5581,7 +5637,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {                                          maybe_temp_loc,                                          read_barrier_option);        // Classes must be equal for the instanceof to succeed. -      __ Xor(out, out, cls); +      __ Xor(out, out, cls.AsRegister<GpuRegister>());        __ Sltiu(out, out, 1);        break;      } @@ -5608,7 +5664,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {                                         read_barrier_option);        // If `out` is null, we use it for the result, and jump to `done`.        __ Beqzc(out, &done); -      __ Bnec(out, cls, &loop); +      __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop);        __ LoadConst32(out, 1);        break;      } @@ -5626,7 +5682,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {        // Walk over the class hierarchy to find a match.        Mips64Label loop, success;        __ Bind(&loop); -      __ Beqc(out, cls, &success); +      __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);        // /* HeapReference<Class> */ out = out->super_class_        GenerateReferenceLoadOneRegister(instruction,                                         out_loc, @@ -5653,7 +5709,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {                                          read_barrier_option);        // Do an exact check.        Mips64Label success; -      __ Beqc(out, cls, &success); +      __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);        // Otherwise, we need to check that the object's class is a non-primitive array.        
// /* HeapReference<Class> */ out = out->component_type_        GenerateReferenceLoadOneRegister(instruction, @@ -5685,7 +5741,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {        slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64(            instruction, /* is_fatal */ false);        codegen_->AddSlowPath(slow_path); -      __ Bnec(out, cls, slow_path->GetEntryLabel()); +      __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());        __ LoadConst32(out, 1);        break;      } @@ -5717,6 +5773,20 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {        __ Bc(slow_path->GetEntryLabel());        break;      } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        out_loc, +                                        obj_loc, +                                        class_offset, +                                        maybe_temp_loc, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, out); +      __ Sltiu(out, out, 1); +      break; +    }    }    __ Bind(&done); diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 74c947e5d5..6e69e4611a 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -233,6 +233,7 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {   private:    void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); +  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp);    void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);    void HandleBinaryOp(HBinaryOperation* operation);    void HandleCondition(HCondition* instruction); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 4818084a72..4053f557d9 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -6571,6 +6571,26 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(    // No need for memory fence, thanks to the X86 memory model.  } +void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, +                                                                    Register temp) { +  uint32_t path_to_root = check->GetBitstringPathToRoot(); +  uint32_t mask = check->GetBitstringMask(); +  DCHECK(IsPowerOfTwo(mask + 1)); +  size_t mask_bits = WhichPowerOf2(mask + 1); + +  if (mask_bits == 16u) { +    // Compare the bitstring in memory. +    __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); +  } else { +    // /* uint32_t */ temp = temp->status_ +    __ movl(temp, Address(temp, mirror::Class::StatusOffset())); +    // Compare the bitstring bits using SUB. +    __ subl(temp, Immediate(path_to_root)); +    // Shift out bits that do not contribute to the comparison. 
+    __ shll(temp, Immediate(32u - mask_bits)); +  } +} +  HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(      HLoadString::LoadKind desired_string_load_kind) {    switch (desired_string_load_kind) { @@ -6764,6 +6784,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {      case TypeCheckKind::kInterfaceCheck:        call_kind = LocationSummary::kCallOnSlowPath;        break; +    case TypeCheckKind::kBitstringCheck: +      break;    }    LocationSummary* locations = @@ -6772,7 +6794,13 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {      locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.    }    locations->SetInAt(0, Location::RequiresRegister()); -  locations->SetInAt(1, Location::Any()); +  if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); +  } else { +    locations->SetInAt(1, Location::Any()); +  }    // Note that TypeCheckSlowPathX86 uses this "out" register too.    locations->SetOut(Location::RequiresRegister());    // When read barriers are enabled, we need a temporary register for some cases. @@ -6993,6 +7021,21 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {        }        break;      } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        out_loc, +                                        obj_loc, +                                        class_offset, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, out); +      __ j(kNotEqual, &zero); +      __ movl(out, Immediate(1)); +      __ jmp(&done); +      break; +    }    }    if (zero.IsLinked()) { @@ -7019,6 +7062,10 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {      // Require a register for the interface check since there is a loop that compares the class to      // a memory address.      
locations->SetInAt(1, Location::RequiresRegister()); +  } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));    } else {      locations->SetInAt(1, Location::Any());    } @@ -7238,6 +7285,19 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {        __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());        break;      } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        temp_loc, +                                        obj_loc, +                                        class_offset, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, temp); +      __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); +      break; +    }    }    __ Bind(&done); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 9c537a7371..6c76e27d35 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -211,6 +211,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {    // the suspend call.    void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);    void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg); +  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);    void HandleBitwiseOperation(HBinaryOperation* instruction);    void GenerateDivRemIntegral(HBinaryOperation* instruction);    void DivRemOneOrMinusOne(HBinaryOperation* instruction); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index c378c5b957..496d79d6c8 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -5716,6 +5716,26 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(    // No need for memory fence, thanks to the x86-64 memory model.  } +void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, +                                                                       CpuRegister temp) { +  uint32_t path_to_root = check->GetBitstringPathToRoot(); +  uint32_t mask = check->GetBitstringMask(); +  DCHECK(IsPowerOfTwo(mask + 1)); +  size_t mask_bits = WhichPowerOf2(mask + 1); + +  if (mask_bits == 16u) { +    // Compare the bitstring in memory. +    __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); +  } else { +    // /* uint32_t */ temp = temp->status_ +    __ movl(temp, Address(temp, mirror::Class::StatusOffset())); +    // Compare the bitstring bits using SUB. +    __ subl(temp, Immediate(path_to_root)); +    // Shift out bits that do not contribute to the comparison. 
+    __ shll(temp, Immediate(32u - mask_bits)); +  } +} +  HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(      HLoadClass::LoadKind desired_class_load_kind) {    switch (desired_class_load_kind) { @@ -6082,6 +6102,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {      case TypeCheckKind::kInterfaceCheck:        call_kind = LocationSummary::kCallOnSlowPath;        break; +    case TypeCheckKind::kBitstringCheck: +      break;    }    LocationSummary* locations = @@ -6090,7 +6112,13 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {      locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.    }    locations->SetInAt(0, Location::RequiresRegister()); -  locations->SetInAt(1, Location::Any()); +  if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); +  } else { +    locations->SetInAt(1, Location::Any()); +  }    // Note that TypeCheckSlowPathX86_64 uses this "out" register too.    locations->SetOut(Location::RequiresRegister());    // When read barriers are enabled, we need a temporary register for @@ -6319,6 +6347,27 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {        }        break;      } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        out_loc, +                                        obj_loc, +                                        class_offset, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, out); +      if (zero.IsLinked()) { +        __ j(kNotEqual, &zero); +        __ movl(out, Immediate(1)); +        __ jmp(&done); +      } else { +        __ setcc(kEqual, out); +        // setcc only sets the low byte. +        __ andl(out, Immediate(1)); +      } +      break; +    }    }    if (zero.IsLinked()) { @@ -6345,6 +6394,10 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {      // Require a register for the interface check since there is a loop that compares the class to      // a memory address.      locations->SetInAt(1, Location::RequiresRegister()); +  } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { +    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); +    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); +    locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));    } else {      locations->SetInAt(1, Location::Any());    } @@ -6531,7 +6584,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {        break;      } -    case TypeCheckKind::kInterfaceCheck: +    case TypeCheckKind::kInterfaceCheck: {        // Fast path for the interface check. Try to avoid read barriers to improve the fast path.        // We can not get false positives by doing this.        
// /* HeapReference<Class> */ temp = obj->klass_ @@ -6567,6 +6620,20 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {        // If `cls` was poisoned above, unpoison it.        __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());        break; +    } + +    case TypeCheckKind::kBitstringCheck: { +      // /* HeapReference<Class> */ temp = obj->klass_ +      GenerateReferenceLoadTwoRegisters(instruction, +                                        temp_loc, +                                        obj_loc, +                                        class_offset, +                                        kWithoutReadBarrier); + +      GenerateBitstringTypeCheckCompare(instruction, temp); +      __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); +      break; +    }    }    if (done.IsLinked()) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index e8d1efe702..9a4c53b524 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -208,6 +208,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {    // the suspend call.    void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);    void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg); +  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);    void HandleBitwiseOperation(HBinaryOperation* operation);    void GenerateRemFP(HRem* rem);    void DivRemOneOrMinusOne(HBinaryOperation* instruction); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index c88baa8610..fbcbe3608e 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -25,6 +25,11 @@  #include "base/bit_vector-inl.h"  #include "base/scoped_arena_allocator.h"  #include "base/scoped_arena_containers.h" +#include "handle.h" +#include "mirror/class.h" +#include "obj_ptr-inl.h" +#include "scoped_thread_state_change-inl.h" +#include "subtype_check.h"  namespace art { @@ -548,30 +553,85 @@ void GraphChecker::VisitReturnVoid(HReturnVoid* ret) {    }  } -void GraphChecker::VisitCheckCast(HCheckCast* check) { -  VisitInstruction(check); -  HInstruction* input = check->InputAt(1); -  if (!input->IsLoadClass()) { -    AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", +void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, +                                                size_t input_pos, +                                                bool check_value, +                                                uint32_t expected_value, +                                                const char* name) { +  if (!check->InputAt(input_pos)->IsIntConstant()) { +    AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.",                            check->DebugName(),                            check->GetId(), -                          input->DebugName(), -                          input->GetId())); +                          input_pos, +                          name, +                          check->InputAt(2)->DebugName(), +                          check->InputAt(2)->GetId())); +  } else if (check_value) { +    uint32_t actual_value = +        static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue()); +    if (actual_value != expected_value) { +      
AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.", +                            check->DebugName(), +                            check->GetId(), +                            name, +                            actual_value, +                            expected_value)); +    }    }  } -void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { -  VisitInstruction(instruction); -  HInstruction* input = instruction->InputAt(1); -  if (!input->IsLoadClass()) { -    AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", -                          instruction->DebugName(), -                          instruction->GetId(), -                          input->DebugName(), -                          input->GetId())); +void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) { +  VisitInstruction(check); +  HInstruction* input = check->InputAt(1); +  if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { +    if (!input->IsNullConstant()) { +      AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.", +                            check->DebugName(), +                            check->GetId(), +                            input->DebugName(), +                            input->GetId())); +    } +    bool check_values = false; +    BitString::StorageType expected_path_to_root = 0u; +    BitString::StorageType expected_mask = 0u; +    { +      ScopedObjectAccess soa(Thread::Current()); +      ObjPtr<mirror::Class> klass = check->GetClass().Get(); +      MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); +      SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); +      if (state == SubtypeCheckInfo::kAssigned) { +        expected_path_to_root = +            SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass); +        expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass); +        check_values = true; +      } else { +        AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.", +                              check->DebugName(), +                              check->GetId())); +      } +    } +    CheckTypeCheckBitstringInput( +        check, /* input_pos */ 2, check_values, expected_path_to_root, "path_to_root"); +    CheckTypeCheckBitstringInput(check, /* input_pos */ 3, check_values, expected_mask, "mask"); +  } else { +    if (!input->IsLoadClass()) { +      AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.", +                            check->DebugName(), +                            check->GetId(), +                            input->DebugName(), +                            input->GetId())); +    }    }  } +void GraphChecker::VisitCheckCast(HCheckCast* check) { +  HandleTypeCheckInstruction(check); +} + +void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { +  HandleTypeCheckInstruction(instruction); +} +  void GraphChecker::HandleLoop(HBasicBlock* loop_header) {    int id = loop_header->GetBlockId();    HLoopInformation* loop_information = loop_header->GetLoopInformation(); diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 0f0b49d240..dbedc40518 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -71,6 +71,12 @@ class GraphChecker : public HGraphDelegateVisitor {    void 
VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;    void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; +  void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, +                                    size_t input_pos, +                                    bool check_value, +                                    uint32_t expected_value, +                                    const char* name); +  void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction);    void HandleLoop(HBasicBlock* loop_header);    void HandleBooleanInput(HInstruction* instruction, size_t input_index); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 5ff31cead5..54d4644580 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -390,16 +390,23 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {      StartAttributeStream("load_kind") << load_string->GetLoadKind();    } -  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { -    StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind(); +  void HandleTypeCheckInstruction(HTypeCheckInstruction* check) { +    StartAttributeStream("check_kind") << check->GetTypeCheckKind();      StartAttributeStream("must_do_null_check") << std::boolalpha -        << check_cast->MustDoNullCheck() << std::noboolalpha; +        << check->MustDoNullCheck() << std::noboolalpha; +    if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { +      StartAttributeStream("path_to_root") << std::hex +          << "0x" << check->GetBitstringPathToRoot() << std::dec; +      StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec; +    } +  } + +  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { +    HandleTypeCheckInstruction(check_cast);    }    void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE { -    StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind(); -    StartAttributeStream("must_do_null_check") << std::boolalpha -        << instance_of->MustDoNullCheck() << std::noboolalpha; +    HandleTypeCheckInstruction(instance_of);    }    void VisitArrayLength(HArrayLength* array_length) OVERRIDE { @@ -576,6 +583,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {        }        StartAttributeStream() << input_list;      } +    if (instruction->GetDexPc() != kNoDexPc) { +      StartAttributeStream("dex_pc") << instruction->GetDexPc(); +    } else { +      StartAttributeStream("dex_pc") << "n/a"; +    }      instruction->Accept(this);      if (instruction->HasEnvironment()) {        StringList envs; @@ -641,20 +653,32 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {            << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;      } +    // For the builder and the inliner, we want to add extra information on HInstructions +    // that have reference types, and also HInstanceOf/HCheckcast.      if ((IsPass(HGraphBuilder::kBuilderPassName)          || IsPass(HInliner::kInlinerPassName)) -        && (instruction->GetType() == DataType::Type::kReference)) { -      ReferenceTypeInfo info = instruction->IsLoadClass() -        ? 
instruction->AsLoadClass()->GetLoadedClassRTI() -        : instruction->GetReferenceTypeInfo(); +        && (instruction->GetType() == DataType::Type::kReference || +            instruction->IsInstanceOf() || +            instruction->IsCheckCast())) { +      ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference) +          ? instruction->IsLoadClass() +              ? instruction->AsLoadClass()->GetLoadedClassRTI() +              : instruction->GetReferenceTypeInfo() +          : instruction->IsInstanceOf() +              ? instruction->AsInstanceOf()->GetTargetClassRTI() +              : instruction->AsCheckCast()->GetTargetClassRTI();        ScopedObjectAccess soa(Thread::Current());        if (info.IsValid()) {          StartAttributeStream("klass")              << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get()); -        StartAttributeStream("can_be_null") -            << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; +        if (instruction->GetType() == DataType::Type::kReference) { +          StartAttributeStream("can_be_null") +              << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; +        }          StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; -      } else if (instruction->IsLoadClass()) { +      } else if (instruction->IsLoadClass() || +                 instruction->IsInstanceOf() || +                 instruction->IsCheckCast()) {          StartAttributeStream("klass") << "unresolved";        } else {          // The NullConstant may be added to the graph during other passes that happen between diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 0a310ca940..55eca2316a 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -352,13 +352,15 @@ void InductionVarRange::Replace(HInstruction* instruction,  }  bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const { -  HInductionVarAnalysis::InductionInfo *trip = -      induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); -  if (trip != nullptr && !IsUnsafeTripCount(trip)) { -    IsConstant(trip->op_a, kExact, trip_count); -    return true; -  } -  return false; +  bool is_constant_unused = false; +  return CheckForFiniteAndConstantProps(loop, &is_constant_unused, trip_count); +} + +bool InductionVarRange::HasKnownTripCount(HLoopInformation* loop, +                                          /*out*/ int64_t* trip_count) const { +  bool is_constant = false; +  CheckForFiniteAndConstantProps(loop, &is_constant, trip_count); +  return is_constant;  }  bool InductionVarRange::IsUnitStride(HInstruction* context, @@ -417,6 +419,18 @@ HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop,  // Private class methods.  
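// Illustrative use of the two queries above; the caller below is hypothetical, not part
// of the patch:
//
//   int64_t trip_count = 0;
//   if (!induction_range_.IsFinite(loop_info, &trip_count)) {
//     return false;  // The loop may not terminate: leave it alone.
//   }
//   if (induction_range_.HasKnownTripCount(loop_info, &trip_count)) {
//     // The loop is finite and `trip_count` now holds its exact constant value.
//   }
//
// Both queries funnel into CheckForFiniteAndConstantProps() below: IsFinite() only asks
// whether the header logic terminates, while HasKnownTripCount() additionally requires
// the trip count to be an exact constant.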
// +bool InductionVarRange::CheckForFiniteAndConstantProps(HLoopInformation* loop, +                                                       /*out*/ bool* is_constant, +                                                       /*out*/ int64_t* trip_count) const { +  HInductionVarAnalysis::InductionInfo *trip = +      induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); +  if (trip != nullptr && !IsUnsafeTripCount(trip)) { +    *is_constant = IsConstant(trip->op_a, kExact, trip_count); +    return true; +  } +  return false; +} +  bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,                                     ConstantRequest request,                                     /*out*/ int64_t* value) const { diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 0b980f596a..906dc6bb7b 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -161,9 +161,15 @@ class InductionVarRange {    }    /** -   * Checks if header logic of a loop terminates. Sets trip-count tc if known. +   * Checks if header logic of a loop terminates. If trip count is known sets 'trip_count' to its +   * value.     */ -  bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const; +  bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const; + +  /** +   * Checks if a trip count is known for the loop and sets 'trip_count' to its value in this case. +   */ +  bool HasKnownTripCount(HLoopInformation* loop, /*out*/ int64_t* trip_count) const;    /**     * Checks if the given instruction is a unit stride induction inside the closest enveloping @@ -194,6 +200,14 @@ class InductionVarRange {    };    /** +   * Checks if header logic of a loop terminates. If trip count is known (constant) sets +   * 'is_constant' to true and 'trip_count' to the trip count value. +   */ +  bool CheckForFiniteAndConstantProps(HLoopInformation* loop, +                                      /*out*/ bool* is_constant, +                                      /*out*/ int64_t* trip_count) const; + +  /**     * Returns true if exact or upper/lower bound on the given induction     * information is known as a 64-bit constant, which is returned in value.     
*/ diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index c7aef3779d..9647dd5d41 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -1815,29 +1815,6 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object,    }  } -static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) -    REQUIRES_SHARED(Locks::mutator_lock_) { -  if (cls == nullptr) { -    return TypeCheckKind::kUnresolvedCheck; -  } else if (cls->IsInterface()) { -    return TypeCheckKind::kInterfaceCheck; -  } else if (cls->IsArrayClass()) { -    if (cls->GetComponentType()->IsObjectClass()) { -      return TypeCheckKind::kArrayObjectCheck; -    } else if (cls->CannotBeAssignedFromOtherTypes()) { -      return TypeCheckKind::kExactCheck; -    } else { -      return TypeCheckKind::kArrayCheck; -    } -  } else if (cls->IsFinal()) { -    return TypeCheckKind::kExactCheck; -  } else if (cls->IsAbstract()) { -    return TypeCheckKind::kAbstractClassCheck; -  } else { -    return TypeCheckKind::kClassHierarchyCheck; -  } -} -  void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) {    HLoadString* load_string =        new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc); @@ -1852,22 +1829,8 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_  HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) {    ScopedObjectAccess soa(Thread::Current());    const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); -  Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); -  Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass( -      soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_)); - -  bool needs_access_check = true; -  if (klass != nullptr) { -    if (klass->IsPublic()) { -      needs_access_check = false; -    } else { -      ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); -      if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) { -        needs_access_check = false; -      } -    } -  } - +  Handle<mirror::Class> klass = ResolveClass(soa, type_index); +  bool needs_access_check = LoadClassNeedsAccessCheck(klass);    return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);  } @@ -1912,25 +1875,83 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index,    return load_class;  } +Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa, +                                                        dex::TypeIndex type_index) { +  Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); +  ObjPtr<mirror::Class> klass = compiler_driver_->ResolveClass( +      soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_); +  // TODO: Avoid creating excessive handles if the method references the same class repeatedly. +  // (Use a map on the local_allocator_.) 
+  return handles_->NewHandle(klass); +} + +bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) { +  if (klass == nullptr) { +    return true; +  } else if (klass->IsPublic()) { +    return false; +  } else { +    ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); +    return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get()); +  } +} +  void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,                                           uint8_t destination,                                           uint8_t reference,                                           dex::TypeIndex type_index,                                           uint32_t dex_pc) {    HInstruction* object = LoadLocal(reference, DataType::Type::kReference); -  HLoadClass* cls = BuildLoadClass(type_index, dex_pc);    ScopedObjectAccess soa(Thread::Current()); -  TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass()); +  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); +  Handle<mirror::Class> klass = ResolveClass(soa, type_index); +  bool needs_access_check = LoadClassNeedsAccessCheck(klass); +  TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind( +      klass.Get(), code_generator_, compiler_driver_, needs_access_check); + +  HInstruction* class_or_null = nullptr; +  HIntConstant* bitstring_path_to_root = nullptr; +  HIntConstant* bitstring_mask = nullptr; +  if (check_kind == TypeCheckKind::kBitstringCheck) { +    // TODO: Allow using the bitstring check also if we need an access check. +    DCHECK(!needs_access_check); +    class_or_null = graph_->GetNullConstant(dex_pc); +    MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); +    uint32_t path_to_root = +        SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get()); +    uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get()); +    bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc); +    bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc); +  } else { +    class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); +  } +  DCHECK(class_or_null != nullptr); +    if (instruction.Opcode() == Instruction::INSTANCE_OF) { -    AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc)); +    AppendInstruction(new (allocator_) HInstanceOf(object, +                                                   class_or_null, +                                                   check_kind, +                                                   klass, +                                                   dex_pc, +                                                   allocator_, +                                                   bitstring_path_to_root, +                                                   bitstring_mask));      UpdateLocal(destination, current_block_->GetLastInstruction());    } else {      DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);      // We emit a CheckCast followed by a BoundType. CheckCast is a statement      // which may throw. If it succeeds BoundType sets the new type of `object`      // for all subsequent uses. 
-    AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc)); +    AppendInstruction( +        new (allocator_) HCheckCast(object, +                                    class_or_null, +                                    check_kind, +                                    klass, +                                    dex_pc, +                                    allocator_, +                                    bitstring_path_to_root, +                                    bitstring_mask));      AppendInstruction(new (allocator_) HBoundType(object, dex_pc));      UpdateLocal(reference, current_block_->GetLastInstruction());    } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 4428c53277..f78829232d 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -39,6 +39,7 @@ class DexCompilationUnit;  class HBasicBlockBuilder;  class Instruction;  class OptimizingCompilerStats; +class ScopedObjectAccess;  class SsaBuilder;  class VariableSizedHandleScope; @@ -232,6 +233,12 @@ class HInstructionBuilder : public ValueObject {                               bool needs_access_check)        REQUIRES_SHARED(Locks::mutator_lock_); +  Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index) +      REQUIRES_SHARED(Locks::mutator_lock_); + +  bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) +      REQUIRES_SHARED(Locks::mutator_lock_); +    // Returns the outer-most compiling method's class.    ObjPtr<mirror::Class> GetOutermostCompilingClass() const; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 0b2297d157..676fe6bcb7 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -579,7 +579,9 @@ bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInst  // Returns whether doing a type test between the class of `object` against `klass` has  // a statically known outcome. The result of the test is stored in `outcome`. -static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { +static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti, +                                     HInstruction* object, +                                     /*out*/bool* outcome) {    DCHECK(!object->IsNullConstant()) << "Null constants should be special cased";    ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo();    ScopedObjectAccess soa(Thread::Current()); @@ -589,7 +591,6 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo      return false;    } -  ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI();    if (!class_rti.IsValid()) {      // Happens when the loaded class is unresolved.      return false; @@ -614,8 +615,8 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo  void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {    HInstruction* object = check_cast->InputAt(0); -  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); -  if (load_class->NeedsAccessCheck()) { +  if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && +      check_cast->GetTargetClass()->NeedsAccessCheck()) {      // If we need to perform an access check we cannot remove the instruction.      
return;    } @@ -633,15 +634,18 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {    // Note: The `outcome` is initialized to please valgrind - the compiler can reorder    // the return value check with the `outcome` check, b/27651442 .    bool outcome = false; -  if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { +  if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) {      if (outcome) {        check_cast->GetBlock()->RemoveInstruction(check_cast);        MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast); -      if (!load_class->HasUses()) { -        // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. -        // However, here we know that it cannot because the checkcast was successfull, hence -        // the class was already loaded. -        load_class->GetBlock()->RemoveInstruction(load_class); +      if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { +        HLoadClass* load_class = check_cast->GetTargetClass(); +        if (!load_class->HasUses()) { +          // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. +          // However, here we know that it cannot because the checkcast was successfull, hence +          // the class was already loaded. +          load_class->GetBlock()->RemoveInstruction(load_class); +        }        }      } else {        // Don't do anything for exceptional cases for now. Ideally we should remove @@ -652,8 +656,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {  void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {    HInstruction* object = instruction->InputAt(0); -  HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass(); -  if (load_class->NeedsAccessCheck()) { +  if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && +      instruction->GetTargetClass()->NeedsAccessCheck()) {      // If we need to perform an access check we cannot remove the instruction.      return;    } @@ -676,7 +680,7 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {    // Note: The `outcome` is initialized to please valgrind - the compiler can reorder    // the return value check with the `outcome` check, b/27651442 .    bool outcome = false; -  if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { +  if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) {      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf);      if (outcome && can_be_null) {        // Type test will succeed, we just need a null test. @@ -689,11 +693,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {      }      RecordSimplification();      instruction->GetBlock()->RemoveInstruction(instruction); -    if (outcome && !load_class->HasUses()) { -      // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. -      // However, here we know that it cannot because the instanceof check was successfull, hence -      // the class was already loaded. -      load_class->GetBlock()->RemoveInstruction(load_class); +    if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { +      HLoadClass* load_class = instruction->GetTargetClass(); +      if (!load_class->HasUses()) { +        // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. 
+        // However, here we know that it cannot because the instanceof check was successfull, hence +        // the class was already loaded. +        load_class->GetBlock()->RemoveInstruction(load_class); +      }      }    }  } diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc new file mode 100644 index 0000000000..cd3bdaf016 --- /dev/null +++ b/compiler/optimizing/loop_analysis.cc @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loop_analysis.h" + +namespace art { + +void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, +                                                LoopAnalysisInfo* analysis_results) { +  for (HBlocksInLoopIterator block_it(*loop_info); +       !block_it.Done(); +       block_it.Advance()) { +    HBasicBlock* block = block_it.Current(); + +    for (HBasicBlock* successor : block->GetSuccessors()) { +      if (!loop_info->Contains(*successor)) { +        analysis_results->exits_num_++; +      } +    } + +    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { +      HInstruction* instruction = it.Current(); +      if (MakesScalarUnrollingNonBeneficial(instruction)) { +        analysis_results->has_instructions_preventing_scalar_unrolling_ = true; +      } +      analysis_results->instr_num_++; +    } +    analysis_results->bb_num_++; +  } +} + +class Arm64LoopHelper : public ArchDefaultLoopHelper { + public: +  // Scalar loop unrolling parameters and heuristics. +  // +  // Maximum possible unrolling factor. +  static constexpr uint32_t kArm64ScalarMaxUnrollFactor = 2; +  // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled. +  static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40; +  // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled. +  static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8; + +  // SIMD loop unrolling parameters and heuristics. +  // +  // Maximum possible unrolling factor. +  static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8; +  // Loop's maximum instruction count. Loops with higher count will not be unrolled. 
+  static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50; + +  bool IsLoopTooBigForScalarUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE { +    size_t instr_num = loop_analysis_info->GetNumberOfInstructions(); +    size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks(); +    return (instr_num >= kArm64ScalarHeuristicMaxBodySizeInstr || +            bb_num >= kArm64ScalarHeuristicMaxBodySizeBlocks); +  } + +  uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED, +                                    uint64_t trip_count) const OVERRIDE { +    uint32_t desired_unrolling_factor = kArm64ScalarMaxUnrollFactor; +    if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) { +      return kNoUnrollingFactor; +    } + +    return desired_unrolling_factor; +  } + +  uint32_t GetSIMDUnrollingFactor(HBasicBlock* block, +                                  int64_t trip_count, +                                  uint32_t max_peel, +                                  uint32_t vector_length) const OVERRIDE { +    // Don't unroll with insufficient iterations. +    // TODO: Unroll loops with unknown trip count. +    DCHECK_NE(vector_length, 0u); +    if (trip_count < (2 * vector_length + max_peel)) { +      return kNoUnrollingFactor; +    } +    // Don't unroll for large loop body size. +    uint32_t instruction_count = block->GetInstructions().CountSize(); +    if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) { +      return kNoUnrollingFactor; +    } +    // Find a beneficial unroll factor with the following restrictions: +    //  - At least one iteration of the transformed loop should be executed. +    //  - The loop body shouldn't be "too big" (heuristic). + +    uint32_t uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count; +    uint32_t uf2 = (trip_count - max_peel) / vector_length; +    uint32_t unroll_factor = +        TruncToPowerOfTwo(std::min({uf1, uf2, kArm64SimdMaxUnrollFactor})); +    DCHECK_GE(unroll_factor, 1u); +    return unroll_factor; +  } +}; + +ArchDefaultLoopHelper* ArchDefaultLoopHelper::Create(InstructionSet isa, +                                                     ArenaAllocator* allocator) { +  switch (isa) { +    case InstructionSet::kArm64: { +      return new (allocator) Arm64LoopHelper; +    } +    default: { +      return new (allocator) ArchDefaultLoopHelper; +    } +  } +} + +}  // namespace art diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h new file mode 100644 index 0000000000..bad406f10b --- /dev/null +++ b/compiler/optimizing/loop_analysis.h @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ +#define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ + +#include "nodes.h" + +namespace art { + +class LoopAnalysis; + +// No loop unrolling factor (just one copy of the loop-body). 
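// Worked example for the Arm64 SIMD heuristic above (hypothetical values, not taken from
// the patch): with instruction_count = 10, trip_count = 100, max_peel = 0 and
// vector_length = 8,
//   uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count = 50 / 10 = 5
//   uf2 = (trip_count - max_peel) / vector_length = (100 - 0) / 8 = 12
//   unroll_factor = TruncToPowerOfTwo(std::min({5, 12, kArm64SimdMaxUnrollFactor})) = 4
// so the vector loop body is replicated four times and each iteration of the unrolled
// vector loop then covers 4 * 8 = 32 array elements.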
+static constexpr uint32_t kNoUnrollingFactor = 1; + +// Class to hold cached information on properties of the loop. +class LoopAnalysisInfo : public ValueObject { + public: +  explicit LoopAnalysisInfo(HLoopInformation* loop_info) +      : bb_num_(0), +        instr_num_(0), +        exits_num_(0), +        has_instructions_preventing_scalar_unrolling_(false), +        loop_info_(loop_info) {} + +  size_t GetNumberOfBasicBlocks() const { return bb_num_; } +  size_t GetNumberOfInstructions() const { return instr_num_; } +  size_t GetNumberOfExits() const { return exits_num_; } + +  bool HasInstructionsPreventingScalarUnrolling() const { +    return has_instructions_preventing_scalar_unrolling_; +  } + +  const HLoopInformation* GetLoopInfo() const { return loop_info_; } + + private: +  // Number of basic blocks in the loop body. +  size_t bb_num_; +  // Number of instructions in the loop body. +  size_t instr_num_; +  // Number of loop's exits. +  size_t exits_num_; +  // Whether the loop has instructions which make scalar loop unrolling non-beneficial. +  bool has_instructions_preventing_scalar_unrolling_; + +  // Corresponding HLoopInformation. +  const HLoopInformation* loop_info_; + +  friend class LoopAnalysis; +}; + +// Placeholder class for methods and routines used to analyse loops, calculate loop properties +// and characteristics. +class LoopAnalysis : public ValueObject { + public: +  // Calculates loops basic properties like body size, exits number, etc. and fills +  // 'analysis_results' with this information. +  static void CalculateLoopBasicProperties(HLoopInformation* loop_info, +                                           LoopAnalysisInfo* analysis_results); + + private: +  // Returns whether an instruction makes scalar loop unrolling non-beneficial. +  // +  // If in the loop body we have a dex/runtime call then its contribution to the whole +  // loop performance will probably prevail. So unrolling optimization will not bring +  // any noticeable performance improvement however will increase the code size. +  static bool MakesScalarUnrollingNonBeneficial(HInstruction* instruction) { +    return (instruction->IsNewArray() || +        instruction->IsNewInstance() || +        instruction->IsUnresolvedInstanceFieldGet() || +        instruction->IsUnresolvedInstanceFieldSet() || +        instruction->IsUnresolvedStaticFieldGet() || +        instruction->IsUnresolvedStaticFieldSet() || +        // TODO: Unroll loops with intrinsified invokes. +        instruction->IsInvoke() || +        // TODO: Unroll loops with ClinitChecks. +        instruction->IsClinitCheck()); +  } +}; + +// +// Helper class which holds target-dependent methods and constants needed for loop optimizations. +// +// To support peeling/unrolling for a new architecture one needs to create new helper class, +// inherit it from this and add implementation for the following methods. +// +class ArchDefaultLoopHelper : public ArenaObject<kArenaAllocOptimization> { + public: +  virtual ~ArchDefaultLoopHelper() {} + +  // Creates an instance of specialised helper for the target or default helper if the target +  // doesn't support loop peeling and unrolling. +  static ArchDefaultLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator); + +  // Returns whether the loop is too big for loop unrolling by checking its total number of +  // basic blocks and instructions. 
+  // +  // If the loop body has too many instructions then unrolling optimization will not bring +  // any noticeable performance improvement however will increase the code size. +  // +  // Returns 'true' by default, should be overridden by particular target loop helper. +  virtual bool IsLoopTooBigForScalarUnrolling( +      LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; } + +  // Returns optimal scalar unrolling factor for the loop. +  // +  // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. +  virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED, +                                            uint64_t trip_count ATTRIBUTE_UNUSED) const { +    return kNoUnrollingFactor; +  } + +  // Returns optimal SIMD unrolling factor for the loop. +  // +  // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. +  virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED, +                                          int64_t trip_count ATTRIBUTE_UNUSED, +                                          uint32_t max_peel ATTRIBUTE_UNUSED, +                                          uint32_t vector_length ATTRIBUTE_UNUSED) const { +    return kNoUnrollingFactor; +  } +}; + +}  // namespace art + +#endif  // ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 758aca2d0c..71e24de141 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -33,8 +33,8 @@ namespace art {  // Enables vectorization (SIMDization) in the loop optimizer.  static constexpr bool kEnableVectorization = true; -// No loop unrolling factor (just one copy of the loop-body). -static constexpr uint32_t kNoUnrollingFactor = 1; +// Enables scalar loop unrolling in the loop optimizer. +static constexpr bool kEnableScalarUnrolling = false;  //  // Static helpers. @@ -227,6 +227,7 @@ static bool IsNarrowerOperands(HInstruction* a,                                 /*out*/ HInstruction** r,                                 /*out*/ HInstruction** s,                                 /*out*/ bool* is_unsigned) { +  DCHECK(a != nullptr && b != nullptr);    // Look for a matching sign extension.    DataType::Type stype = HVecOperation::ToSignedType(type);    if (IsSignExtensionAndGet(a, stype, r) && IsSignExtensionAndGet(b, stype, s)) { @@ -247,6 +248,7 @@ static bool IsNarrowerOperand(HInstruction* a,                                DataType::Type type,                                /*out*/ HInstruction** r,                                /*out*/ bool* is_unsigned) { +  DCHECK(a != nullptr);    // Look for a matching sign extension.    DataType::Type stype = HVecOperation::ToSignedType(type);    if (IsSignExtensionAndGet(a, stype, r)) { @@ -270,20 +272,28 @@ static uint32_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type    return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type));  } -// Detect up to two instructions a and b, and an acccumulated constant c. 
-static bool IsAddConstHelper(HInstruction* instruction, -                             /*out*/ HInstruction** a, -                             /*out*/ HInstruction** b, -                             /*out*/ int64_t* c, -                             int32_t depth) { -  static constexpr int32_t kMaxDepth = 8;  // don't search too deep +// Detect up to two added operands a and b and an acccumulated constant c. +static bool IsAddConst(HInstruction* instruction, +                       /*out*/ HInstruction** a, +                       /*out*/ HInstruction** b, +                       /*out*/ int64_t* c, +                       int32_t depth = 8) {  // don't search too deep    int64_t value = 0; +  // Enter add/sub while still within reasonable depth. +  if (depth > 0) { +    if (instruction->IsAdd()) { +      return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1) && +             IsAddConst(instruction->InputAt(1), a, b, c, depth - 1); +    } else if (instruction->IsSub() && +               IsInt64AndGet(instruction->InputAt(1), &value)) { +      *c -= value; +      return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1); +    } +  } +  // Otherwise, deal with leaf nodes.    if (IsInt64AndGet(instruction, &value)) {      *c += value;      return true; -  } else if (instruction->IsAdd() && depth <= kMaxDepth) { -    return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) && -           IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1);    } else if (*a == nullptr) {      *a = instruction;      return true; @@ -291,42 +301,40 @@ static bool IsAddConstHelper(HInstruction* instruction,      *b = instruction;      return true;    } -  return false;  // too many non-const operands +  return false;  // too many operands  } -// Detect a + b + c for an optional constant c. -static bool IsAddConst(HInstruction* instruction, -                       /*out*/ HInstruction** a, -                       /*out*/ HInstruction** b, -                       /*out*/ int64_t* c) { -  if (instruction->IsAdd()) { -    // Try to find a + b and accumulated c. -    if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) && -        IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) && -        *b != nullptr) { -      return true; +// Detect a + b + c with optional constant c. +static bool IsAddConst2(HGraph* graph, +                        HInstruction* instruction, +                        /*out*/ HInstruction** a, +                        /*out*/ HInstruction** b, +                        /*out*/ int64_t* c) { +  if (IsAddConst(instruction, a, b, c) && *a != nullptr) { +    if (*b == nullptr) { +      // Constant is usually already present, unless accumulated. +      *b = graph->GetConstant(instruction->GetType(), (*c)); +      *c = 0;      } -    // Found a + b. -    *a = instruction->InputAt(0); -    *b = instruction->InputAt(1); -    *c = 0;      return true;    }    return false;  } -// Detect a + c for constant c. -static bool IsAddConst(HInstruction* instruction, -                       /*out*/ HInstruction** a, -                       /*out*/ int64_t* c) { -  if (instruction->IsAdd()) { -    if (IsInt64AndGet(instruction->InputAt(0), c)) { -      *a = instruction->InputAt(1); -      return true; -    } else if (IsInt64AndGet(instruction->InputAt(1), c)) { -      *a = instruction->InputAt(0); -      return true; -    } +// Detect a direct a - b or a hidden a - (-c). 
+static bool IsSubConst2(HGraph* graph, +                        HInstruction* instruction, +                        /*out*/ HInstruction** a, +                        /*out*/ HInstruction** b) { +  int64_t c = 0; +  if (instruction->IsSub()) { +    *a = instruction->InputAt(0); +    *b = instruction->InputAt(1); +    return true; +  } else if (IsAddConst(instruction, a, b, &c) && *a != nullptr && *b == nullptr) { +    // Constant for the hidden subtraction. +    *b = graph->GetConstant(instruction->GetType(), -c); +    return true;    }    return false;  } @@ -378,7 +386,8 @@ static bool CanSetRange(DataType::Type type,  }  // Accept various saturated addition forms. -static bool IsSaturatedAdd(HInstruction* clippee, +static bool IsSaturatedAdd(HInstruction* a, +                           HInstruction* b,                             DataType::Type type,                             int64_t lo,                             int64_t hi, @@ -390,8 +399,7 @@ static bool IsSaturatedAdd(HInstruction* clippee,    // Tighten the range for signed single clipping on constant.    if (!is_unsigned) {      int64_t c = 0; -    HInstruction* notused = nullptr; -    if (IsAddConst(clippee, ¬used, &c)) { +    if (IsInt64AndGet(a, &c) || IsInt64AndGet(b, &c)) {        // For c in proper range and narrower operand r:        //    MIN(r + c,  127) c > 0        // or MAX(r + c, -128) c < 0 (and possibly redundant bound). @@ -413,7 +421,7 @@ static bool IsSaturatedAdd(HInstruction* clippee,  }  // Accept various saturated subtraction forms. -static bool IsSaturatedSub(HInstruction* clippee, +static bool IsSaturatedSub(HInstruction* a,                             DataType::Type type,                             int64_t lo,                             int64_t hi, @@ -425,7 +433,7 @@ static bool IsSaturatedSub(HInstruction* clippee,    // Tighten the range for signed single clipping on constant.    if (!is_unsigned) {      int64_t c = 0; -    if (IsInt64AndGet(clippee->InputAt(0), /*out*/ &c)) { +    if (IsInt64AndGet(a, /*out*/ &c)) {        // For c in proper range and narrower operand r:        //    MIN(c - r,  127) c > 0        // or MAX(c - r, -128) c < 0 (and possibly redundant bound). @@ -532,7 +540,11 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,        vector_preheader_(nullptr),        vector_header_(nullptr),        vector_body_(nullptr), -      vector_index_(nullptr) { +      vector_index_(nullptr), +      arch_loop_helper_(ArchDefaultLoopHelper::Create(compiler_driver_ != nullptr +                                                          ? compiler_driver_->GetInstructionSet() +                                                          : InstructionSet::kNone, +                                                      global_allocator_)) {  }  void HLoopOptimization::Run() { @@ -743,7 +755,7 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) {    }  } -bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { +bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {    HBasicBlock* header = node->loop_info->GetHeader();    HBasicBlock* preheader = node->loop_info->GetPreHeader();    // Ensure loop header logic is finite. 
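The rewritten IsAddConst(), IsAddConst2() and IsSubConst2() above flatten an expression into at most two non-constant operands plus one accumulated constant, so a single matcher now serves plain sums, rounded sums and hidden subtractions. A minimal self-contained sketch of that flattening, using a hypothetical Expr node in place of the real HInstruction graph:

#include <cassert>
#include <cstdint>

// Hypothetical, simplified expression node standing in for HInstruction.
struct Expr {
  enum Kind { kConst, kAdd, kSub, kLeaf };
  Kind kind;
  int64_t value;    // Only meaningful for kConst.
  const Expr* lhs;  // Only meaningful for kAdd/kSub.
  const Expr* rhs;  // Only meaningful for kAdd/kSub.
};

// Mirrors the shape of IsAddConst(): collect up to two non-constant operands `a` and `b`,
// fold every constant into `c`, and only recurse through a few levels of add/sub.
static bool FlattenAddConst(const Expr* e,
                            const Expr** a,
                            const Expr** b,
                            int64_t* c,
                            int depth = 8) {
  if (depth > 0) {
    if (e->kind == Expr::kAdd) {
      return FlattenAddConst(e->lhs, a, b, c, depth - 1) &&
             FlattenAddConst(e->rhs, a, b, c, depth - 1);
    }
    if (e->kind == Expr::kSub && e->rhs->kind == Expr::kConst) {
      *c -= e->rhs->value;
      return FlattenAddConst(e->lhs, a, b, c, depth - 1);
    }
  }
  if (e->kind == Expr::kConst) {
    *c += e->value;
    return true;
  }
  if (*a == nullptr) { *a = e; return true; }
  if (*b == nullptr) { *b = e; return true; }
  return false;  // Too many non-constant operands.
}

int main() {
  const Expr x{Expr::kLeaf, 0, nullptr, nullptr};
  const Expr y{Expr::kLeaf, 0, nullptr, nullptr};
  const Expr one{Expr::kConst, 1, nullptr, nullptr};
  const Expr x_plus_y{Expr::kAdd, 0, &x, &y};
  const Expr rounded{Expr::kAdd, 0, &x_plus_y, &one};

  // (x + y) + 1 flattens to a = x, b = y, c = 1: the shape the rounded halving add and
  // the saturation idioms look for.
  const Expr* a = nullptr;
  const Expr* b = nullptr;
  int64_t c = 0;
  assert(FlattenAddConst(&rounded, &a, &b, &c) && a == &x && b == &y && c == 1);

  // x + (-3) flattens to a = x, b = nullptr, c = -3; IsSubConst2() then materializes the
  // constant -c so the expression can be handled as the hidden subtraction x - 3.
  const Expr minus_three{Expr::kConst, -3, nullptr, nullptr};
  const Expr hidden_sub{Expr::kAdd, 0, &x, &minus_three};
  a = b = nullptr;
  c = 0;
  assert(FlattenAddConst(&hidden_sub, &a, &b, &c) && a == &x && b == nullptr && c == -3);
  return 0;
}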
@@ -813,6 +825,83 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {    return false;  } +bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { +  return TryOptimizeInnerLoopFinite(node) || +         TryUnrollingForBranchPenaltyReduction(node); +} + +void HLoopOptimization::PeelOrUnrollOnce(LoopNode* loop_node, +                                         bool do_unrolling, +                                         SuperblockCloner::HBasicBlockMap* bb_map, +                                         SuperblockCloner::HInstructionMap* hir_map) { +  // TODO: peel loop nests. +  DCHECK(loop_node->inner == nullptr); + +  // Check that loop info is up-to-date. +  HLoopInformation* loop_info = loop_node->loop_info; +  HBasicBlock* header = loop_info->GetHeader(); +  DCHECK(loop_info == header->GetLoopInformation()); + +  PeelUnrollHelper helper(loop_info, bb_map, hir_map); +  DCHECK(helper.IsLoopClonable()); +  HBasicBlock* new_header = do_unrolling ? helper.DoUnrolling() : helper.DoPeeling(); +  DCHECK(header == new_header); +  DCHECK(loop_info == new_header->GetLoopInformation()); +} + +// +// Loop unrolling: generic part methods. +// + +bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node) { +  // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests) +  // as InstructionSet is needed. +  if (!kEnableScalarUnrolling || compiler_driver_ == nullptr) { +    return false; +  } + +  HLoopInformation* loop_info = loop_node->loop_info; +  int64_t trip_count = 0; +  // Only unroll loops with a known tripcount. +  if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) { +    return false; +  } + +  uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count); +  if (unrolling_factor == kNoUnrollingFactor) { +    return false; +  } + +  LoopAnalysisInfo loop_analysis_info(loop_info); +  LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info); + +  // Check "IsLoopClonable" last as it can be time-consuming. +  if (arch_loop_helper_->IsLoopTooBigForScalarUnrolling(&loop_analysis_info) || +      (loop_analysis_info.GetNumberOfExits() > 1) || +      loop_analysis_info.HasInstructionsPreventingScalarUnrolling() || +      !PeelUnrollHelper::IsLoopClonable(loop_info)) { +    return false; +  } + +  // TODO: support other unrolling factors. +  DCHECK_EQ(unrolling_factor, 2u); + +  // Perform unrolling. +  ArenaAllocator* arena = loop_info->GetHeader()->GetGraph()->GetAllocator(); +  SuperblockCloner::HBasicBlockMap bb_map( +      std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner)); +  SuperblockCloner::HInstructionMap hir_map( +      std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner)); +  PeelOrUnrollOnce(loop_node, /* unrolling */ true, &bb_map, &hir_map); + +  // Remove the redundant loop check after unrolling. +  HIf* copy_hif = bb_map.Get(loop_info->GetHeader())->GetLastInstruction()->AsIf(); +  int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0; +  copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); + +  return true; +} +  //  // Loop vectorization. The implementation is based on the book by Aart J.C. Bik:  // "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance." @@ -943,7 +1032,8 @@ void HLoopOptimization::Vectorize(LoopNode* node,    HBasicBlock* preheader = node->loop_info->GetPreHeader();    // Pick a loop unrolling factor for the vector loop. 
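// Illustrative only, continuing the hypothetical Arm64 numbers used earlier: with
// vector_length_ = 8 and an unroll factor of 4, `chunk` below becomes 8 * 4 = 32, i.e.
// every iteration of the unrolled vector loop advances the induction by 32 elements,
// and the DCHECK that follows requires trip_count == 0 or
// trip_count >= MaxNumberPeeled() + 32.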
-  uint32_t unroll = GetUnrollingFactor(block, trip_count); +  uint32_t unroll = arch_loop_helper_->GetSIMDUnrollingFactor( +      block, trip_count, MaxNumberPeeled(), vector_length_);    uint32_t chunk = vector_length_ * unroll;    DCHECK(trip_count == 0 || (trip_count >= MaxNumberPeeled() + chunk)); @@ -1439,8 +1529,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,        return false;  // reject, unless all operands are same-extension narrower      }      // Accept MIN/MAX(x, y) for vectorizable operands. -    DCHECK(r != nullptr); -    DCHECK(s != nullptr); +    DCHECK(r != nullptr && s != nullptr);      if (generate_code && vector_mode_ != kVector) {  // de-idiom        r = opa;        s = opb; @@ -1944,31 +2033,37 @@ bool HLoopOptimization::VectorizeSaturationIdiom(LoopNode* node,        instruction->GetType() != DataType::Type::kInt64) {      return false;    } -  // Clipped addition or subtraction? +  // Clipped addition or subtraction on narrower operands? We will try both +  // formats since, e.g., x+c can be interpreted as x+c and x-(-c), depending +  // on what clipping values are used, to get most benefits.    int64_t lo = std::numeric_limits<int64_t>::min();    int64_t hi = std::numeric_limits<int64_t>::max();    HInstruction* clippee = FindClippee(instruction, &lo, &hi); -  bool is_add = true; -  if (clippee->IsAdd()) { -    is_add = true; -  } else if (clippee->IsSub()) { -    is_add = false; -  } else { -    return false;  // clippee is not add/sub -  } -  // Addition or subtraction on narrower operands? +  HInstruction* a = nullptr; +  HInstruction* b = nullptr;    HInstruction* r = nullptr;    HInstruction* s = nullptr;    bool is_unsigned = false; -  if (IsNarrowerOperands(clippee->InputAt(0), clippee->InputAt(1), type, &r, &s, &is_unsigned) && -      (is_add ? IsSaturatedAdd(clippee, type, lo, hi, is_unsigned) -              : IsSaturatedSub(clippee, type, lo, hi, is_unsigned))) { -    DCHECK(r != nullptr); -    DCHECK(s != nullptr); +  bool is_add = true; +  int64_t c = 0; +  // First try for saturated addition. +  if (IsAddConst2(graph_, clippee, /*out*/ &a, /*out*/ &b, /*out*/ &c) && c == 0 && +      IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) && +      IsSaturatedAdd(r, s, type, lo, hi, is_unsigned)) { +    is_add = true;    } else { -    return false; +    // Then try again for saturated subtraction. +    a = b = r = s = nullptr; +    if (IsSubConst2(graph_, clippee, /*out*/ &a, /*out*/ &b) && +        IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) && +        IsSaturatedSub(r, type, lo, hi, is_unsigned)) { +      is_add = false; +    } else { +      return false; +    }    }    // Accept saturation idiom for vectorizable operands. +  DCHECK(r != nullptr && s != nullptr);    if (generate_code && vector_mode_ != kVector) {  // de-idiom      r = instruction->InputAt(0);      s = instruction->InputAt(1); @@ -2019,8 +2114,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,      HInstruction* a = nullptr;      HInstruction* b = nullptr;      int64_t       c = 0; -    if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { -      DCHECK(a != nullptr && b != nullptr); +    if (IsAddConst2(graph_, instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {        // Accept c == 1 (rounded) or c == 0 (not rounded).        
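// Illustrative reading of the match above (hypothetical values): for a halving add of
// the form (x + y + 1) >> 1, IsAddConst2() yields a = x, b = y and c = 1, accepted here
// as the rounded variant; (x + y) >> 1 yields c = 0, the truncated variant; any other
// accumulated constant is rejected.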
bool is_rounded = false;        if (c == 1) { @@ -2042,8 +2136,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,        }        // Accept recognized halving add for vectorizable operands. Vectorized code uses the        // shorthand idiomatic operation. Sequential code uses the original scalar expressions. -      DCHECK(r != nullptr); -      DCHECK(s != nullptr); +      DCHECK(r != nullptr && s != nullptr);        if (generate_code && vector_mode_ != kVector) {  // de-idiom          r = instruction->InputAt(0);          s = instruction->InputAt(1); @@ -2093,19 +2186,11 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,    HInstruction* v = instruction->InputAt(1);    HInstruction* a = nullptr;    HInstruction* b = nullptr; -  if (v->GetType() == reduction_type && v->IsAbs()) { -    HInstruction* x = v->InputAt(0); -    if (x->GetType() == reduction_type) { -      int64_t c = 0; -      if (x->IsSub()) { -        a = x->InputAt(0); -        b = x->InputAt(1); -      } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) { -        b = graph_->GetConstant(reduction_type, -c);  // hidden SUB! -      } -    } -  } -  if (a == nullptr || b == nullptr) { +  if (v->IsAbs() && +      v->GetType() == reduction_type && +      IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) { +    DCHECK(a != nullptr && b != nullptr); +  } else {      return false;    }    // Accept same-type or consistent sign extension for narrower-type on operands a and b. @@ -2138,8 +2223,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,    }    // Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand    // idiomatic operation. Sequential code uses the original scalar expressions. -  DCHECK(r != nullptr); -  DCHECK(s != nullptr); +  DCHECK(r != nullptr && s != nullptr);    if (generate_code && vector_mode_ != kVector) {  // de-idiom      r = s = v->InputAt(0);    } @@ -2226,41 +2310,6 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) {    return true;  } -static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8; -static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50; - -uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) { -  uint32_t max_peel = MaxNumberPeeled(); -  switch (compiler_driver_->GetInstructionSet()) { -    case InstructionSet::kArm64: { -      // Don't unroll with insufficient iterations. -      // TODO: Unroll loops with unknown trip count. -      DCHECK_NE(vector_length_, 0u); -      if (trip_count < (2 * vector_length_ + max_peel)) { -        return kNoUnrollingFactor; -      } -      // Don't unroll for large loop body size. -      uint32_t instruction_count = block->GetInstructions().CountSize(); -      if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) { -        return kNoUnrollingFactor; -      } -      // Find a beneficial unroll factor with the following restrictions: -      //  - At least one iteration of the transformed loop should be executed. -      //  - The loop body shouldn't be "too big" (heuristic). 
-      uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count; -      uint32_t uf2 = (trip_count - max_peel) / vector_length_; -      uint32_t unroll_factor = -          TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR})); -      DCHECK_GE(unroll_factor, 1u); -      return unroll_factor; -    } -    case InstructionSet::kX86: -    case InstructionSet::kX86_64: -    default: -      return kNoUnrollingFactor; -  } -} -  //  // Helpers.  // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 9414e5a0c6..0120cffa56 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -20,12 +20,15 @@  #include "base/scoped_arena_allocator.h"  #include "base/scoped_arena_containers.h"  #include "induction_var_range.h" +#include "loop_analysis.h"  #include "nodes.h"  #include "optimization.h" +#include "superblock_cloner.h"  namespace art {  class CompilerDriver; +class ArchDefaultLoopHelper;  /**   * Loop optimizations. Builds a loop hierarchy and applies optimizations to @@ -135,10 +138,26 @@ class HLoopOptimization : public HOptimization {    void SimplifyInduction(LoopNode* node);    void SimplifyBlocks(LoopNode* node); -  // Performs optimizations specific to inner loop (empty loop removal, +  // Performs optimizations specific to inner loop with finite header logic (empty loop removal,    // unrolling, vectorization). Returns true if anything changed. +  bool TryOptimizeInnerLoopFinite(LoopNode* node); + +  // Performs optimizations specific to inner loop. Returns true if anything changed.    bool OptimizeInnerLoop(LoopNode* node); +  // Performs loop peeling/unrolling once (depends on the 'do_unrolling'); the transformation +  // preserves the header and the loop info. +  // +  // Note: the function records copying information about blocks and instructions. +  void PeelOrUnrollOnce(LoopNode* loop_node, +                        bool do_unrolling, +                        SuperblockCloner::HBasicBlockMap* bb_map, +                        SuperblockCloner::HInstructionMap* hir_map); + +  // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling +  // opportunities. Returns whether transformation happened. +  bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node); +    //    // Vectorization analysis and synthesis.    // @@ -203,7 +222,6 @@ class HLoopOptimization : public HOptimization {                              const ArrayReference* peeling_candidate);    uint32_t MaxNumberPeeled();    bool IsVectorizationProfitable(int64_t trip_count); -  uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);    //    // Helpers. @@ -297,6 +315,9 @@ class HLoopOptimization : public HOptimization {    HBasicBlock* vector_body_;  // body of the new loop    HInstruction* vector_index_;  // normalized index of the new loop +  // Helper for target-specific behaviour for loop optimizations. 
+  ArchDefaultLoopHelper* arch_loop_helper_; +    friend class LoopOptimizationTest;    DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index d3212cbbc0..f784f8f7f3 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -3103,6 +3103,8 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) {        return os << "array_object_check";      case TypeCheckKind::kArrayCheck:        return os << "array_check"; +    case TypeCheckKind::kBitstringCheck: +      return os << "bitstring_check";      default:        LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs);        UNREACHABLE(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index a8fcea2097..79d733060b 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -6178,8 +6178,7 @@ class HLoadClass FINAL : public HInstruction {          special_input_(HUserRecord<HInstruction*>(current_method)),          type_index_(type_index),          dex_file_(dex_file), -        klass_(klass), -        loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { +        klass_(klass) {      // Referrers class should not need access check. We never inline unverified      // methods so we can't possibly end up in this situation.      DCHECK(!is_referrers_class || !needs_access_check); @@ -6189,6 +6188,7 @@ class HLoadClass FINAL : public HInstruction {      SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);      SetPackedFlag<kFlagIsInBootImage>(false);      SetPackedFlag<kFlagGenerateClInitCheck>(false); +    SetPackedFlag<kFlagValidLoadedClassRTI>(false);    }    bool IsClonable() const OVERRIDE { return true; } @@ -6243,13 +6243,18 @@ class HLoadClass FINAL : public HInstruction {    }    ReferenceTypeInfo GetLoadedClassRTI() { -    return loaded_class_rti_; +    if (GetPackedFlag<kFlagValidLoadedClassRTI>()) { +      // Note: The is_exact flag from the return value should not be used. +      return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true); +    } else { +      return ReferenceTypeInfo::CreateInvalid(); +    }    } -  void SetLoadedClassRTI(ReferenceTypeInfo rti) { -    // Make sure we only set exact types (the loaded class should never be merged). -    DCHECK(rti.IsExact()); -    loaded_class_rti_ = rti; +  // Loaded class RTI is marked as valid by RTP if the klass_ is admissible. 
+  void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { +    DCHECK(klass_ != nullptr); +    SetPackedFlag<kFlagValidLoadedClassRTI>(true);    }    dex::TypeIndex GetTypeIndex() const { return type_index_; } @@ -6302,7 +6307,8 @@ class HLoadClass FINAL : public HInstruction {    static constexpr size_t kFieldLoadKind           = kFlagGenerateClInitCheck + 1;    static constexpr size_t kFieldLoadKindSize =        MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast)); -  static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize; +  static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize; +  static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1;    static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields.");    using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>; @@ -6329,8 +6335,6 @@ class HLoadClass FINAL : public HInstruction {    const DexFile& dex_file_;    Handle<mirror::Class> klass_; - -  ReferenceTypeInfo loaded_class_rti_;  };  std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs); @@ -6882,72 +6886,159 @@ enum class TypeCheckKind {    kInterfaceCheck,        // No optimization yet when checking against an interface.    kArrayObjectCheck,      // Can just check if the array is not primitive.    kArrayCheck,            // No optimization yet when checking against a generic array. +  kBitstringCheck,        // Compare the type check bitstring.    kLast = kArrayCheck  };  std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs); -class HInstanceOf FINAL : public HExpression<2> { +// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an +// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.) +class HTypeCheckInstruction : public HVariableInputSizeInstruction {   public: -  HInstanceOf(HInstruction* object, -              HLoadClass* target_class, -              TypeCheckKind check_kind, -              uint32_t dex_pc) -      : HExpression(kInstanceOf, -                    DataType::Type::kBool, -                    SideEffectsForArchRuntimeCalls(check_kind), -                    dex_pc) { +  HTypeCheckInstruction(InstructionKind kind, +                        HInstruction* object, +                        HInstruction* target_class_or_null, +                        TypeCheckKind check_kind, +                        Handle<mirror::Class> klass, +                        uint32_t dex_pc, +                        ArenaAllocator* allocator, +                        HIntConstant* bitstring_path_to_root, +                        HIntConstant* bitstring_mask, +                        SideEffects side_effects) +      : HVariableInputSizeInstruction( +          kind, +          side_effects, +          dex_pc, +          allocator, +          /* number_of_inputs */ check_kind == TypeCheckKind::kBitstringCheck ? 
4u : 2u, +          kArenaAllocTypeCheckInputs), +        klass_(klass) {      SetPackedField<TypeCheckKindField>(check_kind);      SetPackedFlag<kFlagMustDoNullCheck>(true); +    SetPackedFlag<kFlagValidTargetClassRTI>(false);      SetRawInputAt(0, object); -    SetRawInputAt(1, target_class); +    SetRawInputAt(1, target_class_or_null); +    DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr); +    DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr); +    if (check_kind == TypeCheckKind::kBitstringCheck) { +      DCHECK(target_class_or_null->IsNullConstant()); +      SetRawInputAt(2, bitstring_path_to_root); +      SetRawInputAt(3, bitstring_mask); +    } else { +      DCHECK(target_class_or_null->IsLoadClass()); +    }    }    HLoadClass* GetTargetClass() const { +    DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);      HInstruction* load_class = InputAt(1);      DCHECK(load_class->IsLoadClass());      return load_class->AsLoadClass();    } -  bool IsClonable() const OVERRIDE { return true; } -  bool CanBeMoved() const OVERRIDE { return true; } +  uint32_t GetBitstringPathToRoot() const { +    DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); +    HInstruction* path_to_root = InputAt(2); +    DCHECK(path_to_root->IsIntConstant()); +    return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue()); +  } -  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { -    return true; +  uint32_t GetBitstringMask() const { +    DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); +    HInstruction* mask = InputAt(3); +    DCHECK(mask->IsIntConstant()); +    return static_cast<uint32_t>(mask->AsIntConstant()->GetValue());    } -  bool NeedsEnvironment() const OVERRIDE { -    return CanCallRuntime(GetTypeCheckKind()); +  bool IsClonable() const OVERRIDE { return true; } +  bool CanBeMoved() const OVERRIDE { return true; } + +  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { +    DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName(); +    return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields();    } -  // Used only in code generation.    bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }    void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }    TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }    bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } -  static bool CanCallRuntime(TypeCheckKind check_kind) { -    // Mips currently does runtime calls for any other checks. -    return check_kind != TypeCheckKind::kExactCheck; +  ReferenceTypeInfo GetTargetClassRTI() { +    if (GetPackedFlag<kFlagValidTargetClassRTI>()) { +      // Note: The is_exact flag from the return value should not be used. +      return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact */ true); +    } else { +      return ReferenceTypeInfo::CreateInvalid(); +    }    } -  static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { -    return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None(); +  // Target class RTI is marked as valid by RTP if the klass_ is admissible. 
+  void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { +    DCHECK(klass_ != nullptr); +    SetPackedFlag<kFlagValidTargetClassRTI>(true);    } -  DECLARE_INSTRUCTION(InstanceOf); +  Handle<mirror::Class> GetClass() const { +    return klass_; +  }   protected: -  DEFAULT_COPY_CONSTRUCTOR(InstanceOf); +  DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction);   private: -  static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits; +  static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;    static constexpr size_t kFieldTypeCheckKindSize =        MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));    static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; -  static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1; +  static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1; +  static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1;    static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");    using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; + +  Handle<mirror::Class> klass_; +}; + +class HInstanceOf FINAL : public HTypeCheckInstruction { + public: +  HInstanceOf(HInstruction* object, +              HInstruction* target_class_or_null, +              TypeCheckKind check_kind, +              Handle<mirror::Class> klass, +              uint32_t dex_pc, +              ArenaAllocator* allocator, +              HIntConstant* bitstring_path_to_root, +              HIntConstant* bitstring_mask) +      : HTypeCheckInstruction(kInstanceOf, +                              object, +                              target_class_or_null, +                              check_kind, +                              klass, +                              dex_pc, +                              allocator, +                              bitstring_path_to_root, +                              bitstring_mask, +                              SideEffectsForArchRuntimeCalls(check_kind)) {} + +  DataType::Type GetType() const OVERRIDE { return DataType::Type::kBool; } + +  bool NeedsEnvironment() const OVERRIDE { +    return CanCallRuntime(GetTypeCheckKind()); +  } + +  static bool CanCallRuntime(TypeCheckKind check_kind) { +    // Mips currently does runtime calls for any other checks. +    return check_kind != TypeCheckKind::kExactCheck; +  } + +  static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { +    return CanCallRuntime(check_kind) ? 
SideEffects::CanTriggerGC() : SideEffects::None(); +  } + +  DECLARE_INSTRUCTION(InstanceOf); + + protected: +  DEFAULT_COPY_CONSTRUCTOR(InstanceOf);  };  class HBoundType FINAL : public HExpression<1> { @@ -6997,31 +7088,26 @@ class HBoundType FINAL : public HExpression<1> {    ReferenceTypeInfo upper_bound_;  }; -class HCheckCast FINAL : public HTemplateInstruction<2> { +class HCheckCast FINAL : public HTypeCheckInstruction {   public:    HCheckCast(HInstruction* object, -             HLoadClass* target_class, +             HInstruction* target_class_or_null,               TypeCheckKind check_kind, -             uint32_t dex_pc) -      : HTemplateInstruction(kCheckCast, SideEffects::CanTriggerGC(), dex_pc) { -    SetPackedField<TypeCheckKindField>(check_kind); -    SetPackedFlag<kFlagMustDoNullCheck>(true); -    SetRawInputAt(0, object); -    SetRawInputAt(1, target_class); -  } - -  HLoadClass* GetTargetClass() const { -    HInstruction* load_class = InputAt(1); -    DCHECK(load_class->IsLoadClass()); -    return load_class->AsLoadClass(); -  } - -  bool IsClonable() const OVERRIDE { return true; } -  bool CanBeMoved() const OVERRIDE { return true; } - -  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { -    return true; -  } +             Handle<mirror::Class> klass, +             uint32_t dex_pc, +             ArenaAllocator* allocator, +             HIntConstant* bitstring_path_to_root, +             HIntConstant* bitstring_mask) +      : HTypeCheckInstruction(kCheckCast, +                              object, +                              target_class_or_null, +                              check_kind, +                              klass, +                              dex_pc, +                              allocator, +                              bitstring_path_to_root, +                              bitstring_mask, +                              SideEffects::CanTriggerGC()) {}    bool NeedsEnvironment() const OVERRIDE {      // Instruction may throw a CheckCastError. 
@@ -7030,24 +7116,10 @@ class HCheckCast FINAL : public HTemplateInstruction<2> {    bool CanThrow() const OVERRIDE { return true; } -  bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } -  void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } -  TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } -  bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } -    DECLARE_INSTRUCTION(CheckCast);   protected:    DEFAULT_COPY_CONSTRUCTOR(CheckCast); - - private: -  static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; -  static constexpr size_t kFieldTypeCheckKindSize = -      MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); -  static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; -  static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1; -  static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); -  using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;  };  /** diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 00194ff1fe..e0a9cfb934 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -99,6 +99,7 @@ enum class MethodCompilationStat {    kConstructorFenceRemovedLSE,    kConstructorFenceRemovedPFRA,    kConstructorFenceRemovedCFRE, +  kBitstringTypeCheck,    kJitOutOfMemoryForCommit,    kLastStat  }; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f843c008d8..59733397bf 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -34,6 +34,20 @@ void PrepareForRegisterAllocation::Run() {    }  } +void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) { +  // Record only those bitstring type checks that make it to the codegen stage. +  if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { +    MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); +  } +} + +void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) { +  // Record only those bitstring type checks that make it to the codegen stage. 
+  if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { +    MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); +  } +} +  void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) {    check->ReplaceWith(check->InputAt(0));  } diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 2c64f016c1..f6e4d3ef99 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -40,6 +40,8 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {        "prepare_for_register_allocation";   private: +  void VisitCheckCast(HCheckCast* check_cast) OVERRIDE; +  void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE;    void VisitNullCheck(HNullCheck* check) OVERRIDE;    void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE;    void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 67a61fc01d..4030883a57 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -87,6 +87,7 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {    void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE;    void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;    void VisitLoadClass(HLoadClass* load_class) OVERRIDE; +  void VisitInstanceOf(HInstanceOf* load_class) OVERRIDE;    void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;    void VisitLoadString(HLoadString* instr) OVERRIDE;    void VisitLoadException(HLoadException* instr) OVERRIDE; @@ -171,6 +172,12 @@ void ReferenceTypePropagation::ValidateTypes() {                  << "NullCheck " << instr->GetReferenceTypeInfo()                  << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo();            } +        } else if (instr->IsInstanceOf()) { +          HInstanceOf* iof = instr->AsInstanceOf(); +          DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact()); +        } else if (instr->IsCheckCast()) { +          HCheckCast* check = instr->AsCheckCast(); +          DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact());          }        }      } @@ -499,8 +506,7 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock*      return;    } -  HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); -  ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); +  ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI();    if (!class_rti.IsValid()) {      // He have loaded an unresolved class. Don't bother bounding the type.      
return; @@ -643,15 +649,20 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet(  void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) {    ScopedObjectAccess soa(Thread::Current()); -  Handle<mirror::Class> resolved_class = instr->GetClass(); -  if (IsAdmissible(resolved_class.Get())) { -    instr->SetLoadedClassRTI(ReferenceTypeInfo::Create( -        resolved_class, /* is_exact */ true)); +  if (IsAdmissible(instr->GetClass().Get())) { +    instr->SetValidLoadedClassRTI();    }    instr->SetReferenceTypeInfo(        ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true));  } +void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) { +  ScopedObjectAccess soa(Thread::Current()); +  if (IsAdmissible(instr->GetClass().Get())) { +    instr->SetValidTargetClassRTI(); +  } +} +  void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {    instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo());  } @@ -719,8 +730,6 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) {  }  void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { -  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); -  ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();    HBoundType* bound_type = check_cast->GetNext()->AsBoundType();    if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) {      // The next instruction is not an uninitialized BoundType. This must be @@ -729,12 +738,14 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast    }    DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0)); -  if (class_rti.IsValid()) { +  ScopedObjectAccess soa(Thread::Current()); +  Handle<mirror::Class> klass = check_cast->GetClass(); +  if (IsAdmissible(klass.Get())) {      DCHECK(is_first_run_); -    ScopedObjectAccess soa(Thread::Current()); +    check_cast->SetValidTargetClassRTI();      // This is the first run of RTP and class is resolved. -    bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes(); -    bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact), +    bool is_exact = klass->CannotBeAssignedFromOtherTypes(); +    bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact),                                /* CheckCast succeeds for nulls. */ true);    } else {      // This is the first run of RTP and class is unresolved. Remove the binding. diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 7dffb2a378..70b45763af 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -240,6 +240,75 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(    return load_kind;  } +static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, +                                            CodeGenerator* codegen, +                                            CompilerDriver* compiler_driver) +    REQUIRES_SHARED(Locks::mutator_lock_) { +  DCHECK(!klass->IsProxyClass()); +  DCHECK(!klass->IsArrayClass()); + +  if (Runtime::Current()->UseJitCompilation()) { +    // If we're JITting, try to assign a type check bitstring (fall through). 
+  } else if (codegen->GetCompilerOptions().IsBootImage()) { +    const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex()); +    if (!compiler_driver->IsImageClass(descriptor)) { +      return false; +    } +    // If the target is a boot image class, try to assign a type check bitstring (fall through). +    // (If --force-determinism, this was already done; repeating is OK and yields the same result.) +  } else { +    // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring +    // already assigned in the boot image. +    return false; +  } + +  // Try to assign a type check bitstring. +  MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); +  if ((false) &&  // FIXME: Inliner does not respect compiler_driver->IsClassToCompile() +                  // and we're hitting an unassigned bitstring in dex2oat_image_test. b/26687569 +      kIsDebugBuild && +      codegen->GetCompilerOptions().IsBootImage() && +      codegen->GetCompilerOptions().IsForceDeterminism()) { +    SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); +    CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed) +        << klass->PrettyDescriptor() << "/" << old_state +        << " in " << codegen->GetGraph()->PrettyMethod(); +  } +  SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass); +  return state == SubtypeCheckInfo::kAssigned; +} + +TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, +                                                CodeGenerator* codegen, +                                                CompilerDriver* compiler_driver, +                                                bool needs_access_check) { +  if (klass == nullptr) { +    return TypeCheckKind::kUnresolvedCheck; +  } else if (klass->IsInterface()) { +    return TypeCheckKind::kInterfaceCheck; +  } else if (klass->IsArrayClass()) { +    if (klass->GetComponentType()->IsObjectClass()) { +      return TypeCheckKind::kArrayObjectCheck; +    } else if (klass->CannotBeAssignedFromOtherTypes()) { +      return TypeCheckKind::kExactCheck; +    } else { +      return TypeCheckKind::kArrayCheck; +    } +  } else if (klass->IsFinal()) {  // TODO: Consider using bitstring for final classes. +    return TypeCheckKind::kExactCheck; +  } else if (kBitstringSubtypeCheckEnabled && +             !needs_access_check && +             CanUseTypeCheckBitstring(klass, codegen, compiler_driver)) { +    // TODO: We should not need the `!needs_access_check` check but getting rid of that +    // requires rewriting some optimizations in instruction simplifier. +    return TypeCheckKind::kBitstringCheck; +  } else if (klass->IsAbstract()) { +    return TypeCheckKind::kAbstractClassCheck; +  } else { +    return TypeCheckKind::kClassHierarchyCheck; +  } +} +  void HSharpening::ProcessLoadString(      HLoadString* load_string,      CodeGenerator* codegen, diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 6df7d6d91e..fa3e948eeb 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -44,12 +44,10 @@ class HSharpening : public HOptimization {    static constexpr const char* kSharpeningPassName = "sharpening"; -  // Used by the builder. 
-  static void ProcessLoadString(HLoadString* load_string, -                                CodeGenerator* codegen, -                                CompilerDriver* compiler_driver, -                                const DexCompilationUnit& dex_compilation_unit, -                                VariableSizedHandleScope* handles); +  // Used by Sharpening and InstructionSimplifier. +  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, +                                          CodeGenerator* codegen, +                                          CompilerDriver* compiler_driver);    // Used by the builder and the inliner.    static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class, @@ -58,10 +56,19 @@ class HSharpening : public HOptimization {                                                     const DexCompilationUnit& dex_compilation_unit)        REQUIRES_SHARED(Locks::mutator_lock_); -  // Used by Sharpening and InstructionSimplifier. -  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, -                                          CodeGenerator* codegen, -                                          CompilerDriver* compiler_driver); +  // Used by the builder. +  static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, +                                            CodeGenerator* codegen, +                                            CompilerDriver* compiler_driver, +                                            bool needs_access_check) +      REQUIRES_SHARED(Locks::mutator_lock_); + +  // Used by the builder. +  static void ProcessLoadString(HLoadString* load_string, +                                CodeGenerator* codegen, +                                CompilerDriver* compiler_driver, +                                const DexCompilationUnit& dex_compilation_unit, +                                VariableSizedHandleScope* handles);   private:    CodeGenerator* codegen_; diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc index 04942f9a4a..ee74f1001f 100644 --- a/compiler/optimizing/superblock_cloner.cc +++ b/compiler/optimizing/superblock_cloner.cc @@ -853,7 +853,7 @@ void SuperblockCloner::CleanUp() {      }    } -  if (kSuperblockClonerVerify) { +  if (kIsDebugBuild) {      VerifyGraph();    }  } diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h index 19c9dd471c..afd5a5d6e7 100644 --- a/compiler/optimizing/superblock_cloner.h +++ b/compiler/optimizing/superblock_cloner.h @@ -25,7 +25,6 @@  namespace art {  static const bool kSuperblockClonerLogging = false; -static const bool kSuperblockClonerVerify = false;  // Represents an edge between two HBasicBlocks.  //  |
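For reference, the subtype predicate behind the new TypeCheckKind::kBitstringCheck can be sketched in plain C++. This is only an illustrative sketch with hypothetical names: the two constants mirror GetBitstringPathToRoot() (input 2) and GetBitstringMask() (input 3) of HTypeCheckInstruction, and sharpening only selects this check kind once SubtypeCheck<>::EnsureAssigned(klass) reports SubtypeCheckInfo::kAssigned.

    #include <cstdint>

    // Hypothetical stand-in for the type-check bits stored in the class object.
    // `mask` has the form (2^n - 1), so this keeps only the bitstring field.
    static uint32_t GetSubtypeCheckBits(uint32_t class_status_word, uint32_t mask) {
      return class_status_word & mask;
    }

    // A class is a subtype of the target iff its bitstring, truncated to the
    // target's depth by `mask`, equals the target's path-to-root value.
    static bool MatchesBitstringTypeCheck(uint32_t class_status_word,
                                          uint32_t path_to_root,
                                          uint32_t mask) {
      return GetSubtypeCheckBits(class_status_word, mask) == path_to_root;
    }

Because both operands of the final comparison are compile-time constants for a given target class, the check collapses an arbitrary-depth superclass walk into a single mask-and-compare, which is why the bitstring path-to-root and mask inputs can be plain HIntConstants.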