diff options
Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/instruction_builder.cc | 54 | ||||
| -rw-r--r-- | compiler/optimizing/instruction_builder.h | 1 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_mips64.cc | 89 |
3 files changed, 114 insertions, 30 deletions
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 12cb826395..00cd2f659d 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -215,6 +215,17 @@ void HInstructionBuilder::InitializeInstruction(HInstruction* instruction) { } } +HInstruction* HInstructionBuilder::LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc) { + HInstruction* ref = LoadLocal(register_index, Primitive::kPrimNot); + if (!ref->CanBeNull()) { + return ref; + } + + HNullCheck* null_check = new (arena_) HNullCheck(ref, dex_pc); + AppendInstruction(null_check); + return null_check; +} + void HInstructionBuilder::SetLoopHeaderPhiInputs() { for (size_t i = loop_headers_.size(); i > 0; --i) { HBasicBlock* block = loop_headers_[i - 1]; @@ -1084,10 +1095,9 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, size_t start_index = 0; size_t argument_index = 0; if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) { // Instance call. - HInstruction* arg = LoadLocal(is_range ? register_index : args[0], Primitive::kPrimNot); - HNullCheck* null_check = new (arena_) HNullCheck(arg, invoke->GetDexPc()); - AppendInstruction(null_check); - invoke->SetArgumentAt(0, null_check); + HInstruction* arg = LoadNullCheckedLocal(is_range ? register_index : args[0], + invoke->GetDexPc()); + invoke->SetArgumentAt(0, arg); start_index = 1; argument_index = 1; } @@ -1193,9 +1203,7 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa); - HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot); - HInstruction* null_check = new (arena_) HNullCheck(object, dex_pc); - AppendInstruction(null_check); + HInstruction* object = LoadNullCheckedLocal(obj_reg, dex_pc); Primitive::Type field_type = (resolved_field == nullptr) ? GetFieldAccessType(*dex_file_, field_index) @@ -1205,14 +1213,14 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio HInstruction* field_set = nullptr; if (resolved_field == nullptr) { MaybeRecordStat(MethodCompilationStat::kUnresolvedField); - field_set = new (arena_) HUnresolvedInstanceFieldSet(null_check, + field_set = new (arena_) HUnresolvedInstanceFieldSet(object, value, field_type, field_index, dex_pc); } else { uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex(); - field_set = new (arena_) HInstanceFieldSet(null_check, + field_set = new (arena_) HInstanceFieldSet(object, value, field_type, resolved_field->GetOffset(), @@ -1228,13 +1236,13 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio HInstruction* field_get = nullptr; if (resolved_field == nullptr) { MaybeRecordStat(MethodCompilationStat::kUnresolvedField); - field_get = new (arena_) HUnresolvedInstanceFieldGet(null_check, + field_get = new (arena_) HUnresolvedInstanceFieldGet(object, field_type, field_index, dex_pc); } else { uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex(); - field_get = new (arena_) HInstanceFieldGet(null_check, + field_get = new (arena_) HInstanceFieldGet(object, field_type, resolved_field->GetOffset(), resolved_field->IsVolatile(), @@ -1449,10 +1457,7 @@ void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction, uint8_t array_reg = instruction.VRegB_23x(); uint8_t index_reg = instruction.VRegC_23x(); - HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot); - object = new (arena_) HNullCheck(object, dex_pc); - AppendInstruction(object); - + HInstruction* object = LoadNullCheckedLocal(array_reg, dex_pc); HInstruction* length = new (arena_) HArrayLength(object, dex_pc); AppendInstruction(length); HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt); @@ -1527,11 +1532,8 @@ void HInstructionBuilder::BuildFillArrayData(HInstruction* object, } void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) { - HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot); - HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc); - AppendInstruction(null_check); - - HInstruction* length = new (arena_) HArrayLength(null_check, dex_pc); + HInstruction* array = LoadNullCheckedLocal(instruction.VRegA_31t(), dex_pc); + HInstruction* length = new (arena_) HArrayLength(array, dex_pc); AppendInstruction(length); int32_t payload_offset = instruction.VRegB_31t() + dex_pc; @@ -1547,28 +1549,28 @@ void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uin switch (payload->element_width) { case 1: - BuildFillArrayData(null_check, + BuildFillArrayData(array, reinterpret_cast<const int8_t*>(data), element_count, Primitive::kPrimByte, dex_pc); break; case 2: - BuildFillArrayData(null_check, + BuildFillArrayData(array, reinterpret_cast<const int16_t*>(data), element_count, Primitive::kPrimShort, dex_pc); break; case 4: - BuildFillArrayData(null_check, + BuildFillArrayData(array, reinterpret_cast<const int32_t*>(data), element_count, Primitive::kPrimInt, dex_pc); break; case 8: - BuildFillWideArrayData(null_check, + BuildFillWideArrayData(array, reinterpret_cast<const int64_t*>(data), element_count, dex_pc); @@ -2575,9 +2577,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, ARRAY_XX(_SHORT, Primitive::kPrimShort); case Instruction::ARRAY_LENGTH: { - HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot); - object = new (arena_) HNullCheck(object, dex_pc); - AppendInstruction(object); + HInstruction* object = LoadNullCheckedLocal(instruction.VRegB_12x(), dex_pc); AppendInstruction(new (arena_) HArrayLength(object, dex_pc)); UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction()); break; diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 070f7da80e..0e3e5a7c34 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -87,6 +87,7 @@ class HInstructionBuilder : public ValueObject { ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block); HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local); HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const; + HInstruction* LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc); void UpdateLocal(uint32_t register_index, HInstruction* instruction); void AppendInstruction(HInstruction* instruction); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index cf973aa841..1524e1e011 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -385,6 +385,92 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } +static void GenBitCount(LocationSummary* locations, + const Primitive::Type type, + Mips64Assembler* assembler) { + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + + // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + // + // A generalization of the best bit counting method to integers of + // bit-widths up to 128 (parameterized by type T) is this: + // + // v = v - ((v >> 1) & (T)~(T)0/3); // temp + // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3); // temp + // v = (v + (v >> 4)) & (T)~(T)0/255*15; // temp + // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; // count + // + // For comparison, for 32-bit quantities, this algorithm can be executed + // using 20 MIPS instructions (the calls to LoadConst32() generate two + // machine instructions each for the values being used in this algorithm). + // A(n unrolled) loop-based algorithm requires 25 instructions. + // + // For a 64-bit operand this can be performed in 24 instructions compared + // to a(n unrolled) loop based algorithm which requires 38 instructions. + // + // There are algorithms which are faster in the cases where very few + // bits are set but the algorithm here attempts to minimize the total + // number of instructions executed even when a large number of bits + // are set. + + if (type == Primitive::kPrimInt) { + __ Srl(TMP, in, 1); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, TMP, AT); + __ Subu(TMP, in, TMP); + __ LoadConst32(AT, 0x33333333); + __ And(out, TMP, AT); + __ Srl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Addu(TMP, out, TMP); + __ Srl(out, TMP, 4); + __ Addu(out, out, TMP); + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out, out, AT); + __ LoadConst32(TMP, 0x01010101); + __ MulR6(out, out, TMP); + __ Srl(out, out, 24); + } else if (type == Primitive::kPrimLong) { + __ Dsrl(TMP, in, 1); + __ LoadConst64(AT, 0x5555555555555555L); + __ And(TMP, TMP, AT); + __ Dsubu(TMP, in, TMP); + __ LoadConst64(AT, 0x3333333333333333L); + __ And(out, TMP, AT); + __ Dsrl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Daddu(TMP, out, TMP); + __ Dsrl(out, TMP, 4); + __ Daddu(out, out, TMP); + __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); + __ And(out, out, AT); + __ LoadConst64(TMP, 0x0101010101010101L); + __ Dmul(out, out, TMP); + __ Dsrl32(out, out, 24); + } +} + +// int java.lang.Integer.bitCount(int) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) { + GenBitCount(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +// int java.lang.Long.bitCount(long) +void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) { + GenBitCount(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); @@ -1693,9 +1779,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } -UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerBitCount) -UNIMPLEMENTED_INTRINSIC(MIPS64, LongBitCount) - UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundDouble) UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundFloat) |