summary refs log tree commit diff
path: root/compiler/optimizing
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- compiler/optimizing/instruction_builder.cc 54
-rw-r--r-- compiler/optimizing/instruction_builder.h 1
-rw-r--r-- compiler/optimizing/intrinsics_mips64.cc 89
3 files changed, 114 insertions, 30 deletions
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 12cb826395..00cd2f659d 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -215,6 +215,17 @@ void HInstructionBuilder::InitializeInstruction(HInstruction* instruction) {
}
}
+HInstruction* HInstructionBuilder::LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc) {  // Null-checked variant of LoadLocal.
+ HInstruction* ref = LoadLocal(register_index, Primitive::kPrimNot);
+ if (!ref->CanBeNull()) {  // The loaded value reports that it cannot be null.
+ return ref;  // Returned as-is: no HNullCheck is created or appended.
+ }
+ // Otherwise append an HNullCheck tagged with dex_pc and return the check in place of the raw value.
+ HNullCheck* null_check = new (arena_) HNullCheck(ref, dex_pc);
+ AppendInstruction(null_check);
+ return null_check;
+}
+
void HInstructionBuilder::SetLoopHeaderPhiInputs() {
for (size_t i = loop_headers_.size(); i > 0; --i) {
HBasicBlock* block = loop_headers_[i - 1];
@@ -1084,10 +1095,9 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke,
size_t start_index = 0;
size_t argument_index = 0;
if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) { // Instance call.
- HInstruction* arg = LoadLocal(is_range ? register_index : args[0], Primitive::kPrimNot);
- HNullCheck* null_check = new (arena_) HNullCheck(arg, invoke->GetDexPc());
- AppendInstruction(null_check);
- invoke->SetArgumentAt(0, null_check);
+ HInstruction* arg = LoadNullCheckedLocal(is_range ? register_index : args[0],
+ invoke->GetDexPc());
+ invoke->SetArgumentAt(0, arg);
start_index = 1;
argument_index = 1;
}
@@ -1193,9 +1203,7 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio
compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
- HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
- HInstruction* null_check = new (arena_) HNullCheck(object, dex_pc);
- AppendInstruction(null_check);
+ HInstruction* object = LoadNullCheckedLocal(obj_reg, dex_pc);
Primitive::Type field_type = (resolved_field == nullptr)
? GetFieldAccessType(*dex_file_, field_index)
@@ -1205,14 +1213,14 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio
HInstruction* field_set = nullptr;
if (resolved_field == nullptr) {
MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
- field_set = new (arena_) HUnresolvedInstanceFieldSet(null_check,
+ field_set = new (arena_) HUnresolvedInstanceFieldSet(object,
value,
field_type,
field_index,
dex_pc);
} else {
uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
- field_set = new (arena_) HInstanceFieldSet(null_check,
+ field_set = new (arena_) HInstanceFieldSet(object,
value,
field_type,
resolved_field->GetOffset(),
@@ -1228,13 +1236,13 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio
HInstruction* field_get = nullptr;
if (resolved_field == nullptr) {
MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
- field_get = new (arena_) HUnresolvedInstanceFieldGet(null_check,
+ field_get = new (arena_) HUnresolvedInstanceFieldGet(object,
field_type,
field_index,
dex_pc);
} else {
uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
- field_get = new (arena_) HInstanceFieldGet(null_check,
+ field_get = new (arena_) HInstanceFieldGet(object,
field_type,
resolved_field->GetOffset(),
resolved_field->IsVolatile(),
@@ -1449,10 +1457,7 @@ void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction,
uint8_t array_reg = instruction.VRegB_23x();
uint8_t index_reg = instruction.VRegC_23x();
- HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot);
- object = new (arena_) HNullCheck(object, dex_pc);
- AppendInstruction(object);
-
+ HInstruction* object = LoadNullCheckedLocal(array_reg, dex_pc);
HInstruction* length = new (arena_) HArrayLength(object, dex_pc);
AppendInstruction(length);
HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt);
@@ -1527,11 +1532,8 @@ void HInstructionBuilder::BuildFillArrayData(HInstruction* object,
}
void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
- HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot);
- HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc);
- AppendInstruction(null_check);
-
- HInstruction* length = new (arena_) HArrayLength(null_check, dex_pc);
+ HInstruction* array = LoadNullCheckedLocal(instruction.VRegA_31t(), dex_pc);
+ HInstruction* length = new (arena_) HArrayLength(array, dex_pc);
AppendInstruction(length);
int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
@@ -1547,28 +1549,28 @@ void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uin
switch (payload->element_width) {
case 1:
- BuildFillArrayData(null_check,
+ BuildFillArrayData(array,
reinterpret_cast<const int8_t*>(data),
element_count,
Primitive::kPrimByte,
dex_pc);
break;
case 2:
- BuildFillArrayData(null_check,
+ BuildFillArrayData(array,
reinterpret_cast<const int16_t*>(data),
element_count,
Primitive::kPrimShort,
dex_pc);
break;
case 4:
- BuildFillArrayData(null_check,
+ BuildFillArrayData(array,
reinterpret_cast<const int32_t*>(data),
element_count,
Primitive::kPrimInt,
dex_pc);
break;
case 8:
- BuildFillWideArrayData(null_check,
+ BuildFillWideArrayData(array,
reinterpret_cast<const int64_t*>(data),
element_count,
dex_pc);
@@ -2575,9 +2577,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
ARRAY_XX(_SHORT, Primitive::kPrimShort);
case Instruction::ARRAY_LENGTH: {
- HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot);
- object = new (arena_) HNullCheck(object, dex_pc);
- AppendInstruction(object);
+ HInstruction* object = LoadNullCheckedLocal(instruction.VRegB_12x(), dex_pc);
AppendInstruction(new (arena_) HArrayLength(object, dex_pc));
UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction());
break;
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 070f7da80e..0e3e5a7c34 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -87,6 +87,7 @@ class HInstructionBuilder : public ValueObject {
ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local);
HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const;
+ HInstruction* LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc);
void UpdateLocal(uint32_t register_index, HInstruction* instruction);
void AppendInstruction(HInstruction* instruction);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index cf973aa841..1524e1e011 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -385,6 +385,92 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
+static void GenBitCount(LocationSummary* locations,
+ const Primitive::Type type,
+ Mips64Assembler* assembler) {
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);  // Any other type matches neither branch below and emits nothing.
+ // Emits the number of set bits of `in` into `out`, using TMP and AT as scratch registers.
+ // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ //
+ // A generalization of the best bit counting method to integers of
+ // bit-widths up to 128 (parameterized by type T) is this:
+ //
+ // v = v - ((v >> 1) & (T)~(T)0/3); // temp
+ // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3); // temp
+ // v = (v + (v >> 4)) & (T)~(T)0/255*15; // temp
+ // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; // count
+ //
+ // For comparison, for 32-bit quantities, this algorithm can be executed
+ // using 20 MIPS instructions (the calls to LoadConst32() generate two
+ // machine instructions each for the values being used in this algorithm).
+ // A(n unrolled) loop-based algorithm requires 25 instructions.
+ //
+ // For a 64-bit operand this can be performed in 24 instructions compared
+ // to a(n unrolled) loop-based algorithm which requires 38 instructions.
+ //
+ // There are algorithms which are faster in the cases where very few
+ // bits are set but the algorithm here attempts to minimize the total
+ // number of instructions executed even when a large number of bits
+ // are set.
+
+ if (type == Primitive::kPrimInt) {  // T is 32 bits wide.
+ __ Srl(TMP, in, 1);
+ __ LoadConst32(AT, 0x55555555);  // (T)~(T)0/3
+ __ And(TMP, TMP, AT);
+ __ Subu(TMP, in, TMP);  // Step 1: v = in - ((in >> 1) & 0x55555555), held in TMP.
+ __ LoadConst32(AT, 0x33333333);  // (T)~(T)0/15*3
+ __ And(out, TMP, AT);  // out = v & 0x33333333. First write to out; in is not read after the Subu above.
+ __ Srl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);  // TMP = (v >> 2) & 0x33333333
+ __ Addu(TMP, out, TMP);  // Step 2: v = (v & 0x33333333) + ((v >> 2) & 0x33333333), held in TMP.
+ __ Srl(out, TMP, 4);
+ __ Addu(out, out, TMP);  // out = v + (v >> 4)
+ __ LoadConst32(AT, 0x0F0F0F0F);  // (T)~(T)0/255*15
+ __ And(out, out, AT);  // Step 3: v = (v + (v >> 4)) & 0x0F0F0F0F, held in out.
+ __ LoadConst32(TMP, 0x01010101);  // (T)~(T)0/255; TMP is free again at this point.
+ __ MulR6(out, out, TMP);
+ __ Srl(out, out, 24);  // Step 4: count = (v * 0x01010101) >> 24, i.e. (sizeof(T) - 1) * 8 bits.
+ } else if (type == Primitive::kPrimLong) {  // Same four steps with 64-bit constants and doubleword instructions.
+ __ Dsrl(TMP, in, 1);
+ __ LoadConst64(AT, 0x5555555555555555L);  // (T)~(T)0/3
+ __ And(TMP, TMP, AT);
+ __ Dsubu(TMP, in, TMP);  // Step 1: v = in - ((in >> 1) & 0x5555...), held in TMP.
+ __ LoadConst64(AT, 0x3333333333333333L);  // (T)~(T)0/15*3
+ __ And(out, TMP, AT);  // out = v & 0x3333.... First write to out; in is not read after the Dsubu above.
+ __ Dsrl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);  // TMP = (v >> 2) & 0x3333...
+ __ Daddu(TMP, out, TMP);  // Step 2: v = (v & 0x3333...) + ((v >> 2) & 0x3333...), held in TMP.
+ __ Dsrl(out, TMP, 4);
+ __ Daddu(out, out, TMP);  // out = v + (v >> 4)
+ __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);  // (T)~(T)0/255*15
+ __ And(out, out, AT);  // Step 3: v = (v + (v >> 4)) & 0x0F0F..., held in out.
+ __ LoadConst64(TMP, 0x0101010101010101L);  // (T)~(T)0/255; TMP is free again at this point.
+ __ Dmul(out, out, TMP);
+ __ Dsrl32(out, out, 24);  // Step 4: the formula needs >> 56 here. NOTE(review): assumes Dsrl32 shifts by 32 + 24 - confirm.
+ }
+}
+
+// Intrinsic for int java.lang.Integer.bitCount(int): locations builder first, then code generator.
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);  // Location setup is delegated to the CreateIntToIntLocations helper.
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
+ GenBitCount(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());  // kPrimInt selects GenBitCount's 32-bit sequence.
+}
+
+// Intrinsic for int java.lang.Long.bitCount(long): locations builder first, then code generator.
+void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);  // Same location helper as Integer.bitCount, although the argument here is a long.
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) {
+ GenBitCount(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());  // kPrimLong selects GenBitCount's 64-bit sequence.
+}
+
static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
@@ -1693,9 +1779,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) {
GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(MIPS64, LongBitCount)
-
UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundFloat)