diff options
Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 112 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm.h | 3 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 15 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 15 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics.h | 3 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 18 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 27 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 8 |
8 files changed, 128 insertions, 73 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 5eaf11e9fb..ab85c12a1d 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -2531,7 +2531,7 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -2568,13 +2568,18 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { break; case Primitive::kPrimLong: { - DCHECK(second.IsRegisterPair()); - __ adds(out.AsRegisterPairLow<Register>(), - first.AsRegisterPairLow<Register>(), - ShifterOperand(second.AsRegisterPairLow<Register>())); - __ adc(out.AsRegisterPairHigh<Register>(), - first.AsRegisterPairHigh<Register>(), - ShifterOperand(second.AsRegisterPairHigh<Register>())); + if (second.IsConstant()) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); + GenerateAddLongConst(out, first, value); + } else { + DCHECK(second.IsRegisterPair()); + __ adds(out.AsRegisterPairLow<Register>(), + first.AsRegisterPairLow<Register>(), + ShifterOperand(second.AsRegisterPairLow<Register>())); + __ adc(out.AsRegisterPairHigh<Register>(), + first.AsRegisterPairHigh<Register>(), + ShifterOperand(second.AsRegisterPairHigh<Register>())); + } break; } @@ -2608,7 +2613,7 @@ void LocationsBuilderARM::VisitSub(HSub* sub) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -2644,13 +2649,18 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) { } case Primitive::kPrimLong: { - DCHECK(second.IsRegisterPair()); - __ subs(out.AsRegisterPairLow<Register>(), - first.AsRegisterPairLow<Register>(), - ShifterOperand(second.AsRegisterPairLow<Register>())); - __ sbc(out.AsRegisterPairHigh<Register>(), - first.AsRegisterPairHigh<Register>(), - ShifterOperand(second.AsRegisterPairHigh<Register>())); + if (second.IsConstant()) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); + GenerateAddLongConst(out, first, -value); + } else { + DCHECK(second.IsRegisterPair()); + __ subs(out.AsRegisterPairLow<Register>(), + first.AsRegisterPairLow<Register>(), + ShifterOperand(second.AsRegisterPairLow<Register>())); + __ sbc(out.AsRegisterPairHigh<Register>(), + first.AsRegisterPairHigh<Register>(), + ShifterOperand(second.AsRegisterPairHigh<Register>())); + } break; } @@ -4052,31 +4062,51 @@ bool LocationsBuilderARM::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) { uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst)); if (Primitive::Is64BitType(input_cst->GetType())) { - return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) && - CanEncodeConstantAsImmediate(High32Bits(value), opcode); + Opcode high_opcode = opcode; + SetCc low_set_cc = kCcDontCare; + switch (opcode) { + case SUB: + // Flip the operation to an ADD. + value = -value; + opcode = ADD; + FALLTHROUGH_INTENDED; + case ADD: + if (Low32Bits(value) == 0u) { + return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare); + } + high_opcode = ADC; + low_set_cc = kCcSet; + break; + default: + break; + } + return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) && + CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare); } else { return CanEncodeConstantAsImmediate(Low32Bits(value), opcode); } } -bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) { +bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, + Opcode opcode, + SetCc set_cc) { ShifterOperand so; ArmAssembler* assembler = codegen_->GetAssembler(); - if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) { + if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, set_cc, &so)) { return true; } Opcode neg_opcode = kNoOperand; switch (opcode) { - case AND: - neg_opcode = BIC; - break; - case ORR: - neg_opcode = ORN; - break; + case AND: neg_opcode = BIC; value = ~value; break; + case ORR: neg_opcode = ORN; value = ~value; break; + case ADD: neg_opcode = SUB; value = -value; break; + case ADC: neg_opcode = SBC; value = ~value; break; + case SUB: neg_opcode = ADD; value = -value; break; + case SBC: neg_opcode = ADC; value = ~value; break; default: return false; } - return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so); + return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so); } void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, @@ -6202,6 +6232,34 @@ void InstructionCodeGeneratorARM::GenerateEorConst(Register out, Register first, __ eor(out, first, ShifterOperand(value)); } +void InstructionCodeGeneratorARM::GenerateAddLongConst(Location out, + Location first, + uint64_t value) { + Register out_low = out.AsRegisterPairLow<Register>(); + Register out_high = out.AsRegisterPairHigh<Register>(); + Register first_low = first.AsRegisterPairLow<Register>(); + Register first_high = first.AsRegisterPairHigh<Register>(); + uint32_t value_low = Low32Bits(value); + uint32_t value_high = High32Bits(value); + if (value_low == 0u) { + if (out_low != first_low) { + __ mov(out_low, ShifterOperand(first_low)); + } + __ AddConstant(out_high, first_high, value_high); + return; + } + __ AddConstantSetFlags(out_low, first_low, value_low); + ShifterOperand so; + if (__ ShifterOperandCanHold(out_high, first_high, ADC, value_high, kCcDontCare, &so)) { + __ adc(out_high, first_high, so); + } else if (__ ShifterOperandCanHold(out_low, first_low, SBC, ~value_high, kCcDontCare, &so)) { + __ sbc(out_high, first_high, so); + } else { + LOG(FATAL) << "Unexpected constant " << value_high; + UNREACHABLE(); + } +} + void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) { LocationSummary* locations = instruction->GetLocations(); Location first = locations->InAt(0); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index fa7709b9a3..5d9b2dce1c 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -183,7 +183,7 @@ class LocationsBuilderARM : public HGraphVisitor { Location ArithmeticZeroOrFpuRegister(HInstruction* input); Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode); bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode); - bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode); + bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare); CodeGeneratorARM* const codegen_; InvokeDexCallingConventionVisitorARM parameter_visitor_; @@ -220,6 +220,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void GenerateAndConst(Register out, Register first, uint32_t value); void GenerateOrrConst(Register out, Register first, uint32_t value); void GenerateEorConst(Register out, Register first, uint32_t value); + void GenerateAddLongConst(Location out, Location first, uint64_t value); void HandleBitwiseOperation(HBinaryOperation* operation); void HandleCondition(HCondition* condition); void HandleIntegerRotate(LocationSummary* locations); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index ade21174f4..a85cd54ff3 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -7099,12 +7099,6 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // /* LockWord */ lock_word = LockWord(monitor) static_assert(sizeof(LockWord) == sizeof(int32_t), "art::LockWord and int32_t have different sizes."); - // /* uint32_t */ rb_state = lock_word.ReadBarrierState() - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); - __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); - static_assert( - LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, - "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); // Load fence to prevent load-load reordering. // Note that this is a no-op, thanks to the x86 memory model. @@ -7124,8 +7118,13 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // if (rb_state == ReadBarrier::gray_ptr_) // ref = ReadBarrier::Mark(ref); - __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); - __ j(kEqual, slow_path->GetEntryLabel()); + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with SHR. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1)); + __ j(kCarrySet, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index eadb431440..e0013634f1 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -6551,12 +6551,6 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // /* LockWord */ lock_word = LockWord(monitor) static_assert(sizeof(LockWord) == sizeof(int32_t), "art::LockWord and int32_t have different sizes."); - // /* uint32_t */ rb_state = lock_word.ReadBarrierState() - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); - __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); - static_assert( - LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, - "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); // Load fence to prevent load-load reordering. // Note that this is a no-op, thanks to the x86-64 memory model. @@ -6576,8 +6570,13 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // if (rb_state == ReadBarrier::gray_ptr_) // ref = ReadBarrier::Mark(ref); - __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); - __ j(kEqual, slow_path->GetEntryLabel()); + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with SHR. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1)); + __ j(kCarrySet, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 3429a8fdbb..1a8eb58857 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -27,9 +27,6 @@ namespace art { class CompilerDriver; class DexFile; -// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751 -static constexpr bool kRoundIsPlusPointFive = false; - // Positive floating-point infinities. static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U; static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index e7c40e6600..e233672705 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1160,8 +1160,10 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); - Register str = XRegisterFrom(locations->InAt(0)); - Register arg = XRegisterFrom(locations->InAt(1)); + Register str = InputRegisterAt(invoke, 0); + Register arg = InputRegisterAt(invoke, 1); + DCHECK(str.IsW()); + DCHECK(arg.IsW()); Register out = OutputRegister(invoke); Register temp0 = WRegisterFrom(locations->GetTemp(0)); @@ -1192,8 +1194,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Subs(out, str, arg); __ B(&end, eq); // Load lengths of this and argument strings. - __ Ldr(temp0, MemOperand(str.X(), count_offset)); - __ Ldr(temp1, MemOperand(arg.X(), count_offset)); + __ Ldr(temp0, HeapOperand(str, count_offset)); + __ Ldr(temp1, HeapOperand(arg, count_offset)); // Return zero if both strings are empty. __ Orr(out, temp0, temp1); __ Cbz(out, &end); @@ -1222,8 +1224,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { // Loop to compare 4x16-bit characters at a time (ok because of string data alignment). __ Bind(&loop); - __ Ldr(temp4, MemOperand(str.X(), temp1)); - __ Ldr(temp0, MemOperand(arg.X(), temp1)); + __ Ldr(temp4, MemOperand(str.X(), temp1.X())); + __ Ldr(temp0, MemOperand(arg.X(), temp1.X())); __ Cmp(temp4, temp0); __ B(ne, &find_char_diff); __ Add(temp1, temp1, char_size * 4); @@ -1242,14 +1244,14 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Clz(temp1, temp1); // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then // the difference occurs outside the remaining string data, so just return length diff (out). - __ Cmp(temp2, Operand(temp1, LSR, 4)); + __ Cmp(temp2, Operand(temp1.W(), LSR, 4)); __ B(le, &end); // Extract the characters and calculate the difference. __ Bic(temp1, temp1, 0xf); __ Lsr(temp0, temp0, temp1); __ Lsr(temp4, temp4, temp1); __ And(temp4, temp4, 0xffff); - __ Sub(out, temp4, Operand(temp0, UXTH)); + __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH)); __ Bind(&end); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index dc409c92d6..22f4181b92 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -753,11 +753,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { - // See intrinsics.h. - if (!kRoundIsPlusPointFive) { - return; - } - // Do we have instruction support? if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); @@ -795,7 +790,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { } XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); - Register constant_area = locations->InAt(1).AsRegister<Register>(); XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); Register out = locations->Out().AsRegister<Register>(); @@ -810,10 +804,23 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { __ movss(t2, in); __ roundss(t1, in, Immediate(1)); __ subss(t2, t1); - __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area)); - __ j(kBelow, &skip_incr); - __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area)); - __ Bind(&skip_incr); + if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { + // Direct constant area available. + Register constant_area = locations->InAt(1).AsRegister<Register>(); + __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area)); + __ j(kBelow, &skip_incr); + __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area)); + __ Bind(&skip_incr); + } else { + // No constant area: go through stack. + __ pushl(Immediate(bit_cast<int32_t, float>(0.5f))); + __ pushl(Immediate(bit_cast<int32_t, float>(1.0f))); + __ comiss(t2, Address(ESP, 4)); + __ j(kBelow, &skip_incr); + __ addss(t1, Address(ESP, 0)); + __ Bind(&skip_incr); + __ addl(ESP, Immediate(8)); + } // Final conversion to an integer. Unfortunately this also does not have a // direct x86 instruction, since NaN should map to 0 and large positive diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 7dfbfb09be..ab8b05c3d4 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -598,10 +598,6 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) { - // See intrinsics.h. - if (!kRoundIsPlusPointFive) { - return; - } CreateSSE41FPToIntLocations(arena_, invoke, codegen_); } @@ -646,10 +642,6 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) { - // See intrinsics.h. - if (!kRoundIsPlusPointFive) { - return; - } CreateSSE41FPToIntLocations(arena_, invoke, codegen_); } |