summaryrefslogtreecommitdiff
path: root/compiler/optimizing
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--compiler/optimizing/code_generator_arm.cc112
-rw-r--r--compiler/optimizing/code_generator_arm.h3
-rw-r--r--compiler/optimizing/code_generator_x86.cc15
-rw-r--r--compiler/optimizing/code_generator_x86_64.cc15
-rw-r--r--compiler/optimizing/intrinsics.h3
-rw-r--r--compiler/optimizing/intrinsics_arm64.cc18
-rw-r--r--compiler/optimizing/intrinsics_x86.cc27
-rw-r--r--compiler/optimizing/intrinsics_x86_64.cc8
8 files changed, 128 insertions, 73 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 5eaf11e9fb..ab85c12a1d 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -2531,7 +2531,7 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -2568,13 +2568,18 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) {
break;
case Primitive::kPrimLong: {
- DCHECK(second.IsRegisterPair());
- __ adds(out.AsRegisterPairLow<Register>(),
- first.AsRegisterPairLow<Register>(),
- ShifterOperand(second.AsRegisterPairLow<Register>()));
- __ adc(out.AsRegisterPairHigh<Register>(),
- first.AsRegisterPairHigh<Register>(),
- ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ if (second.IsConstant()) {
+ uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+ GenerateAddLongConst(out, first, value);
+ } else {
+ DCHECK(second.IsRegisterPair());
+ __ adds(out.AsRegisterPairLow<Register>(),
+ first.AsRegisterPairLow<Register>(),
+ ShifterOperand(second.AsRegisterPairLow<Register>()));
+ __ adc(out.AsRegisterPairHigh<Register>(),
+ first.AsRegisterPairHigh<Register>(),
+ ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ }
break;
}
@@ -2608,7 +2613,7 @@ void LocationsBuilderARM::VisitSub(HSub* sub) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -2644,13 +2649,18 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) {
}
case Primitive::kPrimLong: {
- DCHECK(second.IsRegisterPair());
- __ subs(out.AsRegisterPairLow<Register>(),
- first.AsRegisterPairLow<Register>(),
- ShifterOperand(second.AsRegisterPairLow<Register>()));
- __ sbc(out.AsRegisterPairHigh<Register>(),
- first.AsRegisterPairHigh<Register>(),
- ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ if (second.IsConstant()) {
+ uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+ GenerateAddLongConst(out, first, -value);
+ } else {
+ DCHECK(second.IsRegisterPair());
+ __ subs(out.AsRegisterPairLow<Register>(),
+ first.AsRegisterPairLow<Register>(),
+ ShifterOperand(second.AsRegisterPairLow<Register>()));
+ __ sbc(out.AsRegisterPairHigh<Register>(),
+ first.AsRegisterPairHigh<Register>(),
+ ShifterOperand(second.AsRegisterPairHigh<Register>()));
+ }
break;
}
@@ -4052,31 +4062,51 @@ bool LocationsBuilderARM::CanEncodeConstantAsImmediate(HConstant* input_cst,
Opcode opcode) {
uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
if (Primitive::Is64BitType(input_cst->GetType())) {
- return CanEncodeConstantAsImmediate(Low32Bits(value), opcode) &&
- CanEncodeConstantAsImmediate(High32Bits(value), opcode);
+ Opcode high_opcode = opcode;
+ SetCc low_set_cc = kCcDontCare;
+ switch (opcode) {
+ case SUB:
+ // Flip the operation to an ADD.
+ value = -value;
+ opcode = ADD;
+ FALLTHROUGH_INTENDED;
+ case ADD:
+ if (Low32Bits(value) == 0u) {
+ return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare);
+ }
+ high_opcode = ADC;
+ low_set_cc = kCcSet;
+ break;
+ default:
+ break;
+ }
+ return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) &&
+ CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare);
} else {
return CanEncodeConstantAsImmediate(Low32Bits(value), opcode);
}
}
-bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode) {
+bool LocationsBuilderARM::CanEncodeConstantAsImmediate(uint32_t value,
+ Opcode opcode,
+ SetCc set_cc) {
ShifterOperand so;
ArmAssembler* assembler = codegen_->GetAssembler();
- if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, &so)) {
+ if (assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, opcode, value, set_cc, &so)) {
return true;
}
Opcode neg_opcode = kNoOperand;
switch (opcode) {
- case AND:
- neg_opcode = BIC;
- break;
- case ORR:
- neg_opcode = ORN;
- break;
+ case AND: neg_opcode = BIC; value = ~value; break;
+ case ORR: neg_opcode = ORN; value = ~value; break;
+ case ADD: neg_opcode = SUB; value = -value; break;
+ case ADC: neg_opcode = SBC; value = ~value; break;
+ case SUB: neg_opcode = ADD; value = -value; break;
+ case SBC: neg_opcode = ADC; value = ~value; break;
default:
return false;
}
- return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, ~value, &so);
+ return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so);
}
void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
@@ -6202,6 +6232,34 @@ void InstructionCodeGeneratorARM::GenerateEorConst(Register out, Register first,
__ eor(out, first, ShifterOperand(value));
}
+void InstructionCodeGeneratorARM::GenerateAddLongConst(Location out,
+ Location first,
+ uint64_t value) {
+ Register out_low = out.AsRegisterPairLow<Register>();
+ Register out_high = out.AsRegisterPairHigh<Register>();
+ Register first_low = first.AsRegisterPairLow<Register>();
+ Register first_high = first.AsRegisterPairHigh<Register>();
+ uint32_t value_low = Low32Bits(value);
+ uint32_t value_high = High32Bits(value);
+ if (value_low == 0u) {
+ if (out_low != first_low) {
+ __ mov(out_low, ShifterOperand(first_low));
+ }
+ __ AddConstant(out_high, first_high, value_high);
+ return;
+ }
+ __ AddConstantSetFlags(out_low, first_low, value_low);
+ ShifterOperand so;
+ if (__ ShifterOperandCanHold(out_high, first_high, ADC, value_high, kCcDontCare, &so)) {
+ __ adc(out_high, first_high, so);
+ } else if (__ ShifterOperandCanHold(out_low, first_low, SBC, ~value_high, kCcDontCare, &so)) {
+ __ sbc(out_high, first_high, so);
+ } else {
+ LOG(FATAL) << "Unexpected constant " << value_high;
+ UNREACHABLE();
+ }
+}
+
void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location first = locations->InAt(0);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index fa7709b9a3..5d9b2dce1c 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -183,7 +183,7 @@ class LocationsBuilderARM : public HGraphVisitor {
Location ArithmeticZeroOrFpuRegister(HInstruction* input);
Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode);
bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode);
- bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode);
+ bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare);
CodeGeneratorARM* const codegen_;
InvokeDexCallingConventionVisitorARM parameter_visitor_;
@@ -220,6 +220,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
void GenerateAndConst(Register out, Register first, uint32_t value);
void GenerateOrrConst(Register out, Register first, uint32_t value);
void GenerateEorConst(Register out, Register first, uint32_t value);
+ void GenerateAddLongConst(Location out, Location first, uint64_t value);
void HandleBitwiseOperation(HBinaryOperation* operation);
void HandleCondition(HCondition* condition);
void HandleIntegerRotate(LocationSummary* locations);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index ade21174f4..a85cd54ff3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -7099,12 +7099,6 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// /* LockWord */ lock_word = LockWord(monitor)
static_assert(sizeof(LockWord) == sizeof(int32_t),
"art::LockWord and int32_t have different sizes.");
- // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
- __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
- __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
- static_assert(
- LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
- "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86 memory model.
@@ -7124,8 +7118,13 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// if (rb_state == ReadBarrier::gray_ptr_)
// ref = ReadBarrier::Mark(ref);
- __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
- __ j(kEqual, slow_path->GetEntryLabel());
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index eadb431440..e0013634f1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -6551,12 +6551,6 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// /* LockWord */ lock_word = LockWord(monitor)
static_assert(sizeof(LockWord) == sizeof(int32_t),
"art::LockWord and int32_t have different sizes.");
- // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
- __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
- __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
- static_assert(
- LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
- "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86-64 memory model.
@@ -6576,8 +6570,13 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// if (rb_state == ReadBarrier::gray_ptr_)
// ref = ReadBarrier::Mark(ref);
- __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
- __ j(kEqual, slow_path->GetEntryLabel());
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 3429a8fdbb..1a8eb58857 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -27,9 +27,6 @@ namespace art {
class CompilerDriver;
class DexFile;
-// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751
-static constexpr bool kRoundIsPlusPointFive = false;
-
// Positive floating-point infinities.
static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index e7c40e6600..e233672705 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1160,8 +1160,10 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
- Register str = XRegisterFrom(locations->InAt(0));
- Register arg = XRegisterFrom(locations->InAt(1));
+ Register str = InputRegisterAt(invoke, 0);
+ Register arg = InputRegisterAt(invoke, 1);
+ DCHECK(str.IsW());
+ DCHECK(arg.IsW());
Register out = OutputRegister(invoke);
Register temp0 = WRegisterFrom(locations->GetTemp(0));
@@ -1192,8 +1194,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
__ Subs(out, str, arg);
__ B(&end, eq);
// Load lengths of this and argument strings.
- __ Ldr(temp0, MemOperand(str.X(), count_offset));
- __ Ldr(temp1, MemOperand(arg.X(), count_offset));
+ __ Ldr(temp0, HeapOperand(str, count_offset));
+ __ Ldr(temp1, HeapOperand(arg, count_offset));
// Return zero if both strings are empty.
__ Orr(out, temp0, temp1);
__ Cbz(out, &end);
@@ -1222,8 +1224,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
// Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
__ Bind(&loop);
- __ Ldr(temp4, MemOperand(str.X(), temp1));
- __ Ldr(temp0, MemOperand(arg.X(), temp1));
+ __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
+ __ Ldr(temp0, MemOperand(arg.X(), temp1.X()));
__ Cmp(temp4, temp0);
__ B(ne, &find_char_diff);
__ Add(temp1, temp1, char_size * 4);
@@ -1242,14 +1244,14 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
__ Clz(temp1, temp1);
// If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
// the difference occurs outside the remaining string data, so just return length diff (out).
- __ Cmp(temp2, Operand(temp1, LSR, 4));
+ __ Cmp(temp2, Operand(temp1.W(), LSR, 4));
__ B(le, &end);
// Extract the characters and calculate the difference.
__ Bic(temp1, temp1, 0xf);
__ Lsr(temp0, temp0, temp1);
__ Lsr(temp4, temp4, temp1);
__ And(temp4, temp4, 0xffff);
- __ Sub(out, temp4, Operand(temp0, UXTH));
+ __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH));
__ Bind(&end);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index dc409c92d6..22f4181b92 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -753,11 +753,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
- // See intrinsics.h.
- if (!kRoundIsPlusPointFive) {
- return;
- }
-
// Do we have instruction support?
if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
@@ -795,7 +790,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
}
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
- Register constant_area = locations->InAt(1).AsRegister<Register>();
XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
Register out = locations->Out().AsRegister<Register>();
@@ -810,10 +804,23 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
__ movss(t2, in);
__ roundss(t1, in, Immediate(1));
__ subss(t2, t1);
- __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
- __ j(kBelow, &skip_incr);
- __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
- __ Bind(&skip_incr);
+ if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
+ // Direct constant area available.
+ Register constant_area = locations->InAt(1).AsRegister<Register>();
+ __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
+ __ j(kBelow, &skip_incr);
+ __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
+ __ Bind(&skip_incr);
+ } else {
+ // No constant area: go through stack.
+ __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
+ __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
+ __ comiss(t2, Address(ESP, 4));
+ __ j(kBelow, &skip_incr);
+ __ addss(t1, Address(ESP, 0));
+ __ Bind(&skip_incr);
+ __ addl(ESP, Immediate(8));
+ }
// Final conversion to an integer. Unfortunately this also does not have a
// direct x86 instruction, since NaN should map to 0 and large positive
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 7dfbfb09be..ab8b05c3d4 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -598,10 +598,6 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
}
void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
- // See intrinsics.h.
- if (!kRoundIsPlusPointFive) {
- return;
- }
CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}
@@ -646,10 +642,6 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
- // See intrinsics.h.
- if (!kRoundIsPlusPointFive) {
- return;
- }
CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}