summaryrefslogtreecommitdiff
path: root/compiler/optimizing
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--compiler/optimizing/code_generator_x86.cc15
-rw-r--r--compiler/optimizing/code_generator_x86_64.cc15
-rw-r--r--compiler/optimizing/intrinsics.h3
-rw-r--r--compiler/optimizing/intrinsics_arm64.cc18
-rw-r--r--compiler/optimizing/intrinsics_x86.cc27
-rw-r--r--compiler/optimizing/intrinsics_x86_64.cc8
6 files changed, 41 insertions, 45 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index ade21174f4..a85cd54ff3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -7099,12 +7099,6 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// /* LockWord */ lock_word = LockWord(monitor)
static_assert(sizeof(LockWord) == sizeof(int32_t),
"art::LockWord and int32_t have different sizes.");
- // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
- __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
- __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
- static_assert(
- LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
- "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86 memory model.
@@ -7124,8 +7118,13 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// if (rb_state == ReadBarrier::gray_ptr_)
// ref = ReadBarrier::Mark(ref);
- __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
- __ j(kEqual, slow_path->GetEntryLabel());
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index eadb431440..e0013634f1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -6551,12 +6551,6 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// /* LockWord */ lock_word = LockWord(monitor)
static_assert(sizeof(LockWord) == sizeof(int32_t),
"art::LockWord and int32_t have different sizes.");
- // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
- __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
- __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
- static_assert(
- LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
- "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
// Load fence to prevent load-load reordering.
// Note that this is a no-op, thanks to the x86-64 memory model.
@@ -6576,8 +6570,13 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// if (rb_state == ReadBarrier::gray_ptr_)
// ref = ReadBarrier::Mark(ref);
- __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
- __ j(kEqual, slow_path->GetEntryLabel());
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 3429a8fdbb..1a8eb58857 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -27,9 +27,6 @@ namespace art {
class CompilerDriver;
class DexFile;
-// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751
-static constexpr bool kRoundIsPlusPointFive = false;
-
// Positive floating-point infinities.
static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index e7c40e6600..e233672705 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1160,8 +1160,10 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
- Register str = XRegisterFrom(locations->InAt(0));
- Register arg = XRegisterFrom(locations->InAt(1));
+ Register str = InputRegisterAt(invoke, 0);
+ Register arg = InputRegisterAt(invoke, 1);
+ DCHECK(str.IsW());
+ DCHECK(arg.IsW());
Register out = OutputRegister(invoke);
Register temp0 = WRegisterFrom(locations->GetTemp(0));
@@ -1192,8 +1194,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
__ Subs(out, str, arg);
__ B(&end, eq);
// Load lengths of this and argument strings.
- __ Ldr(temp0, MemOperand(str.X(), count_offset));
- __ Ldr(temp1, MemOperand(arg.X(), count_offset));
+ __ Ldr(temp0, HeapOperand(str, count_offset));
+ __ Ldr(temp1, HeapOperand(arg, count_offset));
// Return zero if both strings are empty.
__ Orr(out, temp0, temp1);
__ Cbz(out, &end);
@@ -1222,8 +1224,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
// Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
__ Bind(&loop);
- __ Ldr(temp4, MemOperand(str.X(), temp1));
- __ Ldr(temp0, MemOperand(arg.X(), temp1));
+ __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
+ __ Ldr(temp0, MemOperand(arg.X(), temp1.X()));
__ Cmp(temp4, temp0);
__ B(ne, &find_char_diff);
__ Add(temp1, temp1, char_size * 4);
@@ -1242,14 +1244,14 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
__ Clz(temp1, temp1);
// If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
// the difference occurs outside the remaining string data, so just return length diff (out).
- __ Cmp(temp2, Operand(temp1, LSR, 4));
+ __ Cmp(temp2, Operand(temp1.W(), LSR, 4));
__ B(le, &end);
// Extract the characters and calculate the difference.
__ Bic(temp1, temp1, 0xf);
__ Lsr(temp0, temp0, temp1);
__ Lsr(temp4, temp4, temp1);
__ And(temp4, temp4, 0xffff);
- __ Sub(out, temp4, Operand(temp0, UXTH));
+ __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH));
__ Bind(&end);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index dc409c92d6..22f4181b92 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -753,11 +753,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
- // See intrinsics.h.
- if (!kRoundIsPlusPointFive) {
- return;
- }
-
// Do we have instruction support?
if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
@@ -795,7 +790,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
}
XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
- Register constant_area = locations->InAt(1).AsRegister<Register>();
XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
Register out = locations->Out().AsRegister<Register>();
@@ -810,10 +804,23 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
__ movss(t2, in);
__ roundss(t1, in, Immediate(1));
__ subss(t2, t1);
- __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
- __ j(kBelow, &skip_incr);
- __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
- __ Bind(&skip_incr);
+ if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
+ // Direct constant area available.
+ Register constant_area = locations->InAt(1).AsRegister<Register>();
+ __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
+ __ j(kBelow, &skip_incr);
+ __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
+ __ Bind(&skip_incr);
+ } else {
+ // No constant area: go through stack.
+ __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
+ __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
+ __ comiss(t2, Address(ESP, 4));
+ __ j(kBelow, &skip_incr);
+ __ addss(t1, Address(ESP, 0));
+ __ Bind(&skip_incr);
+ __ addl(ESP, Immediate(8));
+ }
// Final conversion to an integer. Unfortunately this also does not have a
// direct x86 instruction, since NaN should map to 0 and large positive
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 7dfbfb09be..ab8b05c3d4 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -598,10 +598,6 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
}
void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
- // See intrinsics.h.
- if (!kRoundIsPlusPointFive) {
- return;
- }
CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}
@@ -646,10 +642,6 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
- // See intrinsics.h.
- if (!kRoundIsPlusPointFive) {
- return;
- }
CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}