Diffstat (limited to 'compiler/optimizing')

 -rw-r--r--  compiler/optimizing/code_generator_x86.cc    | 15
 -rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 15
 -rw-r--r--  compiler/optimizing/intrinsics.h             |  3
 -rw-r--r--  compiler/optimizing/intrinsics_arm64.cc      | 18
 -rw-r--r--  compiler/optimizing/intrinsics_x86.cc        | 27
 -rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc     |  8

6 files changed, 41 insertions, 45 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index ade21174f4..a85cd54ff3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -7099,12 +7099,6 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
   // /* LockWord */ lock_word = LockWord(monitor)
   static_assert(sizeof(LockWord) == sizeof(int32_t),
                 "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
-  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");

   // Load fence to prevent load-load reordering.
   // Note that this is a no-op, thanks to the x86 memory model.
@@ -7124,8 +7118,13 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i

   // if (rb_state == ReadBarrier::gray_ptr_)
   //   ref = ReadBarrier::Mark(ref);
-  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
-  __ j(kEqual, slow_path->GetEntryLabel());
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with SHR.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+  __ j(kCarrySet, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }

diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index eadb431440..e0013634f1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -6551,12 +6551,6 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
   // /* LockWord */ lock_word = LockWord(monitor)
   static_assert(sizeof(LockWord) == sizeof(int32_t),
                 "art::LockWord and int32_t have different sizes.");
-  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
-  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
-  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
-  static_assert(
-      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
-      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");

   // Load fence to prevent load-load reordering.
   // Note that this is a no-op, thanks to the x86-64 memory model.
@@ -6576,8 +6570,13 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction

   // if (rb_state == ReadBarrier::gray_ptr_)
   //   ref = ReadBarrier::Mark(ref);
-  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
-  __ j(kEqual, slow_path->GetEntryLabel());
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit out of the lock word with SHR.
+  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1));
+  __ j(kCarrySet, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
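Why the new two-instruction check is equivalent to the old SHR/AND/CMP sequence: with the encoding white = 0, gray = 1, black = 2, gray is the only read barrier state whose low bit is set, so shifting the lock word right by kReadBarrierStateShift + 1 moves exactly that bit into the x86 carry flag, and j(kCarrySet) branches on it. The three static_asserts pin down this encoding, since the carry test would silently stop meaning "gray" if the values ever changed. A minimal scalar sketch of both checks (plain C++; the shift value 28 is an assumed stand-in, not necessarily ART's actual constant):

#include <cstdint>

// Hypothetical stand-ins for art::LockWord / art::ReadBarrier constants;
// only the relative encoding (white = 0, gray = 1, black = 2) matters.
constexpr uint32_t kReadBarrierStateShift = 28;
constexpr uint32_t kWhitePtr = 0;
constexpr uint32_t kGrayPtr = 1;
constexpr uint32_t kBlackPtr = 2;

// Old sequence (SHR + AND + CMP): isolate the two-bit rb_state, compare to gray.
bool IsGrayOld(uint32_t lock_word) {
  uint32_t rb_state = (lock_word >> kReadBarrierStateShift) & 0x3u;
  return rb_state == kGrayPtr;
}

// New sequence: one SHR by (kReadBarrierStateShift + 1) shifts the low
// rb_state bit out of the register; the hardware leaves it in the carry
// flag, which j(kCarrySet) tests. In C++ that shifted-out bit is simply:
bool IsGrayNew(uint32_t lock_word) {
  static_assert(kWhitePtr == 0 && kGrayPtr == 1 && kBlackPtr == 2,
                "the low-bit test only singles out gray under this encoding");
  return ((lock_word >> kReadBarrierStateShift) & 1u) != 0;
}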
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 3429a8fdbb..1a8eb58857 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -27,9 +27,6 @@ namespace art {
 class CompilerDriver;
 class DexFile;

-// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751
-static constexpr bool kRoundIsPlusPointFive = false;
-
 // Positive floating-point infinities.
 static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
 static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);

diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index e7c40e6600..e233672705 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1160,8 +1160,10 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
   MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();

-  Register str = XRegisterFrom(locations->InAt(0));
-  Register arg = XRegisterFrom(locations->InAt(1));
+  Register str = InputRegisterAt(invoke, 0);
+  Register arg = InputRegisterAt(invoke, 1);
+  DCHECK(str.IsW());
+  DCHECK(arg.IsW());
   Register out = OutputRegister(invoke);

   Register temp0 = WRegisterFrom(locations->GetTemp(0));
@@ -1192,8 +1194,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
   __ Subs(out, str, arg);
   __ B(&end, eq);
   // Load lengths of this and argument strings.
-  __ Ldr(temp0, MemOperand(str.X(), count_offset));
-  __ Ldr(temp1, MemOperand(arg.X(), count_offset));
+  __ Ldr(temp0, HeapOperand(str, count_offset));
+  __ Ldr(temp1, HeapOperand(arg, count_offset));
   // Return zero if both strings are empty.
   __ Orr(out, temp0, temp1);
   __ Cbz(out, &end);
@@ -1222,8 +1224,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {

   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
   __ Bind(&loop);
-  __ Ldr(temp4, MemOperand(str.X(), temp1));
-  __ Ldr(temp0, MemOperand(arg.X(), temp1));
+  __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
+  __ Ldr(temp0, MemOperand(arg.X(), temp1.X()));
   __ Cmp(temp4, temp0);
   __ B(ne, &find_char_diff);
   __ Add(temp1, temp1, char_size * 4);
@@ -1242,14 +1244,14 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {

   __ Clz(temp1, temp1);
   // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
   // the difference occurs outside the remaining string data, so just return length diff (out).
-  __ Cmp(temp2, Operand(temp1, LSR, 4));
+  __ Cmp(temp2, Operand(temp1.W(), LSR, 4));
   __ B(le, &end);
   // Extract the characters and calculate the difference.
   __ Bic(temp1, temp1, 0xf);
   __ Lsr(temp0, temp0, temp1);
   __ Lsr(temp4, temp4, temp1);
   __ And(temp4, temp4, 0xffff);
-  __ Sub(out, temp4, Operand(temp0, UXTH));
+  __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH));

   __ Bind(&end);
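Two things happen in the ARM64 hunks above. The W/X changes keep VIXL operand widths consistent: compressed heap references and string lengths are 32-bit values held in W registers, while address arithmetic (MemOperand bases and indexes) needs X registers. Separately, the find_char_diff tail that extracts the first differing character is easier to follow in scalar form. Assuming the (unshown) instructions just before the Clz XOR the two 4-character chunks and bit-reverse the result, so that Clz yields the index of the lowest differing bit, the extraction is roughly this C++ (names are illustrative, not from the intrinsic):

#include <cstdint>

// Returns str_char - arg_char for the first differing position of two
// little-endian chunks of four UTF-16 units. Assumes the chunks differ and
// the difference lies within the valid string data (the Cmp/B(le) above
// bails out to the length difference otherwise).
int32_t FirstCharDiff(uint64_t str_chunk, uint64_t arg_chunk) {
  uint64_t diff = str_chunk ^ arg_chunk;  // Eor
  int bit = __builtin_ctzll(diff);        // Rbit + Clz (GCC/Clang builtin here)
  int shift = bit & ~0xf;                 // Bic: round down to a 16-bit boundary
  uint16_t str_char = static_cast<uint16_t>(str_chunk >> shift);  // Lsr, And 0xffff
  uint16_t arg_char = static_cast<uint16_t>(arg_chunk >> shift);  // Lsr, UXTH
  return static_cast<int32_t>(str_char) - static_cast<int32_t>(arg_char);  // Sub
}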
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index dc409c92d6..22f4181b92 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -753,11 +753,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
 }

 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (!kRoundIsPlusPointFive) {
-    return;
-  }
-
   // Do we have instruction support?
   if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
     HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
@@ -795,7 +790,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
   }

   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
-  Register constant_area = locations->InAt(1).AsRegister<Register>();
   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
   Register out = locations->Out().AsRegister<Register>();
@@ -810,10 +804,23 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
   __ movss(t2, in);
   __ roundss(t1, in, Immediate(1));
   __ subss(t2, t1);
-  __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
-  __ j(kBelow, &skip_incr);
-  __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
-  __ Bind(&skip_incr);
+  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
+    // Direct constant area available.
+    Register constant_area = locations->InAt(1).AsRegister<Register>();
+    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
+    __ j(kBelow, &skip_incr);
+    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
+    __ Bind(&skip_incr);
+  } else {
+    // No constant area: go through stack.
+    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
+    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
+    __ comiss(t2, Address(ESP, 4));
+    __ j(kBelow, &skip_incr);
+    __ addss(t1, Address(ESP, 0));
+    __ Bind(&skip_incr);
+    __ addl(ESP, Immediate(8));
+  }

   // Final conversion to an integer. Unfortunately this also does not have a
   // direct x86 instruction, since NaN should map to 0 and large positive
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 7dfbfb09be..ab8b05c3d4 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -598,10 +598,6 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
 }

 void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (!kRoundIsPlusPointFive) {
-    return;
-  }
   CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
 }

@@ -646,10 +642,6 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
 }

 void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (!kRoundIsPlusPointFive) {
-    return;
-  }
   CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
 }
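The deleted kRoundIsPlusPointFive guard had kept these intrinsics disabled until they matched the Java 7 definition of Math.round, (int) Math.floor(x + 0.5f). The sequence in VisitMathRoundFloat now implements exactly that, but without adding 0.5f in float arithmetic, where the addition itself can round: 0.49999997f + 0.5f evaluates to 1.0f, which would wrongly round 0.49999997f up to 1. Instead it floors first and compares the fractional part against an exact 0.5f. A scalar model of the emitted sequence (a sketch, not ART code; the clamping mirrors the fixups the intrinsic emits around the final conversion):

#include <cmath>
#include <cstdint>
#include <limits>

// Java-7 Math.round(float): (int) floor(in + 0.5f), computed by comparing
// the fractional part to 0.5 so no float addition can distort the result.
int32_t JavaRoundFloat(float in) {
  float t1 = std::floor(in);  // roundss(t1, in, Immediate(1)): round down
  float t2 = in - t1;         // subss: fractional part (NaN stays NaN)
  if (t2 >= 0.5f) {           // comiss vs 0.5f; unordered (NaN) skips the add
    t1 += 1.0f;               // addss
  }
  // A plain cvt instruction does not give Java semantics, so the intrinsic
  // adds fixups: NaN maps to 0 and the result saturates at the int range.
  if (std::isnan(in)) {
    return 0;
  }
  if (t1 >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();
  }
  if (t1 < static_cast<float>(std::numeric_limits<int32_t>::min())) {
    return std::numeric_limits<int32_t>::min();
  }
  return static_cast<int32_t>(t1);
}

The new else branch in the 32-bit code covers invokes that carry no constant-area base register: the bit patterns of 0.5f and 1.0f are pushed onto the stack, addressed off ESP for the comiss/addss, and the eight bytes are released again with addl(ESP, 8).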