/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include <limits>

#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"

namespace art {

namespace x86 {

static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
  : arena_(codegen->GetGraph()->GetArena()),
    codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return down_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();

    Register src = locations->InAt(0).AsRegister<Register>();
    Location src_pos = locations->InAt(1);
    Register dest = locations->InAt(2).AsRegister<Register>();
    Location dest_pos = locations->InAt(3);
    Location length = locations->InAt(4);
    Location temp1_loc = locations->GetTemp(0);
    Register temp1 = temp1_loc.AsRegister<Register>();
    Register temp2 = locations->GetTemp(1).AsRegister<Register>();
    Register temp3 = locations->GetTemp(2).AsRegister<Register>();

    __ Bind(GetEntryLabel());
    // In this code path, registers `temp1`, `temp2` and `temp3` are not used
    // for the base source address, the base destination address and the end
    // source address, as they are in other SystemArrayCopy intrinsic code
    // paths. Instead they are used for:
    // - the loop index (`i`);
    // - the source index (`src_index`) and the loaded (source)
    //   reference (`value`); and
    // - the destination index (`dest_index`).

    // i = 0
    __ xorl(temp1, temp1);
    NearLabel loop;
    __ Bind(&loop);
    // value = src_array[i + src_pos]
    if (src_pos.IsConstant()) {
      int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
    } else {
      __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
    }
    __ MaybeUnpoisonHeapReference(temp2);
    // TODO: Inline the mark bit check before calling the runtime?
    // value = ReadBarrier::Mark(value)
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(temp2, ESP);
    DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(temp2);
    // dest_array[i + dest_pos] = value
    if (dest_pos.IsConstant()) {
      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
    } else {
      __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
    }
    // ++i
    __ addl(temp1, Immediate(1));
    // if (i != length) goto loop
    x86_codegen->GenerateIntCompare(temp1_loc, length);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
};

#undef __

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}
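
// Illustrative example: for the double 1.0 (raw bits 0x3FF0000000000000), the
// 64-bit path above leaves 0x00000000 in the low output register, shifts the
// XMM temp right by 32, and leaves 0x3FF00000 in the high output register,
// i.e. exactly Double.doubleToRawLongBits(1.0) split into register halves.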

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}
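
// Example for the short case: the value 0x1234 arrives sign-extended in `out`
// as 0x00001234; bswapl turns it into 0x34120000 and the arithmetic shift
// right by 16 yields 0x00003412, the byte-reversed short. Using sarl rather
// than shrl keeps the result sign-extended, e.g. reverseBytes((short) 0x80FF)
// ends up as 0xFFFFFF80 in the register, i.e. the short value 0xFF80.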

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
  // Assign the inputs to the outputs, mixing low/high.
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}
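
// Example: reversing 0x0102030405060708 first swaps the 32-bit halves
// (output_lo = 0x01020304, output_hi = 0x05060708) and then byte-swaps each
// half, producing the pair 0x08070605:0x04030201, i.e. 0x0807060504030201.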

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    // We need addressability for the constant area.
    locations->SetInAt(1, Location::RequiresRegister());
    // We need a temporary to hold the constant.
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MathAbsFP(HInvoke* invoke,
                      bool is64bit,
                      X86Assembler* assembler,
                      CodeGeneratorX86* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    DCHECK(locations->InAt(1).IsRegister());
    // We also have a constant area pointer.
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    if (is64bit) {
      __ movsd(temp, codegen->LiteralInt64Address(
          INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area));
      __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
    } else {
      __ movss(temp, codegen->LiteralInt32Address(
          INT32_C(0x7FFFFFFF), method_address, constant_area));
      __ andps(output.AsFpuRegister<XmmRegister>(), temp);
    }
  } else {
    // Create the right constant on an aligned stack.
    if (is64bit) {
      __ subl(ESP, Immediate(8));
      __ pushl(Immediate(0x7FFFFFFF));
      __ pushl(Immediate(0xFFFFFFFF));
      __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    } else {
      __ subl(ESP, Immediate(12));
      __ pushl(Immediate(0x7FFFFFFF));
      __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    }
    __ addl(ESP, Immediate(16));
  }
}
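
// The masks above simply clear the IEEE-754 sign bit. Example: the double
// -2.0 has raw bits 0xC000000000000000; ANDing with 0x7FFFFFFFFFFFFFFF
// leaves 0x4000000000000000, which is +2.0. The same trick maps -0.0 to
// +0.0 and leaves NaN payloads untouched, matching Math.abs semantics.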

void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(EAX));
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RegisterLocation(EDX));
}

static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
  Location output = locations->Out();
  Register out = output.AsRegister<Register>();
  DCHECK_EQ(out, EAX);
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(temp, EDX);

  // Sign extend EAX into EDX.
  __ cdq();

  // XOR EAX with sign.
  __ xorl(EAX, EDX);

  // Subtract out sign to correct.
  __ subl(EAX, EDX);

  // The result is in EAX.
}
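
// Branchless abs: cdq copies the sign of EAX into every bit of EDX, so EDX is
// 0 for non-negative inputs and 0xFFFFFFFF for negative ones. Example with
// EAX = -5 (0xFFFFFFFB): the xorl gives 0x00000004 and subtracting -1 gives
// 0x00000005 = |-5|. For EAX >= 0 both operations are no-ops. The most
// negative int, 0x80000000, maps to itself, exactly as Math.abs requires.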

static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  // Compute the sign into the temporary.
  __ movl(temp, input_hi);
  __ sarl(temp, Immediate(31));

  // Store the sign into the output.
  __ movl(output_lo, temp);
  __ movl(output_hi, temp);

  // XOR the input to the output.
  __ xorl(output_lo, input_lo);
  __ xorl(output_hi, input_hi);

  // Subtract the sign.
  __ subl(output_lo, temp);
  __ sbbl(output_hi, temp);
}
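
// This is the same xor/subtract trick as GenAbsInteger, widened to the 32-bit
// register pair: `temp` holds the sign word (0 or 0xFFFFFFFF), and the
// subl/sbbl pair performs a single 64-bit subtraction, with sbbl consuming
// the borrow produced by subl on the low half.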

void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
  CreateAbsIntLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
  CreateAbsLongLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsLong(invoke->GetLocations(), GetAssembler());
}

static void GenMinMaxFP(HInvoke* invoke,
                        bool is_min,
                        bool is_double,
                        X86Assembler* assembler,
                        CodeGeneratorX86* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if NaN jmp NaN_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // NaN_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  // Do we have a constant area pointer?
  if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
    DCHECK(locations->InAt(2).IsRegister());
    Register constant_area = locations->InAt(2).AsRegister<Register>();
    if (is_double) {
      __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
    } else {
      __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
    }
  } else {
    if (is_double) {
      __ pushl(Immediate(kDoubleNaNHigh));
      __ pushl(Immediate(kDoubleNaNLow));
      __ movsd(out, Address(ESP, 0));
      __ addl(ESP, Immediate(8));
    } else {
      __ pushl(Immediate(kFloatNaN));
      __ movss(out, Address(ESP, 0));
      __ addl(ESP, Immediate(4));
    }
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}
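
// Why orpd/andpd for the +0.0/-0.0 case: ucomiss reports +0.0 == -0.0, yet
// Math.min(+0.0, -0.0) must be -0.0 and Math.max(+0.0, -0.0) must be +0.0.
// ORing the raw bits (0x00000000 | 0x80000000) sets the sign bit, picking
// -0.0 for min; ANDing clears it, picking +0.0 for max.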

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    locations->SetInAt(2, Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ true,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ true,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ false,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ false,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    // a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  if (is_long) {
    // Need to perform a subtract to get the sign right.
    // op1 is already in the same location as the output.
    Location output = locations->Out();
    Register output_lo = output.AsRegisterPairLow<Register>();
    Register output_hi = output.AsRegisterPairHigh<Register>();

    Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();

    // Spare register to compute the subtraction to set condition code.
    Register temp = locations->GetTemp(0).AsRegister<Register>();

    // Subtract off op2_low.
    __ movl(temp, output_lo);
    __ subl(temp, op2_lo);

    // Now use the same temporary and the borrow to finish the subtraction of op2_hi.
    __ movl(temp, output_hi);
    __ sbbl(temp, op2_hi);

    // Now the condition code is correct.
    Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    __ cmovl(cond, output_lo, op2_lo);
    __ cmovl(cond, output_hi, op2_hi);
  } else {
    Register out = locations->Out().AsRegister<Register>();
    Register op2 = op2_loc.AsRegister<Register>();

    //  (out := op1)
    //  out <=? op2
    //  if out is min jmp done
    //  out := op2
    // done:

    __ cmpl(out, op2);
    Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    __ cmovl(cond, out, op2);
  }
}
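
// For the long case, the movl/subl/movl/sbbl sequence computes op1 - op2
// purely for its flags, as if by a 64-bit compare: subl compares the low
// words and sbbl folds its borrow into the high-word comparison. The two
// cmovl instructions then move both halves of op2 into the output, or
// neither, so the result is always a consistent 64-bit value.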

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Register to use to perform a long subtract to set cc.
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
                                      Location::RegisterLocation(EAX));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
                                    HInvoke* invoke,
                                    X86Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}
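
// The `round_mode` immediates passed below select the SSE4.1 rounding mode:
// 0 rounds to nearest (rint), 1 rounds toward negative infinity (floor),
// and 2 rounds toward positive infinity (ceil).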

void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // Do we have instruction support?
  if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    DCHECK(static_or_direct != nullptr);
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    if (static_or_direct->HasSpecialInput() &&
        invoke->InputAt(
            static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
      locations->SetInAt(1, Location::RequiresRegister());
    }
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {  // TODO: can we reach this?
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  NearLabel skip_incr, done;
  X86Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    // Direct constant area available.
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
                                                method_address,
                                                constant_area));
    __ j(kBelow, &skip_incr);
    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
                                               method_address,
                                               constant_area));
    __ Bind(&skip_incr);
  } else {
    // No constant area: go through stack.
    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
    __ comiss(t2, Address(ESP, 4));
    __ j(kBelow, &skip_incr);
    __ addss(t1, Address(ESP, 0));
    __ Bind(&skip_incr);
    __ addl(ESP, Immediate(8));
  }

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  __ movl(out, Immediate(kPrimIntMax));
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}
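
// Example of the overall semantics: round(5.5f) computes floor = 5.0f and
// 5.5f - 5.0f >= 0.5f, so the result is 6; round(-5.5f) computes floor =
// -6.0f and -5.5f - (-6.0f) >= 0.5f, so the result is -5 (round half up, as
// Math.round specifies). NaN falls through the unordered comiss paths and
// produces 0, and values too large for an int are clipped to kPrimIntMax.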

static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                      HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86Assembler* assembler = codegen->GetAssembler();

  // We need some place to pass the parameters.
  __ subl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(16);

  // Pass the parameters at the bottom of the stack.
  __ movsd(Address(ESP, 0), XMM0);

  // If we have a second parameter, pass it next.
  if (invoke->GetNumberOfArguments() == 2) {
    __ movsd(Address(ESP, 8), XMM1);
  }

  // Now do the actual call.
  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());

  // Extract the return value from the FP stack.
  __ fstpl(Address(ESP, 0));
  __ movsd(XMM0, Address(ESP, 0));

  // And clean up the stack.
  __ addl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(-16);
}
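
// Calling convention note: on 32-bit x86 the native math entrypoints take
// their double arguments on the stack and return the result on the x87
// stack in st(0). The fstpl/movsd pair after the call is what moves that
// x87 result into XMM0, where the compiler expects FP return values.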

void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // We need at least two of the positions or length to be an integer constant,
  // or else we won't have enough free registers.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();

  int num_constants =
      ((src_pos != nullptr) ? 1 : 0)
      + ((dest_pos != nullptr) ? 1 : 0)
      + ((length != nullptr) ? 1 : 0);

  if (num_constants < 2) {
    // Not enough free registers.
    return;
  }

  // As long as we are checking, we might as well check to see if the src and dest
  // positions are >= 0.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // And since we are already checking, check the length too.
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  // Okay, it is safe to generate inline code.
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
  locations->AddTemp(Location::RegisterLocation(ECX));
}

static void CheckPosition(X86Assembler* assembler,
                          Location pos,
                          Register input,
                          Location length,
                          SlowPathCode* slow_path,
                          Register temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<Register>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<Register>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}
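
// In Java-like pseudocode, the checks above implement (a sketch):
//   if (pos < 0 || pos > input.length || input.length - pos < length) {
//     goto slow_path;  // defer to the runtime implementation
//   }
// with the pos == 0 and length_is_input_length special cases folded away
// when they are known at compile time.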
1251
1252void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
1253 X86Assembler* assembler = GetAssembler();
1254 LocationSummary* locations = invoke->GetLocations();
1255
1256 Register src = locations->InAt(0).AsRegister<Register>();
1257 Location srcPos = locations->InAt(1);
1258 Register dest = locations->InAt(2).AsRegister<Register>();
1259 Location destPos = locations->InAt(3);
1260 Location length = locations->InAt(4);
1261
1262 // Temporaries that we need for MOVSW.
1263 Register src_base = locations->GetTemp(0).AsRegister<Register>();
1264 DCHECK_EQ(src_base, ESI);
1265 Register dest_base = locations->GetTemp(1).AsRegister<Register>();
1266 DCHECK_EQ(dest_base, EDI);
1267 Register count = locations->GetTemp(2).AsRegister<Register>();
1268 DCHECK_EQ(count, ECX);
1269
Andreas Gampe85b62f22015-09-09 13:15:38 -07001270 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
Mark Mendell6bc53a92015-07-01 14:26:52 -04001271 codegen_->AddSlowPath(slow_path);
1272
1273 // Bail out if the source and destination are the same (to handle overlap).
1274 __ cmpl(src, dest);
1275 __ j(kEqual, slow_path->GetEntryLabel());
1276
1277 // Bail out if the source is null.
1278 __ testl(src, src);
1279 __ j(kEqual, slow_path->GetEntryLabel());
1280
1281 // Bail out if the destination is null.
1282 __ testl(dest, dest);
1283 __ j(kEqual, slow_path->GetEntryLabel());
1284
1285 // If the length is negative, bail out.
1286 // We have already checked in the LocationsBuilder for the constant case.
1287 if (!length.IsConstant()) {
1288 __ cmpl(length.AsRegister<Register>(), length.AsRegister<Register>());
1289 __ j(kLess, slow_path->GetEntryLabel());
1290 }

  // We need the count in ECX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<Register>());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);

  // Okay, everything checks out. Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (srcPos.IsConstant()) {
    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (destPos.IsConstant()) {
    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
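  // REP MOVSW copies ECX 16-bit words from [ESI] to [EDI], advancing both
  // pointers by two bytes per element (the direction flag is clear under the ABI).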
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register argument = locations->InAt(1).AsRegister<Register>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
  locations->AddTemp(Location::RegisterLocation(ECX));
  locations->AddTemp(Location::RegisterLocation(EDI));

  // Set output, ESI needed for repe_cmpsl instruction anyways.
  locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register str = locations->InAt(0).AsRegister<Register>();
  Register arg = locations->InAt(1).AsRegister<Register>();
  Register ecx = locations->GetTemp(0).AsRegister<Register>();
  Register edi = locations->GetTemp(1).AsRegister<Register>();
  Register esi = locations->Out().AsRegister<Register>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  StringEqualsOptimizations optimizations(invoke);
  if (!optimizations.GetArgumentNotNull()) {
    // Check if input is null, return false if it is.
    __ testl(arg, arg);
    __ j(kEqual, &return_false);
  }

  if (!optimizations.GetArgumentIsString()) {
    // Instanceof check for the argument by comparing class fields.
    // All string objects must have the same type since String cannot be subclassed.
    // Receiver must be a string object, so its class field is equal to all strings' class fields.
    // If the argument is a string object, its class field must be equal to receiver's class field.
    __ movl(ecx, Address(str, class_offset));
    __ cmpl(ecx, Address(arg, class_offset));
    __ j(kNotEqual, &return_false);
  }

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length and compression flag of receiver string.
  __ movl(ecx, Address(str, count_offset));
  // Check if the lengths and compression flags are equal, return false if they're not.
  // Two identical strings will always have the same compression style since
  // compression style is decided on alloc.
  __ cmpl(ecx, Address(arg, count_offset));
  __ j(kNotEqual, &return_false);
  // Return true if strings are empty. Even with string compression `count == 0` means empty.
  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                "Expecting 0=compressed, 1=uncompressed");
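  // JECXZ branches iff ECX is zero and leaves the flags untouched, covering
  // the empty case for both encodings in a single instruction.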
  __ jecxz(&return_true);

  if (mirror::kUseStringCompression) {
    NearLabel string_uncompressed;
    // Extract length and differentiate between both compressed or both uncompressed.
    // Different compression style is cut above.
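    // SHR moves the compression flag (bit 0 of the count field) into CF:
    // carry set means both strings are uncompressed.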
    __ shrl(ecx, Immediate(1));
    __ j(kCarrySet, &string_uncompressed);
    // Divide string length by 2, rounding up, and continue as if uncompressed.
    __ addl(ecx, Immediate(1));
    __ shrl(ecx, Immediate(1));
    __ Bind(&string_uncompressed);
  }
  // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
  __ leal(esi, Address(str, value_offset));
  __ leal(edi, Address(arg, value_offset));

  // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
  // divisible by 2.
  __ addl(ecx, Immediate(1));
  __ shrl(ecx, Immediate(1));
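  // For example, an uncompressed length of 5 chars becomes 3 dword compares
  // (6 chars); the zeroed alignment padding asserted below makes the overread safe.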

  // Assertions that must hold in order to compare strings 2 characters (uncompressed)
  // or 4 characters (compressed) at a time.
  DCHECK_ALIGNED(value_offset, 4);
  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");

  // Loop to compare strings two characters at a time starting at the beginning of the string.
  __ repe_cmpsl();
  // If strings are not equal, zero flag will be cleared.
  __ j(kNotEqual, &return_false);

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ movl(esi, Immediate(1));
  __ jmp(&end);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ xorl(esi, esi);
  __ Bind(&end);
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in EDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(EDI));
  // If we look for a constant char, we'll still have to copy it into EAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber EAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(EAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber EDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses ECX as the counter.
  locations->AddTemp(Location::RegisterLocation(ECX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
  if (mirror::kUseStringCompression) {
    // Need another temporary to be able to save unflagged string length.
    locations->AddTemp(Location::RequiresRegister());
  }
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86Assembler* assembler,
                                  CodeGeneratorX86* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  Register string_obj = locations->InAt(0).AsRegister<Register>();
  Register search_value = locations->InAt(1).AsRegister<Register>();
  Register counter = locations->GetTemp(0).AsRegister<Register>();
  Register string_length = locations->GetTemp(1).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();
  // Only used when string compression feature is on.
  Register string_length_flagged;

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj, EDI);
  DCHECK_EQ(search_value, EAX);
  DCHECK_EQ(counter, ECX);
  DCHECK_EQ(out, EDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
  SlowPathCode* slow_path = nullptr;
  HInstruction* code_point = invoke->InputAt(1);
  if (code_point->IsIntConstant()) {
    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else if (code_point->GetType() != Primitive::kPrimChar) {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load the count field of the string containing the length and compression flag.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a zero-length check. Even with string compression `count == 0` means empty.
  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                "Expecting 0=compressed, 1=uncompressed");
  // TODO: Support jecxz.
  NearLabel not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (mirror::kUseStringCompression) {
    string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
    __ movl(string_length_flagged, string_length);
    // Extract the length and shift out the least significant bit used as compression flag.
    __ shrl(string_length, Immediate(1));
  }

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addl(string_obj, Immediate(value_offset));
  } else {
    Register start_index = locations->InAt(2).AsRegister<Register>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0.
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmovl(kGreater, counter, start_index);
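    // counter now holds max(start_index, 0), computed branchlessly with CMOV.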

    if (mirror::kUseStringCompression) {
      NearLabel modify_counter, offset_uncompressed_label;
      __ testl(string_length_flagged, Immediate(1));
      __ j(kNotZero, &offset_uncompressed_label);
      // Move to the start of the string: string_obj + value_offset + start_index.
      __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
      __ jmp(&modify_counter);

      // Move to the start of the string: string_obj + value_offset + 2 * start_index.
      __ Bind(&offset_uncompressed_label);
      __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

      // Now update ecx (the repne scasw work counter). We have string.length - start_index left
      // to compare.
      __ Bind(&modify_counter);
    } else {
      __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
    }
    __ negl(counter);
    __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
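    // NEG plus LEA compute counter = string_length - start_index, the number
    // of chars left to scan.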
  }

  if (mirror::kUseStringCompression) {
    NearLabel uncompressed_string_comparison;
    NearLabel comparison_done;
    __ testl(string_length_flagged, Immediate(1));
    __ j(kNotZero, &uncompressed_string_comparison);

    // Check if EAX (search_value) is ASCII.
    __ cmpl(search_value, Immediate(127));
    __ j(kGreater, &not_found_label);
    // Comparing byte-per-byte.
    __ repne_scasb();
    __ jmp(&comparison_done);

    // Everything is set up for repne scasw:
    // * Comparison address in EDI.
    // * Counter in ECX.
    __ Bind(&uncompressed_string_comparison);
    __ repne_scasw();
    __ Bind(&comparison_done);
  } else {
    __ repne_scasw();
  }
  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched. Compute the index of the result.
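  // REPNE SCAS stops on a hit with counter holding the number of elements not
  // yet scanned, so string_length - counter is one past the match index.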
  __ subl(string_length, counter);
  __ leal(out, Address(string_length, -1));

  NearLabel done;
  __ jmp(&done);

  // Failed to match; return -1.
  __ Bind(&not_found_label);
  __ movl(out, Immediate(-1));

  // And join up at the end.
  __ Bind(&done);
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
}

void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
}

void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
}

void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateStringIndexOf(
      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
}

void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register byte_array = locations->InAt(0).AsRegister<Register>();
  __ testl(byte_array, byte_array);
  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnMainOnly,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
  // No need to emit code checking whether `locations->InAt(2)` is a null
  // pointer, as callers of the native method
  //
  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
  //
  // all include a null check on `data` before calling that method.
  codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
}

void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnMainAndSlowPath,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register string_to_copy = locations->InAt(0).AsRegister<Register>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
  // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  // Place srcEnd in ECX to save a move below.
  locations->SetInAt(2, Location::RegisterLocation(ECX));
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  // We don't have enough registers to also grab ECX, so handle below.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
}

void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
  // Location of data in char array buffer.
  const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
  // Location of char array data in string.
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();

  // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
  Register obj = locations->InAt(0).AsRegister<Register>();
  Location srcBegin = locations->InAt(1);
  int srcBegin_value =
      srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
  Register srcEnd = locations->InAt(2).AsRegister<Register>();
  Register dst = locations->InAt(3).AsRegister<Register>();
  Register dstBegin = locations->InAt(4).AsRegister<Register>();

  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  // Compute the number of chars (words) to move.
  // Save ECX, since we don't know if it will be used later.
  __ pushl(ECX);
  int stack_adjust = kX86WordSize;
  __ cfi().AdjustCFAOffset(stack_adjust);
  DCHECK_EQ(srcEnd, ECX);
  if (srcBegin.IsConstant()) {
    __ subl(ECX, Immediate(srcBegin_value));
  } else {
    DCHECK(srcBegin.IsRegister());
    __ subl(ECX, srcBegin.AsRegister<Register>());
  }

  NearLabel done;
  if (mirror::kUseStringCompression) {
    // Location of count in string.
    const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
    DCHECK_EQ(c_char_size, 1u);
    __ pushl(EAX);
    __ cfi().AdjustCFAOffset(stack_adjust);

    NearLabel copy_loop, copy_uncompressed;
    __ testl(Address(obj, count_offset), Immediate(1));
    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                  "Expecting 0=compressed, 1=uncompressed");
    __ j(kNotZero, &copy_uncompressed);
    // Compute the address of the source string by adding the number of chars from
    // the source beginning to the value offset of a string.
    __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));

    // Start the loop to copy String's value to Array of Char.
    __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
    __ Bind(&copy_loop);
    __ jecxz(&done);
    // Use EAX temporary (convert byte from ESI to word).
    // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
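    // MOVZXB loads one compressed byte zero-extended into EAX; MOVW stores its
    // low 16 bits as the corresponding UTF-16 unit.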
    __ movzxb(EAX, Address(ESI, 0));
    __ movw(Address(EDI, 0), EAX);
    __ leal(EDI, Address(EDI, char_size));
    __ leal(ESI, Address(ESI, c_char_size));
    // TODO: Add support for LOOP to X86Assembler.
    __ subl(ECX, Immediate(1));
    __ jmp(&copy_loop);
    __ Bind(&copy_uncompressed);
  }

  // Do the copy for uncompressed string.
  // Compute the address of the destination buffer.
  __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
  __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
  __ rep_movsw();

  __ Bind(&done);
  if (mirror::kUseStringCompression) {
    // Restore EAX.
    __ popl(EAX);
    __ cfi().AdjustCFAOffset(-stack_adjust);
  }
  // Restore ECX.
  __ popl(ECX);
  __ cfi().AdjustCFAOffset(-stack_adjust);
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
  Register address = locations->InAt(0).AsRegisterPairLow<Register>();
  Location out_loc = locations->Out();
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
      __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateLongToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateLongToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateLongToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
                                         HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  HInstruction* value = invoke->InputAt(1);
  if (size == Primitive::kPrimByte) {
    locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
  } else {
    locations->SetInAt(1, Location::RegisterOrConstant(value));
  }
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
  Register address = locations->InAt(0).AsRegisterPairLow<Register>();
  Location value_loc = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value_loc.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value_loc.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ movw(Address(address, 0), value_loc.AsRegister<Register>());
      }
      break;
    case Primitive::kPrimInt:
      if (value_loc.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ movl(Address(address, 0), value_loc.AsRegister<Register>());
      }
      break;
    case Primitive::kPrimLong:
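      // Note: the long is written as two separate 32-bit stores, so the poke
      // is not a single atomic 64-bit access.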
      if (value_loc.IsConstant()) {
        int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
        __ movl(Address(address, 0), Immediate(Low32Bits(value)));
        __ movl(Address(address, 4), Immediate(High32Bits(value)));
      } else {
        __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
        __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
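  // ART addresses the current Thread through the FS segment on x86; the
  // managed java.lang.Thread peer sits at a fixed offset from that base.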
  GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorX86* codegen) {
  X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();
  Location base_loc = locations->InAt(1);
  Register base = base_loc.AsRegister<Register>();
  Location offset_loc = locations->InAt(2);
  Register offset = offset_loc.AsRegisterPairLow<Register>();
  Location output_loc = locations->Out();

  switch (type) {
    case Primitive::kPrimInt: {
      Register output = output_loc.AsRegister<Register>();
      __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;
    }

    case Primitive::kPrimNot: {
      Register output = output_loc.AsRegister<Register>();
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Address src(base, offset, ScaleFactor::TIMES_1, 0);
          codegen->GenerateReferenceLoadWithBakerReadBarrier(
              invoke, output_loc, base, src, /* needs_null_check */ false);
        } else {
          __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
          codegen->GenerateReadBarrierSlow(
              invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
        __ MaybeUnpoisonHeapReference(output);
      }
      break;
    }

    case Primitive::kPrimLong: {
      Register output_lo = output_loc.AsRegisterPairLow<Register>();
      Register output_hi = output_loc.AsRegisterPairHigh<Register>();
      if (is_volatile) {
        // Need to use a XMM to read atomically.
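        // An 8-byte-aligned SSE load is a single atomic memory access on x86,
        // unlike a pair of 32-bit loads, which could observe a torn value.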
        XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
        __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
        __ movd(output_lo, temp);
        __ psrlq(temp, Immediate(32));
        __ movd(output_hi, temp);
      } else {
        __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
        __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
      }
    }
    break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
                                          HInvoke* invoke,
                                          Primitive::Type type,
                                          bool is_volatile) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  if (type == Primitive::kPrimLong) {
    if (is_volatile) {
      // Need to use XMM to read volatile.
      locations->AddTemp(Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
    } else {
      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
    }
  } else {
    locations->SetOut(Location::RequiresRegister(),
                      (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  }
}

void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
}


void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke,
                                                       bool is_volatile) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
    // Ensure the value is in a byte register.
    locations->AddTemp(Location::RegisterLocation(ECX));
  } else if (type == Primitive::kPrimLong && is_volatile) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(
      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(
      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(
      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(
      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(
      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(
      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(
      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(
      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(
      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
}

// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
// memory model.
static void GenUnsafePut(LocationSummary* locations,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorX86* codegen) {
  X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
  Register base = locations->InAt(1).AsRegister<Register>();
  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
  Location value_loc = locations->InAt(3);

  if (type == Primitive::kPrimLong) {
    Register value_lo = value_loc.AsRegisterPairLow<Register>();
    Register value_hi = value_loc.AsRegisterPairHigh<Register>();
    if (is_volatile) {
      XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
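      // Build the 64-bit value in an XMM register: PUNPCKLDQ interleaves the
      // low dwords, giving temp1 = value_hi:value_lo, which a single 8-byte
      // MOVSD then stores atomically.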
      __ movd(temp1, value_lo);
      __ movd(temp2, value_hi);
      __ punpckldq(temp1, temp2);
      __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
    } else {
      __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
      __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
    }
  } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    Register temp = locations->GetTemp(0).AsRegister<Register>();
    __ movl(temp, value_loc.AsRegister<Register>());
    __ PoisonHeapReference(temp);
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
  }

  if (is_volatile) {
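    // A volatile store must be followed by a StoreLoad barrier; plain x86
    // stores are already ordered with respect to other stores.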
    codegen->MemoryFence();
  }

  if (type == Primitive::kPrimNot) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
                        locations->GetTemp(1).AsRegister<Register>(),
                        base,
                        value_loc.AsRegister<Register>(),
                        value_can_be_null);
  }
}

void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
                                       Primitive::Type type,
                                       HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  // Offset is a long, but in 32 bit mode, we only need the low word.
  // Can we update the invoke here to remove a TypeConvert to Long?
  locations->SetInAt(2, Location::RequiresRegister());
  // Expected value must be in EAX or EDX:EAX.
  // For long, new value must be in ECX:EBX.
  if (type == Primitive::kPrimLong) {
    locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
    locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
  } else {
    locations->SetInAt(3, Location::RegisterLocation(EAX));
    locations->SetInAt(4, Location::RequiresRegister());
  }

  // Force a byte register for the output.
  locations->SetOut(Location::RegisterLocation(EAX));
  if (type == Primitive::kPrimNot) {
    // Need temporary registers for card-marking, and possibly for
    // (Baker) read barrier.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
    // Need a byte register for marking.
    locations->AddTemp(Location::RegisterLocation(ECX));
  }
}

void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
  X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  Register base = locations->InAt(1).AsRegister<Register>();
  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
  Location out = locations->Out();
  DCHECK_EQ(out.AsRegister<Register>(), EAX);

  // The address of the field within the holding object.
  Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);

  if (type == Primitive::kPrimNot) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    Location temp1_loc = locations->GetTemp(0);
    Register temp1 = temp1_loc.AsRegister<Register>();
    Register temp2 = locations->GetTemp(1).AsRegister<Register>();

    Register expected = locations->InAt(3).AsRegister<Register>();
    // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
    DCHECK_EQ(expected, EAX);
    Register value = locations->InAt(4).AsRegister<Register>();

    // Mark card for object assuming new value is stored.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // Need to make sure the reference stored in the field is a to-space
      // one before attempting the CAS or the CAS could fail incorrectly.
      codegen->GenerateReferenceLoadWithBakerReadBarrier(
          invoke,
          temp1_loc,  // Unused, used only as a "temporary" within the read barrier.
          base,
          field_addr,
          /* needs_null_check */ false,
          /* always_update_field */ true,
          &temp2);
    }

    bool base_equals_value = (base == value);
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value` to a temporary register. This way, poisoning
        // `value` won't invalidate `base`.
        value = temp1;
        __ movl(value, base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (EAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value, expected);
      DCHECK_NE(base, expected);

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(value);
    }

    __ LockCmpxchgl(field_addr, value);

    // LOCK CMPXCHG has full barrier semantics, and we don't need
    // scheduling barriers at this time.

    // Convert ZF into the Boolean result.
Roland Levillainb488b782015-10-22 11:38:49 +01002407 __ setb(kZero, out.AsRegister<Register>());
2408 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
Roland Levillain4d027112015-07-01 15:41:14 +01002409
Roland Levillain391b8662015-12-18 11:43:38 +00002410 // If heap poisoning is enabled, we need to unpoison the values
2411 // that were poisoned earlier.
Roland Levillainb488b782015-10-22 11:38:49 +01002412 if (kPoisonHeapReferences) {
2413 if (base_equals_value) {
2414 // `value` has been moved to a temporary register, no need to
2415 // unpoison it.
2416 } else {
2417 // Ensure `value` is different from `out`, so that unpoisoning
2418 // the former does not invalidate the latter.
2419 DCHECK_NE(value, out.AsRegister<Register>());
2420 __ UnpoisonHeapReference(value);
2421 }
2422 // Do not unpoison the reference contained in register
2423 // `expected`, as it is the same as register `out` (EAX).
2424 }
2425 } else {
2426 if (type == Primitive::kPrimInt) {
2427 // Ensure the expected value is in EAX (required by the CMPXCHG
2428 // instruction).
2429 DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002430 __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
Roland Levillainb488b782015-10-22 11:38:49 +01002431 } else if (type == Primitive::kPrimLong) {
2432 // Ensure the expected value is in EAX:EDX and that the new
2433 // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2434 DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2435 DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2436 DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2437 DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002438 __ LockCmpxchg8b(field_addr);
Roland Levillainb488b782015-10-22 11:38:49 +01002439 } else {
2440 LOG(FATAL) << "Unexpected CAS type " << type;
2441 }
2442
Roland Levillain0d5a2812015-11-13 10:07:31 +00002443 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2444 // don't need scheduling barriers at this time.
Roland Levillainb488b782015-10-22 11:38:49 +01002445
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002446 // Convert ZF into the Boolean result.
Roland Levillainb488b782015-10-22 11:38:49 +01002447 __ setb(kZero, out.AsRegister<Register>());
2448 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
Roland Levillain4d027112015-07-01 15:41:14 +01002449 }
Mark Mendell58d25fd2015-04-03 14:52:31 -04002450}
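
// For reference, the operation GenCAS emits is, at the Java level, roughly the
// following sketch (names illustrative). The emitted LOCK CMPXCHG performs the
// compare and the conditional store as one atomic step with full-barrier
// semantics:
//
//   bool CompareAndSwap(int32_t* field, int32_t expected, int32_t new_value) {
//     // Atomically:
//     if (*field != expected) return false;  // ZF cleared -> out = 0.
//     *field = new_value;                    // ZF set -> out = 1.
//     return true;
//   }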

void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barrier.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
                     X86Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register reg = locations->InAt(0).AsRegister<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}
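
// A plain C++ sketch of the sequence emitted above (illustrative only;
// ReverseBits32 is a hypothetical helper, not part of this file):
//
//   uint32_t ReverseBits32(uint32_t x) {
//     x = __builtin_bswap32(x);  // bswapl: reverse the four bytes.
//     x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555);  // Swap adjacent bits.
//     x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333);  // Swap 2-bit pairs.
//     x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F);  // Swap nibbles.
//     return x;
//   }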

void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
  Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  // We want to swap high/low, then bswap each one, and then do the same
  // as a 32 bit reverse.
  // Exchange high and low.
  __ movl(temp, reg_low);
  __ movl(reg_low, reg_high);
  __ movl(reg_high, temp);

  // Bit-reverse low.
  __ bswapl(reg_low);
  SwapBits(reg_low, temp, 1, 0x55555555, assembler);
  SwapBits(reg_low, temp, 2, 0x33333333, assembler);
  SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);

  // Bit-reverse high.
  __ bswapl(reg_high);
  SwapBits(reg_high, temp, 1, 0x55555555, assembler);
  SwapBits(reg_high, temp, 2, 0x33333333, assembler);
  SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
}
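
// The long variant above applies the same idea to a register pair: swap the
// words, then bit-reverse each one. As a sketch (using the hypothetical
// ReverseBits32 from the note above):
//
//   uint64_t ReverseBits64(uint64_t x) {
//     uint32_t lo = static_cast<uint32_t>(x);
//     uint32_t hi = static_cast<uint32_t>(x >> 32);
//     return (static_cast<uint64_t>(ReverseBits32(lo)) << 32) | ReverseBits32(hi);
//   }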

static void CreateBitCountLocations(
    ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
    // Do nothing if there is no popcnt support. This results in generating
    // a call for the intrinsic rather than direct code.
    return;
  }
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  if (is_long) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenBitCount(X86Assembler* assembler,
                        CodeGeneratorX86* codegen,
                        HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    int32_t result = is_long
        ? POPCOUNT(static_cast<uint64_t>(value))
        : POPCOUNT(static_cast<uint32_t>(value));
    codegen->Load32BitValue(out, result);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ popcntl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ popcntl(out, Address(ESP, src.GetStackIndex()));
    }
  } else {
    // The 64-bit case needs to worry about two parts.
    Register temp = locations->GetTemp(0).AsRegister<Register>();
    if (src.IsRegisterPair()) {
      __ popcntl(temp, src.AsRegisterPairLow<Register>());
      __ popcntl(out, src.AsRegisterPairHigh<Register>());
    } else {
      DCHECK(src.IsDoubleStackSlot());
      __ popcntl(temp, Address(ESP, src.GetStackIndex()));
      __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
    }
    __ addl(out, temp);
  }
}
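
// The 64-bit path above simply sums two 32-bit POPCNTs, one per word of the
// register pair; as a sketch (BitCount64 is an illustrative name):
//
//   int32_t BitCount64(uint64_t x) {
//     return __builtin_popcount(static_cast<uint32_t>(x)) +       // Low word.
//            __builtin_popcount(static_cast<uint32_t>(x >> 32));  // High word.
//   }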

void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  if (is_long) {
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister());
}

static void GenLeadingZeros(X86Assembler* assembler,
                            CodeGeneratorX86* codegen,
                            HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = is_long ? 64 : 32;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ bsrl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ bsrl(out, Address(ESP, src.GetStackIndex()));
    }

    // BSR sets ZF if the input was zero, and the output is undefined.
    NearLabel all_zeroes, done;
    __ j(kEqual, &all_zeroes);

    // Correct the result from BSR to get the final CLZ result.
    __ xorl(out, Immediate(31));
    __ jmp(&done);

    // Fix the zero case with the expected result.
    __ Bind(&all_zeroes);
    __ movl(out, Immediate(32));

    __ Bind(&done);
    return;
  }

  // The 64-bit case needs to worry about both parts of the register.
  DCHECK(src.IsRegisterPair());
  Register src_lo = src.AsRegisterPairLow<Register>();
  Register src_hi = src.AsRegisterPairHigh<Register>();
  NearLabel handle_low, done, all_zeroes;

  // Is the high word zero?
  __ testl(src_hi, src_hi);
  __ j(kEqual, &handle_low);

  // High word is not zero. We know that the BSR result is defined in this case.
  __ bsrl(out, src_hi);

  // Correct the result from BSR to get the final CLZ result.
  __ xorl(out, Immediate(31));
  __ jmp(&done);

  // High word was zero. We have to compute the low word count and add 32.
  __ Bind(&handle_low);
  __ bsrl(out, src_lo);
  __ j(kEqual, &all_zeroes);

  // We had a valid result. Use an XOR to both correct the result and add 32.
  __ xorl(out, Immediate(63));
  __ jmp(&done);

  // All zero case.
  __ Bind(&all_zeroes);
  __ movl(out, Immediate(64));

  __ Bind(&done);
}
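
// The BSR-based sequence above exploits CLZ(x) == 31 - BSR(x) for nonzero x;
// since BSR(x) <= 31, XOR with 31 computes that subtraction in one
// instruction. A sketch (Clz32 and BitScanReverse are illustrative names):
//
//   int32_t Clz32(uint32_t x) {
//     if (x == 0) return 32;          // BSR leaves its output undefined for zero input.
//     return 31 ^ BitScanReverse(x);  // Equal to 31 - BitScanReverse(x) here.
//   }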

void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  if (is_long) {
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86Assembler* assembler,
                             CodeGeneratorX86* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = is_long ? 64 : 32;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ bsfl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ bsfl(out, Address(ESP, src.GetStackIndex()));
    }

    // BSF sets ZF if the input was zero, and the output is undefined.
    NearLabel done;
    __ j(kNotEqual, &done);

    // Fix the zero case with the expected result.
    __ movl(out, Immediate(32));

    __ Bind(&done);
    return;
  }

  // The 64-bit case needs to worry about both parts of the register.
  DCHECK(src.IsRegisterPair());
  Register src_lo = src.AsRegisterPairLow<Register>();
  Register src_hi = src.AsRegisterPairHigh<Register>();
  NearLabel done, all_zeroes;

  // If the low word is zero, then ZF will be set. If not, we have the answer.
  __ bsfl(out, src_lo);
  __ j(kNotEqual, &done);

  // Low word was zero. We have to compute the high word count and add 32.
  __ bsfl(out, src_hi);
  __ j(kEqual, &all_zeroes);

  // We had a valid result. Add 32 to account for the low word being zero.
  __ addl(out, Immediate(32));
  __ jmp(&done);

  // All zero case.
  __ Bind(&all_zeroes);
  __ movl(out, Immediate(64));

  __ Bind(&done);
}
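
// The 64-bit path above is equivalent to the following sketch (Ctz64 is an
// illustrative name):
//
//   int32_t Ctz64(uint32_t lo, uint32_t hi) {
//     if (lo != 0) return __builtin_ctz(lo);       // BSF on the low word found a bit.
//     if (hi != 0) return 32 + __builtin_ctz(hi);  // Low word zero: count in the high word.
//     return 64;                                   // Both words zero.
//   }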

void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
  if (kEmitCompilerReadBarrier) {
    // Do not intrinsify this call with the read barrier configuration.
    return;
  }
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
  DCHECK(!kEmitCompilerReadBarrier);
  LocationSummary* locations = invoke->GetLocations();
  X86Assembler* assembler = GetAssembler();

  Register obj = locations->InAt(0).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  // Load ArtMethod first.
  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(invoke_direct != nullptr);
  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
      invoke_direct, locations->GetTemp(0));
  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
  Register temp = temp_loc.AsRegister<Register>();

  // Now get declaring class.
  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));

  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
  DCHECK_NE(slow_path_flag_offset, 0u);
  DCHECK_NE(disable_flag_offset, 0u);
  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);

  // Check the static flags that prevent us from using the intrinsic.
  if (slow_path_flag_offset == disable_flag_offset + 1) {
    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  }

  // Fast path.
  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ MaybeUnpoisonHeapReference(out);
  __ Bind(slow_path->GetExitLabel());
}
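
// At the Java level, the fast path above corresponds roughly to this sketch
// (all names illustrative, not the actual java.lang.ref.Reference source):
//
//   Object get() {
//     if (disableIntrinsicFlag != 0 || slowPathEnabledFlag != 0) {
//       return getReferentSlow();  // Runtime flags force the slow path.
//     }
//     return this.referent;  // Fast path: a direct field load.
//   }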

static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
  return instruction->InputAt(input0) == instruction->InputAt(input1);
}

// Compute the base address for the System.arraycopy intrinsic in `base`.
static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
                                          Primitive::Type type,
                                          const Register& array,
                                          const Location& pos,
                                          const Register& base) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We could allow Primitive::kPrimChar as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, Primitive::kPrimNot);
  const int32_t element_size = Primitive::ComponentSize(type);
  const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (pos.IsConstant()) {
    int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(base, Address(array, element_size * constant + data_offset));
  } else {
    __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
  }
}

// Compute the end source address for the System.arraycopy intrinsic in `end`.
static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
                                         Primitive::Type type,
                                         const Location& copy_length,
                                         const Register& base,
                                         const Register& end) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We could allow Primitive::kPrimChar as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, Primitive::kPrimNot);
  const int32_t element_size = Primitive::ComponentSize(type);
  const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));

  if (copy_length.IsConstant()) {
    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
    __ leal(end, Address(base, element_size * constant));
  } else {
    __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
  }
}
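
// Together, the two helpers above reduce the copy bounds to plain address
// arithmetic (a sketch of the math, with `pos` and `length` in elements):
//
//   base = array + data_offset + pos * element_size
//   end  = base + length * element_size
//
// Using LEA with a scaled-index addressing mode folds the multiply by
// element_size (a power of two) into a single instruction.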

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barrier.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
  if (invoke->GetLocations() != nullptr) {
    // Need a byte register for marking.
    invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));

    static constexpr size_t kSrc = 0;
    static constexpr size_t kSrcPos = 1;
    static constexpr size_t kDest = 2;
    static constexpr size_t kDestPos = 3;
    static constexpr size_t kLength = 4;

    if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
        !invoke->InputAt(kDestPos)->IsIntConstant() &&
        !invoke->InputAt(kLength)->IsIntConstant()) {
      if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
          !IsSameInput(invoke, kSrcPos, kLength) &&
          !IsSameInput(invoke, kDestPos, kLength) &&
          !IsSameInput(invoke, kSrc, kDest)) {
        // Not enough registers, make the length also take a stack slot.
        invoke->GetLocations()->SetInAt(kLength, Location::Any());
      }
    }
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barrier.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location src_pos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location dest_pos = locations->InAt(3);
  Location length_arg = locations->InAt(4);
  Location length = length_arg;
  Location temp1_loc = locations->GetTemp(0);
  Register temp1 = temp1_loc.AsRegister<Register>();
  Location temp2_loc = locations->GetTemp(1);
  Register temp2 = temp2_loc.AsRegister<Register>();

  SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(intrinsic_slow_path);

  NearLabel conditions_on_positions_validated;
  SystemArrayCopyOptimizations optimizations(invoke);

  // If source and destination are the same, we go to slow path if we need to do
  // forward copying.
  if (src_pos.IsConstant()) {
    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      if (optimizations.GetDestinationIsSource()) {
        // Checked when building locations.
        DCHECK_GE(src_pos_constant, dest_pos_constant);
      } else if (src_pos_constant < dest_pos_constant) {
        __ cmpl(src, dest);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ cmpl(src, dest);
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
    }
  } else {
    if (!optimizations.GetDestinationIsSource()) {
      __ cmpl(src, dest);
      __ j(kNotEqual, &conditions_on_positions_validated);
    }
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    } else {
      __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  Location temp3_loc = locations->GetTemp(2);
  Register temp3 = temp3_loc.AsRegister<Register>();
  if (length.IsStackSlot()) {
    __ movl(temp3, Address(ESP, length.GetStackIndex()));
    length = Location::RegisterLocation(temp3);
  }

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler,
                src_pos,
                src,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsSourceLength());

  // Validity checks: dest.
  CheckPosition(assembler,
                dest_pos,
                dest,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsDestinationLength());

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If none of these checks succeed, we go to the
    // slow path.

    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ temp1 = src->klass_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
        // Bail out if the source is not a non-primitive array.
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
        __ testl(temp1, temp1);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `temp1` has been unpoisoned
        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ temp1 = src->klass_
        __ movl(temp1, Address(src, class_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        // Bail out if the source is not a non-primitive array.
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ testl(temp1, temp1);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(temp1);
      }
      __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      if (length.Equals(Location::RegisterLocation(temp3))) {
        // When Baker read barriers are enabled, register `temp3`,
        // which in the present case contains the `length` parameter,
        // will be overwritten below. Make the `length` location
        // reference the original stack location; it will be moved
        // back to `temp3` later if necessary.
        DCHECK(length_arg.IsStackSlot());
        length = length_arg;
      }

      // /* HeapReference<Class> */ temp1 = dest->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);

      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
        // Bail out if the destination is not a non-primitive array.
        //
        // Register `temp1` is not trashed by the read barrier emitted
        // by GenerateFieldLoadWithBakerReadBarrier below, as that
        // method produces a call to a ReadBarrierMarkRegX entry point,
        // which saves all potentially live registers, including
        // temporaries such as `temp1`.
        // /* HeapReference<Class> */ temp2 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
        __ testl(temp2, temp2);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `temp2` has been unpoisoned
        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }

      // For the same reason given earlier, `temp1` is not trashed by the
      // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
      // /* HeapReference<Class> */ temp2 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
      // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
      __ cmpl(temp1, temp2);

      if (optimizations.GetDestinationIsTypedObjectArray()) {
        NearLabel do_copy;
        __ j(kEqual, &do_copy);
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
        // We do not need to emit a read barrier for the following
        // heap reference load, as `temp1` is only used in a
        // comparison with null below, and this reference is not
        // kept afterwards.
        __ cmpl(Address(temp1, super_offset), Immediate(0));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        __ Bind(&do_copy);
      } else {
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      // Non read barrier code.

      // /* HeapReference<Class> */ temp1 = dest->klass_
      __ movl(temp1, Address(dest, class_offset));
      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
        __ MaybeUnpoisonHeapReference(temp1);
        // Bail out if the destination is not a non-primitive array.
        // /* HeapReference<Class> */ temp2 = temp1->component_type_
        __ movl(temp2, Address(temp1, component_offset));
        __ testl(temp2, temp2);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(temp2);
        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        // Re-poison the heap reference to make the compare instruction below
        // compare two poisoned references.
        __ PoisonHeapReference(temp1);
      }

      // Note: if heap poisoning is on, we are comparing two poisoned references here.
      __ cmpl(temp1, Address(src, class_offset));

      if (optimizations.GetDestinationIsTypedObjectArray()) {
        NearLabel do_copy;
        __ j(kEqual, &do_copy);
        __ MaybeUnpoisonHeapReference(temp1);
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        __ cmpl(Address(temp1, super_offset), Immediate(0));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        __ Bind(&do_copy);
      } else {
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non-primitive array.
    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
      __ testl(temp1, temp1);
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      // If heap poisoning is enabled, `temp1` has been unpoisoned
      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
    } else {
      // /* HeapReference<Class> */ temp1 = src->klass_
      __ movl(temp1, Address(src, class_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      __ movl(temp1, Address(temp1, component_offset));
      __ testl(temp1, temp1);
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(temp1);
    }
    __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
  }

  const Primitive::Type type = Primitive::kPrimNot;
  const int32_t element_size = Primitive::ComponentSize(type);

  // Compute the base source address in `temp1`.
  GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);

  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // If it is needed (in the case of the fast-path loop), the base
    // destination address is computed later, as `temp2` is used for
    // intermediate computations.

    // Compute the end source address in `temp3`.
    if (length.IsStackSlot()) {
      // Location `length` is again pointing at a stack slot, as
      // register `temp3` (which was containing the length parameter
      // earlier) has been overwritten; restore it now.
      DCHECK(length.Equals(length_arg));
      __ movl(temp3, Address(ESP, length.GetStackIndex()));
      length = Location::RegisterLocation(temp3);
    }
    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);

    // SystemArrayCopy implementation for Baker read barriers (see
    // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
    //
    //   if (src_ptr != end_ptr) {
    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
    //     if (is_gray) {
    //       // Slow-path copy.
    //       for (size_t i = 0; i != length; ++i) {
    //         dest_array[dest_pos + i] =
    //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
    //       }
    //     } else {
    //       // Fast-path copy.
    //       do {
    //         *dest_ptr++ = *src_ptr++;
    //       } while (src_ptr != end_ptr)
    //     }
    //   }

    NearLabel loop, done;

    // Don't enter copy loop if `length == 0`.
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);

    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

    // if (rb_state == ReadBarrier::GrayState())
    //   goto slow_path;
    // At this point, just do the "if" and make sure that flags are preserved until the branch.
    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));

    // Load fence to prevent load-load reordering.
    // Note that this is a no-op, thanks to the x86 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

    // Slow path used to copy array when `src` is gray.
    SlowPathCode* read_barrier_slow_path =
        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
    codegen_->AddSlowPath(read_barrier_slow_path);

    // We have done the "if" of the gray bit check above, now branch based on the flags.
    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());

    // Fast-path copy.
    // Compute the base destination address in `temp2`.
    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    __ Bind(&loop);
    __ pushl(Address(temp1, 0));
    __ cfi().AdjustCFAOffset(4);
    __ popl(Address(temp2, 0));
    __ cfi().AdjustCFAOffset(-4);
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);

    __ Bind(read_barrier_slow_path->GetExitLabel());
    __ Bind(&done);
  } else {
    // Non read barrier code.
    // Compute the base destination address in `temp2`.
    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
    // Compute the end source address in `temp3`.
    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    NearLabel loop, done;
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);
    __ Bind(&loop);
    __ pushl(Address(temp1, 0));
    __ cfi().AdjustCFAOffset(4);
    __ popl(Address(temp2, 0));
    __ cfi().AdjustCFAOffset(-4);
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);
    __ Bind(&done);
  }

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);

  __ Bind(intrinsic_slow_path->GetExitLabel());
}
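
// For orientation, the guard structure above mirrors the java.lang.System
// arraycopy contract; any condition the intrinsic cannot establish cheaply
// falls back to `intrinsic_slow_path` instead of being handled inline
// (a sketch, not runtime code):
//
//   null source or destination         -> slow path
//   negative length or positions       -> slow path
//   positions out of array bounds      -> slow path
//   possibly incompatible element type -> slow path
//   otherwise: copy word by word, then mark one GC card on `dest`.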

void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      Location::RegisterLocation(EAX),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
  LocationSummary* locations = invoke->GetLocations();
  X86Assembler* assembler = GetAssembler();

  Register out = locations->Out().AsRegister<Register>();
  InvokeRuntimeCallingConvention calling_convention;
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (value >= info.low && value <= info.high) {
      // Just embed the j.l.Integer in the code.
      ScopedObjectAccess soa(Thread::Current());
      mirror::Object* boxed = info.cache->Get(value + (-info.low));
      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
      __ movl(out, Immediate(address));
    } else {
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
      __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
      __ movl(Address(out, info.value_offset), Immediate(value));
    }
  } else {
    Register in = locations->InAt(0).AsRegister<Register>();
    // Check bounds of our cache.
    __ leal(out, Address(in, -info.low));
    __ cmpl(out, Immediate(info.high - info.low + 1));
    NearLabel allocate, done;
    __ j(kAboveEqual, &allocate);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
    __ movl(out, Address(out, TIMES_4, data_offset + address));
    __ MaybeUnpoisonHeapReference(out);
    __ jmp(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
    __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
    __ movl(Address(out, info.value_offset), in);
    __ Bind(&done);
  }
}
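
// The code above intrinsifies Integer.valueOf, matching the libcore cache
// semantics sketched below (field and class names as in java.lang.Integer;
// the sketch omits synchronization and boot-image details):
//
//   Integer valueOf(int i) {
//     if (i >= IntegerCache.low && i <= IntegerCache.high) {
//       return IntegerCache.cache[i - IntegerCache.low];  // Boxed value from the cache.
//     }
//     return new Integer(i);  // Otherwise allocate and initialize a new object.
//   }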

UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)

UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf)
UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend)
UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength)
UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString)
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend)
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength)
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString)

// 1.8.
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86)

#undef __

}  // namespace x86
}  // namespace art