/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
  if (invoke->InputCount() == 0) {
    return;
  }

  LocationSummary* locations = invoke->GetLocations();
  InvokeDexCallingConventionVisitor calling_convention_visitor;

  // We're moving potentially two or more locations to locations that could overlap, so we need
  // a parallel move resolver.
  HParallelMove parallel_move(arena);

  for (size_t i = 0; i < invoke->InputCount(); i++) {
    HInstruction* input = invoke->InputAt(i);
    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
    Location actual_loc = locations->InAt(i);

    parallel_move.AddMove(actual_loc, cc_loc, nullptr);
  }

  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

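// Note: the extra boolean passed to movd() in the helpers below selects the operand width; the
// 64-bit GPR<->XMM move is emitted when it is true, the 32-bit form otherwise. That is what lets
// the float and double bit-cast intrinsics share these two helpers.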
static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
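      // bswapl leaves the reversed 16-bit value in the upper half of the register; the
      // arithmetic shift brings it back down while sign-extending, as required for the signed
      // short result.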
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations, bool is64bit,
                      X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

    // TODO: Can mask directly with constant area if we align on 16 bytes.
    if (is64bit) {
      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
//  Once assembler support is available, in-memory operations look like this:
//  if (is64bit) {
//    DCHECK(output.IsDoubleStackSlot());
//    // No 64b and with literal.
//    __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
//    __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
//  } else {
//    DCHECK(output.IsStackSlot());
//    // Can use and with a literal directly.
//    __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
//  }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

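// Branchless absolute value: mask = x >> 31 (or 63 for longs) is all ones when x is negative and
// zero otherwise, so (x + mask) ^ mask negates negative inputs and leaves the rest unchanged. As
// with Math.abs, MIN_VALUE maps to itself.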
static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
                        X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if NaN jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

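  // We only fall through to here when the operands compare equal, which includes +0.0 vs -0.0.
  // ORing the operands keeps a set sign bit if either value is -0.0 (the correct min), while
  // ANDing clears it unless both are -0.0 (the correct max).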
  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

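// The immediate passed to roundsd selects the SSE4.1 rounding mode: 0 rounds to nearest (rint),
// 1 rounds toward negative infinity (floor) and 2 rounds toward positive infinity (ceil). That is
// why the Ceil/Floor/Rint visitors below pass 2, 1 and 0 respectively.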
static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());  // The rounded result is an integer.
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

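  // Note: Math.round returns 0 for NaN and clamps to Integer.MAX_VALUE for arguments that are
  // too large; the j(kAboveEqual) and j(kUnordered) branches below implement those two cases.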
  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Generate 0.5 into inPlusPointFive.
  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
  __ movd(inPlusPointFive, out, false);

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And round it down (floor) to an integral value.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  __ movl(out, Immediate(kPrimIntMax));
  // maxInt = int-to-float(out)
  __ cvtsi2ss(maxInt, out);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, maxInt);
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Generate 0.5 into inPlusPointFive.
  __ movq(out, Immediate(bit_cast<int64_t, double>(0.5)));
  __ movd(inPlusPointFive, out, true);

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And round it down (floor) to an integral value.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  __ movq(out, Immediate(kPrimLongMax));
  // maxLong = long-to-double(out)
  __ cvtsi2sd(maxLong, out, true);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, maxLong);
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorq(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within data array
  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within array_
  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  Location temp_loc = locations->GetTemp(0);
  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // Get the actual element.
  __ movl(temp, idx);                          // temp := idx.
  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
  __ movl(out, Address(obj, value_offset));    // out := obj.array.
  // out = out[2*temp].
  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheck());

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movb(Address(address, 0), value);
      break;
    case Primitive::kPrimShort:
      __ movw(Address(address, 0), value);
      break;
    case Primitive::kPrimInt:
      __ movl(Address(address, 0), value);
      break;
    case Primitive::kPrimLong:
      __ movq(Address(address, 0), value);
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

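// On x86-64, ART keeps the current Thread* reachable through the GS segment, so the managed peer
// object can be loaded with a single gs-relative move from Thread::PeerOffset().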
void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    case Primitive::kPrimLong:
      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}


void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
// memory model.
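// Volatile stores, however, do need a StoreLoad barrier on x86, which is why GenUnsafePut emits
// an mfence after the store when is_volatile is set.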
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // expected value must be in EAX/RAX.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

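// LOCK CMPXCHG implicitly uses RAX for the expected value (hence the fixed RAX location above)
// and signals success via ZF, which setcc below materializes into the boolean result.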
static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object assuming new value is stored.
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value);
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // locked cmpxchg has full barrier semantics, and we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a number x. Using bswap to save instructions
   * compared to generic luni implementation which has 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

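// Same bit-swapping rounds as SwapBits above, but the 64-bit masks cannot be encoded as
// immediates (andq only accepts a sign-extended 32-bit immediate), so each mask is first
// materialized in a temporary register.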
static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a long number x. Using bswap to save instructions
   * compared to generic luni implementation which has 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name) \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
} \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}

UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art