blob: 2064b181386d54ecad96a01c26e09f4c5ff2339e [file] [log] [blame]
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "intrinsics_x86_64.h"
18
19#include "code_generator_x86_64.h"
20#include "entrypoints/quick/quick_entrypoints.h"
21#include "intrinsics.h"
22#include "mirror/array-inl.h"
23#include "mirror/art_method.h"
24#include "mirror/string.h"
25#include "thread.h"
26#include "utils/x86_64/assembler_x86_64.h"
27#include "utils/x86_64/constants_x86_64.h"
28
29namespace art {
30
31namespace x86_64 {
32
Andreas Gampe71fb52f2014-12-29 17:43:08 -080033X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
34 return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
35}
36
Andreas Gampe878d58c2015-01-15 23:24:00 -080037ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
Andreas Gampe71fb52f2014-12-29 17:43:08 -080038 return codegen_->GetGraph()->GetArena();
39}
40
41bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
42 Dispatch(invoke);
43 const LocationSummary* res = invoke->GetLocations();
44 return res != nullptr && res->Intrinsified();
45}
46
47#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
48
49// TODO: trg as memory.
50static void MoveFromReturnRegister(Location trg,
51 Primitive::Type type,
52 CodeGeneratorX86_64* codegen) {
53 if (!trg.IsValid()) {
54 DCHECK(type == Primitive::kPrimVoid);
55 return;
56 }
57
58 switch (type) {
59 case Primitive::kPrimBoolean:
60 case Primitive::kPrimByte:
61 case Primitive::kPrimChar:
62 case Primitive::kPrimShort:
63 case Primitive::kPrimInt:
64 case Primitive::kPrimNot: {
65 CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
66 if (trg_reg.AsRegister() != RAX) {
67 __ movl(trg_reg, CpuRegister(RAX));
68 }
69 break;
70 }
71 case Primitive::kPrimLong: {
72 CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
73 if (trg_reg.AsRegister() != RAX) {
74 __ movq(trg_reg, CpuRegister(RAX));
75 }
76 break;
77 }
78
79 case Primitive::kPrimVoid:
80 LOG(FATAL) << "Unexpected void type for valid location " << trg;
81 UNREACHABLE();
82
83 case Primitive::kPrimDouble: {
84 XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
85 if (trg_reg.AsFloatRegister() != XMM0) {
86 __ movsd(trg_reg, XmmRegister(XMM0));
87 }
88 break;
89 }
90 case Primitive::kPrimFloat: {
91 XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
92 if (trg_reg.AsFloatRegister() != XMM0) {
93 __ movss(trg_reg, XmmRegister(XMM0));
94 }
95 break;
96 }
97 }
98}
99
100static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
101 if (invoke->InputCount() == 0) {
102 return;
103 }
104
105 LocationSummary* locations = invoke->GetLocations();
106 InvokeDexCallingConventionVisitor calling_convention_visitor;
107
108 // We're moving potentially two or more locations to locations that could overlap, so we need
109 // a parallel move resolver.
110 HParallelMove parallel_move(arena);
111
112 for (size_t i = 0; i < invoke->InputCount(); i++) {
113 HInstruction* input = invoke->InputAt(i);
114 Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
115 Location actual_loc = locations->InAt(i);
116
Nicolas Geoffray42d1f5f2015-01-16 09:14:18 +0000117 parallel_move.AddMove(actual_loc, cc_loc, nullptr);
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800118 }
119
120 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
121}
122
123// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
124// call. This will copy the arguments into the positions for a regular call.
125//
126// Note: The actual parameters are required to be in the locations given by the invoke's location
127// summary. If an intrinsic modifies those locations before a slowpath call, they must be
128// restored!
129class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
130 public:
131 explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }
132
133 void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
134 CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
135 __ Bind(GetEntryLabel());
136
Nicolas Geoffraya8ac9132015-03-13 16:36:36 +0000137 SaveLiveRegisters(codegen, invoke_->GetLocations());
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800138
139 MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
140
141 if (invoke_->IsInvokeStaticOrDirect()) {
142 codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
Nicolas Geoffraya8ac9132015-03-13 16:36:36 +0000143 RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800144 } else {
145 UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
146 UNREACHABLE();
147 }
148
149 // Copy the result back to the expected output.
150 Location out = invoke_->GetLocations()->Out();
151 if (out.IsValid()) {
152 DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory.
153 DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
154 MoveFromReturnRegister(out, invoke_->GetType(), codegen);
155 }
156
Nicolas Geoffraya8ac9132015-03-13 16:36:36 +0000157 RestoreLiveRegisters(codegen, invoke_->GetLocations());
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800158 __ jmp(GetExitLabel());
159 }
160
161 private:
162 // The instruction where this slow path is happening.
163 HInvoke* const invoke_;
164
165 DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
166};
167
168#undef __
169#define __ assembler->
170
171static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
172 LocationSummary* locations = new (arena) LocationSummary(invoke,
173 LocationSummary::kNoCall,
174 kIntrinsified);
175 locations->SetInAt(0, Location::RequiresFpuRegister());
176 locations->SetOut(Location::RequiresRegister());
177}
178
179static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
180 LocationSummary* locations = new (arena) LocationSummary(invoke,
181 LocationSummary::kNoCall,
182 kIntrinsified);
183 locations->SetInAt(0, Location::RequiresRegister());
184 locations->SetOut(Location::RequiresFpuRegister());
185}
186
187static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
188 Location input = locations->InAt(0);
189 Location output = locations->Out();
190 __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
191}
192
193static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
194 Location input = locations->InAt(0);
195 Location output = locations->Out();
196 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
197}
198
199void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
200 CreateFPToIntLocations(arena_, invoke);
201}
202void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
203 CreateIntToFPLocations(arena_, invoke);
204}
205
206void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
207 MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
208}
209void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
210 MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
211}
212
213void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
214 CreateFPToIntLocations(arena_, invoke);
215}
216void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
217 CreateIntToFPLocations(arena_, invoke);
218}
219
220void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
221 MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
222}
223void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
224 MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
225}
226
227static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
228 LocationSummary* locations = new (arena) LocationSummary(invoke,
229 LocationSummary::kNoCall,
230 kIntrinsified);
231 locations->SetInAt(0, Location::RequiresRegister());
232 locations->SetOut(Location::SameAsFirstInput());
233}
234
235static void GenReverseBytes(LocationSummary* locations,
236 Primitive::Type size,
237 X86_64Assembler* assembler) {
238 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
239
240 switch (size) {
241 case Primitive::kPrimShort:
242 // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
243 __ bswapl(out);
244 __ sarl(out, Immediate(16));
245 break;
246 case Primitive::kPrimInt:
247 __ bswapl(out);
248 break;
249 case Primitive::kPrimLong:
250 __ bswapq(out);
251 break;
252 default:
253 LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
254 UNREACHABLE();
255 }
256}
257
258void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
259 CreateIntToIntLocations(arena_, invoke);
260}
261
262void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
263 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
264}
265
266void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
267 CreateIntToIntLocations(arena_, invoke);
268}
269
270void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
271 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
272}
273
274void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
275 CreateIntToIntLocations(arena_, invoke);
276}
277
278void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
279 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
280}
281
282
283// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
284// need is 64b.
285
286static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
287 // TODO: Enable memory operations when the assembler supports them.
288 LocationSummary* locations = new (arena) LocationSummary(invoke,
289 LocationSummary::kNoCall,
290 kIntrinsified);
291 locations->SetInAt(0, Location::RequiresFpuRegister());
292 // TODO: Allow x86 to work with memory. This requires assembler support, see below.
293 // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
294 locations->SetOut(Location::SameAsFirstInput());
295 locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
296 locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above.
297}
298
299static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
300 Location output = locations->Out();
301 CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
302
303 if (output.IsFpuRegister()) {
304 // In-register
305 XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
306
307 if (is64bit) {
308 __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
309 __ movd(xmm_temp, cpu_temp);
310 __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
311 } else {
312 __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
313 __ movd(xmm_temp, cpu_temp);
314 __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
315 }
316 } else {
317 // TODO: update when assember support is available.
318 UNIMPLEMENTED(FATAL) << "Needs assembler support.";
319// Once assembler support is available, in-memory operations look like this:
320// if (is64bit) {
321// DCHECK(output.IsDoubleStackSlot());
322// // No 64b and with literal.
323// __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
324// __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
325// } else {
326// DCHECK(output.IsStackSlot());
327// // Can use and with a literal directly.
328// __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
329// }
330 }
331}
332
333void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
334 CreateFloatToFloatPlusTemps(arena_, invoke);
335}
336
337void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
338 MathAbsFP(invoke->GetLocations(), true, GetAssembler());
339}
340
341void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
342 CreateFloatToFloatPlusTemps(arena_, invoke);
343}
344
345void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
346 MathAbsFP(invoke->GetLocations(), false, GetAssembler());
347}
348
349static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
350 LocationSummary* locations = new (arena) LocationSummary(invoke,
351 LocationSummary::kNoCall,
352 kIntrinsified);
353 locations->SetInAt(0, Location::RequiresRegister());
354 locations->SetOut(Location::SameAsFirstInput());
355 locations->AddTemp(Location::RequiresRegister());
356}
357
358static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
359 Location output = locations->Out();
360 CpuRegister out = output.AsRegister<CpuRegister>();
361 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
362
363 if (is64bit) {
364 // Create mask.
365 __ movq(mask, out);
366 __ sarq(mask, Immediate(63));
367 // Add mask.
368 __ addq(out, mask);
369 __ xorq(out, mask);
370 } else {
371 // Create mask.
372 __ movl(mask, out);
373 __ sarl(mask, Immediate(31));
374 // Add mask.
375 __ addl(out, mask);
376 __ xorl(out, mask);
377 }
378}
379
380void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
381 CreateIntToIntPlusTemp(arena_, invoke);
382}
383
384void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
385 GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
386}
387
388void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
389 CreateIntToIntPlusTemp(arena_, invoke);
390}
391
392void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
393 GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
394}
395
396static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
397 X86_64Assembler* assembler) {
398 Location op1_loc = locations->InAt(0);
399 Location op2_loc = locations->InAt(1);
400 Location out_loc = locations->Out();
401 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
402
403 // Shortcut for same input locations.
404 if (op1_loc.Equals(op2_loc)) {
405 DCHECK(out_loc.Equals(op1_loc));
406 return;
407 }
408
409 // (out := op1)
410 // out <=? op2
411 // if Nan jmp Nan_label
412 // if out is min jmp done
413 // if op2 is min jmp op2_label
414 // handle -0/+0
415 // jmp done
416 // Nan_label:
417 // out := NaN
418 // op2_label:
419 // out := op2
420 // done:
421 //
422 // This removes one jmp, but needs to copy one input (op1) to out.
423 //
424 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
425
426 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
427
428 Label nan, done, op2_label;
429 if (is_double) {
430 __ ucomisd(out, op2);
431 } else {
432 __ ucomiss(out, op2);
433 }
434
435 __ j(Condition::kParityEven, &nan);
436
437 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
438 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
439
440 // Handle 0.0/-0.0.
441 if (is_min) {
442 if (is_double) {
443 __ orpd(out, op2);
444 } else {
445 __ orps(out, op2);
446 }
447 } else {
448 if (is_double) {
449 __ andpd(out, op2);
450 } else {
451 __ andps(out, op2);
452 }
453 }
454 __ jmp(&done);
455
456 // NaN handling.
457 __ Bind(&nan);
458 CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
459 // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
460 if (is_double) {
461 __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
462 } else {
463 __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
464 }
465 __ movd(out, cpu_temp, is_double);
466 __ jmp(&done);
467
468 // out := op2;
469 __ Bind(&op2_label);
470 if (is_double) {
471 __ movsd(out, op2);
472 } else {
473 __ movss(out, op2);
474 }
475
476 // Done.
477 __ Bind(&done);
478}
479
480static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
481 LocationSummary* locations = new (arena) LocationSummary(invoke,
482 LocationSummary::kNoCall,
483 kIntrinsified);
484 locations->SetInAt(0, Location::RequiresFpuRegister());
485 locations->SetInAt(1, Location::RequiresFpuRegister());
486 // The following is sub-optimal, but all we can do for now. It would be fine to also accept
487 // the second input to be the output (we can simply swap inputs).
488 locations->SetOut(Location::SameAsFirstInput());
489 locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
490}
491
492void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
493 CreateFPFPToFPPlusTempLocations(arena_, invoke);
494}
495
496void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
497 GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
498}
499
500void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
501 CreateFPFPToFPPlusTempLocations(arena_, invoke);
502}
503
504void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
505 GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
506}
507
508void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
509 CreateFPFPToFPPlusTempLocations(arena_, invoke);
510}
511
512void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
513 GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
514}
515
516void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
517 CreateFPFPToFPPlusTempLocations(arena_, invoke);
518}
519
520void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
521 GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
522}
523
524static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
525 X86_64Assembler* assembler) {
526 Location op1_loc = locations->InAt(0);
527 Location op2_loc = locations->InAt(1);
528
529 // Shortcut for same input locations.
530 if (op1_loc.Equals(op2_loc)) {
531 // Can return immediately, as op1_loc == out_loc.
532 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
533 // a copy here.
534 DCHECK(locations->Out().Equals(op1_loc));
535 return;
536 }
537
538 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
539 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
540
541 // (out := op1)
542 // out <=? op2
543 // if out is min jmp done
544 // out := op2
545 // done:
546
547 if (is_long) {
548 __ cmpq(out, op2);
549 } else {
550 __ cmpl(out, op2);
551 }
552
553 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
554}
555
556static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
557 LocationSummary* locations = new (arena) LocationSummary(invoke,
558 LocationSummary::kNoCall,
559 kIntrinsified);
560 locations->SetInAt(0, Location::RequiresRegister());
561 locations->SetInAt(1, Location::RequiresRegister());
562 locations->SetOut(Location::SameAsFirstInput());
563}
564
565void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
566 CreateIntIntToIntLocations(arena_, invoke);
567}
568
569void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
570 GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
571}
572
573void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
574 CreateIntIntToIntLocations(arena_, invoke);
575}
576
577void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
578 GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
579}
580
581void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
582 CreateIntIntToIntLocations(arena_, invoke);
583}
584
585void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
586 GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
587}
588
589void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
590 CreateIntIntToIntLocations(arena_, invoke);
591}
592
593void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
594 GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
595}
596
597static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
598 LocationSummary* locations = new (arena) LocationSummary(invoke,
599 LocationSummary::kNoCall,
600 kIntrinsified);
601 locations->SetInAt(0, Location::RequiresFpuRegister());
602 locations->SetOut(Location::RequiresFpuRegister());
603}
604
605void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
606 CreateFPToFPLocations(arena_, invoke);
607}
608
609void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
610 LocationSummary* locations = invoke->GetLocations();
611 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
612 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
613
614 GetAssembler()->sqrtsd(out, in);
615}
616
617void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
618 // The inputs plus one temp.
619 LocationSummary* locations = new (arena_) LocationSummary(invoke,
620 LocationSummary::kCallOnSlowPath,
621 kIntrinsified);
622 locations->SetInAt(0, Location::RequiresRegister());
623 locations->SetInAt(1, Location::RequiresRegister());
624 locations->SetOut(Location::SameAsFirstInput());
625 locations->AddTemp(Location::RequiresRegister());
626}
627
628void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
629 LocationSummary* locations = invoke->GetLocations();
630
631 // Location of reference to data array
632 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
633 // Location of count
634 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
635 // Starting offset within data array
636 const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
637 // Start of char data with array_
638 const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
639
640 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
641 CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
642 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
643 Location temp_loc = locations->GetTemp(0);
644 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
645
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800646 // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
647 // the cost.
648 // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
649 // we will not optimize the code for constants (which would save a register).
650
Andreas Gampe878d58c2015-01-15 23:24:00 -0800651 SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800652 codegen_->AddSlowPath(slow_path);
653
654 X86_64Assembler* assembler = GetAssembler();
655
656 __ cmpl(idx, Address(obj, count_offset));
Andreas Gampe878d58c2015-01-15 23:24:00 -0800657 codegen_->MaybeRecordImplicitNullCheck(invoke);
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800658 __ j(kAboveEqual, slow_path->GetEntryLabel());
659
660 // Get the actual element.
661 __ movl(temp, idx); // temp := idx.
662 __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx.
663 __ movl(out, Address(obj, value_offset)); // obj := obj.array.
664 // out = out[2*temp].
665 __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
666
667 __ Bind(slow_path->GetExitLabel());
668}
669
670static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
671 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
672 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
673 // x86 allows unaligned access. We do not have to check the input or use specific instructions
674 // to avoid a SIGBUS.
675 switch (size) {
676 case Primitive::kPrimByte:
677 __ movsxb(out, Address(address, 0));
678 break;
679 case Primitive::kPrimShort:
680 __ movsxw(out, Address(address, 0));
681 break;
682 case Primitive::kPrimInt:
683 __ movl(out, Address(address, 0));
684 break;
685 case Primitive::kPrimLong:
686 __ movq(out, Address(address, 0));
687 break;
688 default:
689 LOG(FATAL) << "Type not recognized for peek: " << size;
690 UNREACHABLE();
691 }
692}
693
694void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
695 CreateIntToIntLocations(arena_, invoke);
696}
697
698void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
699 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
700}
701
702void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
703 CreateIntToIntLocations(arena_, invoke);
704}
705
706void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
707 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
708}
709
710void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
711 CreateIntToIntLocations(arena_, invoke);
712}
713
714void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
715 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
716}
717
718void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
719 CreateIntToIntLocations(arena_, invoke);
720}
721
722void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
723 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
724}
725
726static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
727 LocationSummary* locations = new (arena) LocationSummary(invoke,
728 LocationSummary::kNoCall,
729 kIntrinsified);
730 locations->SetInAt(0, Location::RequiresRegister());
731 locations->SetInAt(1, Location::RequiresRegister());
732}
733
734static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
735 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
736 CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
737 // x86 allows unaligned access. We do not have to check the input or use specific instructions
738 // to avoid a SIGBUS.
739 switch (size) {
740 case Primitive::kPrimByte:
741 __ movb(Address(address, 0), value);
742 break;
743 case Primitive::kPrimShort:
744 __ movw(Address(address, 0), value);
745 break;
746 case Primitive::kPrimInt:
747 __ movl(Address(address, 0), value);
748 break;
749 case Primitive::kPrimLong:
750 __ movq(Address(address, 0), value);
751 break;
752 default:
753 LOG(FATAL) << "Type not recognized for poke: " << size;
754 UNREACHABLE();
755 }
756}
757
758void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
759 CreateIntIntToVoidLocations(arena_, invoke);
760}
761
762void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
763 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
764}
765
766void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
767 CreateIntIntToVoidLocations(arena_, invoke);
768}
769
770void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
771 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
772}
773
774void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
775 CreateIntIntToVoidLocations(arena_, invoke);
776}
777
778void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
779 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
780}
781
782void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
783 CreateIntIntToVoidLocations(arena_, invoke);
784}
785
786void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
787 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
788}
789
790void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
791 LocationSummary* locations = new (arena_) LocationSummary(invoke,
792 LocationSummary::kNoCall,
793 kIntrinsified);
794 locations->SetOut(Location::RequiresRegister());
795}
796
797void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
798 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
799 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
800}
801
Andreas Gampe878d58c2015-01-15 23:24:00 -0800802static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800803 bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
804 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
805 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
806 CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
807
Andreas Gampe878d58c2015-01-15 23:24:00 -0800808 switch (type) {
809 case Primitive::kPrimInt:
810 case Primitive::kPrimNot:
811 __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
812 break;
813
814 case Primitive::kPrimLong:
815 __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
816 break;
817
818 default:
819 LOG(FATAL) << "Unsupported op size " << type;
820 UNREACHABLE();
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800821 }
822}
823
824static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
825 LocationSummary* locations = new (arena) LocationSummary(invoke,
826 LocationSummary::kNoCall,
827 kIntrinsified);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800828 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800829 locations->SetInAt(1, Location::RequiresRegister());
830 locations->SetInAt(2, Location::RequiresRegister());
Andreas Gampe878d58c2015-01-15 23:24:00 -0800831 locations->SetOut(Location::RequiresRegister());
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800832}
833
834void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
835 CreateIntIntIntToIntLocations(arena_, invoke);
836}
837void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
838 CreateIntIntIntToIntLocations(arena_, invoke);
839}
840void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
841 CreateIntIntIntToIntLocations(arena_, invoke);
842}
843void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
844 CreateIntIntIntToIntLocations(arena_, invoke);
845}
Andreas Gampe878d58c2015-01-15 23:24:00 -0800846void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
847 CreateIntIntIntToIntLocations(arena_, invoke);
848}
849void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
850 CreateIntIntIntToIntLocations(arena_, invoke);
851}
852
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800853
854void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -0800855 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800856}
857void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -0800858 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800859}
860void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -0800861 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800862}
863void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -0800864 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800865}
Andreas Gampe878d58c2015-01-15 23:24:00 -0800866void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
867 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
868}
869void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
870 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
871}
872
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800873
874static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
875 Primitive::Type type,
876 HInvoke* invoke) {
877 LocationSummary* locations = new (arena) LocationSummary(invoke,
878 LocationSummary::kNoCall,
879 kIntrinsified);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800880 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800881 locations->SetInAt(1, Location::RequiresRegister());
882 locations->SetInAt(2, Location::RequiresRegister());
883 locations->SetInAt(3, Location::RequiresRegister());
884 if (type == Primitive::kPrimNot) {
885 // Need temp registers for card-marking.
886 locations->AddTemp(Location::RequiresRegister());
887 locations->AddTemp(Location::RequiresRegister());
888 }
889}
890
891void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
892 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
893}
894void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
895 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
896}
897void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
898 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
899}
900void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
901 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
902}
903void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
904 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
905}
906void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
907 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
908}
909void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
910 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
911}
912void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
913 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
914}
915void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
916 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
917}
918
919// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
920// memory model.
921static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
922 CodeGeneratorX86_64* codegen) {
923 X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
924 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
925 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
926 CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
927
928 if (type == Primitive::kPrimLong) {
929 __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
930 } else {
931 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
932 }
933
934 if (is_volatile) {
935 __ mfence();
936 }
937
938 if (type == Primitive::kPrimNot) {
939 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
940 locations->GetTemp(1).AsRegister<CpuRegister>(),
941 base,
942 value);
943 }
944}
945
946void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
947 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
948}
949void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
950 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
951}
952void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
953 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
954}
955void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
956 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
957}
958void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
959 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
960}
961void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
962 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
963}
964void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
965 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
966}
967void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
968 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
969}
970void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
971 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
972}
973
974// Unimplemented intrinsics.
975
// Defines empty Visit<Name> methods on both the locations builder and the code
// generator, so an intrinsic listed with this macro sets up no locations and emits
// no code.
#define UNIMPLEMENTED_INTRINSIC(Name)                                                    \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {} \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {}
981
982UNIMPLEMENTED_INTRINSIC(IntegerReverse)
983UNIMPLEMENTED_INTRINSIC(LongReverse)
984UNIMPLEMENTED_INTRINSIC(MathFloor)
985UNIMPLEMENTED_INTRINSIC(MathCeil)
986UNIMPLEMENTED_INTRINSIC(MathRint)
987UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
988UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
989UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should
990UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here.
991UNIMPLEMENTED_INTRINSIC(StringCompareTo)
992UNIMPLEMENTED_INTRINSIC(StringIndexOf)
993UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
994UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
995UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
996UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
997UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
998UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
999
1000} // namespace x86_64
1001} // namespace art