/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}
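
// Illustrative note (expository, not part of the emitted code): for kPrimShort the bswapl/sarl
// pair reverses the two low bytes and then sign-extends the 16-bit result. E.g. with the short
// 0x1234 in the low half of `out`, bswapl yields 0x34120000 and sarl by 16 leaves 0x3412.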

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

    // TODO: Can mask directly with constant area using pand if we can guarantee
    // that the literal is aligned on a 16 byte boundary.  This will avoid a
    // temporary.
    if (is64bit) {
      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
//  Once assembler support is available, in-memory operations look like this:
//    if (is64bit) {
//      DCHECK(output.IsDoubleStackSlot());
//      // No 64b and with literal.
//      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
//      __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
//    } else {
//      DCHECK(output.IsStackSlot());
//      // Can use and with a literal directly.
//      __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
//    }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}
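
// Illustrative note (expository, not part of the emitted code): the sequence above is the
// classic branchless abs, equivalent to
//   mask = x >> 31;           // 0 for x >= 0, -1 for x < 0 (arithmetic shift)
//   abs  = (x + mask) ^ mask;
// e.g. x = -5: mask = -1, (-5 + -1) ^ -1 = -6 ^ -1 = 5.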

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}
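
// Illustrative note (expository, not part of the emitted code): when ucomis* reports the
// operands equal, the interesting case is -0.0 vs +0.0. OR-ing the bit patterns keeps the sign
// bit set, which is the correct answer for min (-0.0); AND-ing clears it, which is correct for
// max (+0.0).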
469
Mark Mendellf55c3e02015-03-26 21:07:46 -0400470static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800471 LocationSummary* locations = new (arena) LocationSummary(invoke,
472 LocationSummary::kNoCall,
473 kIntrinsified);
474 locations->SetInAt(0, Location::RequiresFpuRegister());
475 locations->SetInAt(1, Location::RequiresFpuRegister());
476 // The following is sub-optimal, but all we can do for now. It would be fine to also accept
477 // the second input to be the output (we can simply swap inputs).
478 locations->SetOut(Location::SameAsFirstInput());
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800479}
480
481void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
Mark Mendellf55c3e02015-03-26 21:07:46 -0400482 CreateFPFPToFP(arena_, invoke);
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800483}
484
485void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
Mark Mendellf55c3e02015-03-26 21:07:46 -0400486 GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800487}
488
489void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
Mark Mendellf55c3e02015-03-26 21:07:46 -0400490 CreateFPFPToFP(arena_, invoke);
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800491}
492
493void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
Mark Mendellf55c3e02015-03-26 21:07:46 -0400494 GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800495}
496
497void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
Mark Mendellf55c3e02015-03-26 21:07:46 -0400498 CreateFPFPToFP(arena_, invoke);
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800499}
500
501void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
Mark Mendellf55c3e02015-03-26 21:07:46 -0400502 GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800503}
504
505void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
Mark Mendellf55c3e02015-03-26 21:07:46 -0400506 CreateFPFPToFP(arena_, invoke);
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800507}
508
509void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
Mark Mendellf55c3e02015-03-26 21:07:46 -0400510 GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
Andreas Gampe71fb52f2014-12-29 17:43:08 -0800511}
512
513static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
514 X86_64Assembler* assembler) {
515 Location op1_loc = locations->InAt(0);
516 Location op2_loc = locations->InAt(1);
517
518 // Shortcut for same input locations.
519 if (op1_loc.Equals(op2_loc)) {
520 // Can return immediately, as op1_loc == out_loc.
521 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
522 // a copy here.
523 DCHECK(locations->Out().Equals(op1_loc));
524 return;
525 }
526
527 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
528 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
529
530 // (out := op1)
531 // out <=? op2
532 // if out is min jmp done
533 // out := op2
534 // done:
535
536 if (is_long) {
537 __ cmpq(out, op2);
538 } else {
539 __ cmpl(out, op2);
540 }
541
542 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
543}
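
// Illustrative note (expository, not part of the emitted code): after the compare, the cmov is
// a branchless select:
//   min: if (out > op2) out = op2;
//   max: if (out < op2) out = op2;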

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And truncate to an integer.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  //  if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  //  output = 0
  __ xorl(out, out);
  __ Bind(&done);
}
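
// Illustrative note (expository, not part of the emitted code): e.g. in = 2.3f gives
// 2.3 + 0.5 = 2.8, floor = 2.0, cvttss2si = 2; in = -2.3f gives -1.8, floor = -2.0, result -2,
// matching Math.round's round-half-up semantics. A NaN input falls through to the zero result.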

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5);  convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And truncate to an integer.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  //  if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  //  output = 0
  __ xorq(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}
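
// Illustrative note (expository, not part of the emitted code): `out` is SameAsFirstInput, so it
// aliases the String reference; the movzxw reads the 16-bit char at value_offset + 2 * idx. An
// out-of-bounds index (unsigned compare against the count field) takes the slow path, which
// re-invokes the managed charAt and lets it throw.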

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(byte_array, byte_array);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  Location value = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimInt:
      if (value.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimLong:
      if (value.IsConstant()) {
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
        DCHECK(IsInt<32>(v));
        int32_t v_32 = v;
        __ movq(Address(address, 0), Immediate(v_32));
      } else {
        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    case Primitive::kPrimLong:
      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}
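
// Illustrative note (expository, not part of the emitted code): is_volatile is unused above
// because on x86-64 ordinary loads already provide the acquire ordering a volatile get needs
// under the TSO memory model, so no extra fence is emitted for the volatile variants.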

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}


void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
// memory model.
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value);
  }
}
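
// Illustrative note (expository, not part of the emitted code): a plain x86-64 store already has
// release semantics, which is enough for ordered puts; a volatile put additionally needs a
// StoreLoad barrier after the store, which the mfence above supplies.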

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // expected value must be in EAX/RAX.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object assuming new value is stored.
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value);
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // locked cmpxchg has full barrier semantics, and we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);
}
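
// Illustrative note (expository, not part of the emitted code): lock cmpxchg compares RAX (the
// expected value) with the memory operand; on a match it stores `value` and sets ZF, otherwise
// it loads the current memory value into RAX and clears ZF. setcc/movzxb then turn that flag
// into the 0/1 boolean result of compareAndSwap.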

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a number x. Using bswap to save instructions
   * compared to generic luni implementation which has 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}
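
// Illustrative note (expository, not part of the emitted code): e.g. x = 0x00000001. bswapl
// gives 0x01000000; the three swap rounds (1-, 2- and 4-bit) then reverse the bits within each
// byte, yielding 0x80000000, which is Integer.reverse(1).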

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a long number x. Using bswap to save instructions
   * compared to generic luni implementation which has 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}                                                                                       \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
}

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art