/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(
          invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
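      // bswapl reverses all four bytes of the 32-bit register, leaving the reversed short in the
      // upper half; the arithmetic shift moves it back into the low 16 bits with sign extension.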
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

    // TODO: Can mask directly with constant area using pand if we can guarantee
    // that the literal is aligned on a 16 byte boundary. This will avoid a
    // temporary.
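    // Masking with a constant that clears only the sign bit turns a negative value positive and
    // leaves every other bit (including NaN payloads) untouched.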
    if (is64bit) {
      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
// Once assembler support is available, in-memory operations look like this:
//   if (is64bit) {
//     DCHECK(output.IsDoubleStackSlot());
//     // No 64b and with literal.
//     __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
//     __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
//   } else {
//     DCHECK(output.IsStackSlot());
//     // Can use and with a literal directly.
//     __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
//   }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

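  // Branch-free abs: the arithmetic shift makes mask 0 for non-negative inputs and all ones for
  // negative ones, so (out + mask) ^ mask leaves non-negative values unchanged and negates the rest.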
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if NaN jmp NaN_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // NaN_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
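  // The operands compared equal, so they are some mix of +0.0 and -0.0. OR-ing the bit patterns
  // keeps a sign bit if either operand has one (min must return -0.0); AND-ing keeps it only if
  // both do (max must return +0.0).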
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
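  // Load the canonical quiet NaN bit pattern for the result type.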
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

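  // out already holds op1; conditionally replace it with op2 only when op2 wins the comparison
  // (is smaller for min, larger for max).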
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
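  // round_mode is the SSE4.1 roundsd immediate: 0 rounds to the nearest even value (rint),
  // 1 rounds toward negative infinity (floor), 2 rounds toward positive infinity (ceil).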
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());  // The rounded result is an int/long, read as a CpuRegister below.
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And round it down (toward negative infinity), i.e. floor the sum.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And round it down (toward negative infinity), i.e. floor the sum.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

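  // Call the pStringCompareTo entrypoint; on x86-64 the quick entrypoints live in the Thread
  // object, which is addressed through the gs segment register.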
  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber RDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj.AsRegister(), RDI);
  DCHECK_EQ(search_value.AsRegister(), RAX);
  DCHECK_EQ(counter.AsRegister(), RCX);
  DCHECK_EQ(out.AsRegister(), RDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCodeX86_64* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a length check.
  // TODO: Support jecxz.
  Label not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addq(string_obj, Immediate(value_offset));
  } else {
    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0;
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.

    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
    __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

    // Now update ecx, the work counter: it's gonna be string.length - start_index.
    __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
    __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
  }

  // Everything is set up for repne scasw:
  //   * Comparison address in RDI.
  //   * Counter in ECX.
  __ repne_scasw();

  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched. Compute the index of the result.
  __ subl(string_length, counter);
  __ leal(out, Address(string_length, -1));

  Label done;
  __ jmp(&done);

  // Failed to match; return -1.
  __ Bind(&not_found_label);
  __ movl(out, Immediate(-1));

  // And join up at the end.
  __ Bind(&done);
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, true);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, false);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(byte_array, byte_array);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
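  // A constant value can be encoded directly only when it fits in a sign-extended 32-bit
  // immediate; anything larger has to be materialized in a register.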
  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  Location value = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimInt:
      if (value.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimLong:
      if (value.IsConstant()) {
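        // movq cannot take a 64-bit immediate; the locations builder only allowed constants that
        // fit in a sign-extended 32-bit immediate, which the DCHECK below re-verifies.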
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
        DCHECK(IsInt<32>(v));
        int32_t v_32 = v;
        __ movq(Address(address, 0), Immediate(v_32));
      } else {
        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
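  // On x86-64 plain loads already provide the acquire ordering a volatile get needs, so
  // is_volatile requires no extra code here.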
1244 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
1245 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
1246 CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
1247
Andreas Gampe878d58c2015-01-15 23:24:00 -08001248 switch (type) {
1249 case Primitive::kPrimInt:
1250 case Primitive::kPrimNot:
1251 __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
1252 break;
1253
1254 case Primitive::kPrimLong:
1255 __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
1256 break;
1257
1258 default:
1259 LOG(FATAL) << "Unsupported op size " << type;
1260 UNREACHABLE();
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001261 }
1262}
1263
1264static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
1265 LocationSummary* locations = new (arena) LocationSummary(invoke,
1266 LocationSummary::kNoCall,
1267 kIntrinsified);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001268 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001269 locations->SetInAt(1, Location::RequiresRegister());
1270 locations->SetInAt(2, Location::RequiresRegister());
Andreas Gampe878d58c2015-01-15 23:24:00 -08001271 locations->SetOut(Location::RequiresRegister());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001272}
1273
1274void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1275 CreateIntIntIntToIntLocations(arena_, invoke);
1276}
1277void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1278 CreateIntIntIntToIntLocations(arena_, invoke);
1279}
1280void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1281 CreateIntIntIntToIntLocations(arena_, invoke);
1282}
1283void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1284 CreateIntIntIntToIntLocations(arena_, invoke);
1285}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001286void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1287 CreateIntIntIntToIntLocations(arena_, invoke);
1288}
1289void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1290 CreateIntIntIntToIntLocations(arena_, invoke);
1291}
1292
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001293
1294void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001295 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001296}
1297void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001298 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001299}
1300void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001301 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001302}
1303void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001304 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001305}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001306void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1307 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
1308}
1309void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1310 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
1311}
1312
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001313
1314static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
1315 Primitive::Type type,
1316 HInvoke* invoke) {
1317 LocationSummary* locations = new (arena) LocationSummary(invoke,
1318 LocationSummary::kNoCall,
1319 kIntrinsified);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001320 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001321 locations->SetInAt(1, Location::RequiresRegister());
1322 locations->SetInAt(2, Location::RequiresRegister());
1323 locations->SetInAt(3, Location::RequiresRegister());
1324 if (type == Primitive::kPrimNot) {
1325 // Need temp registers for card-marking.
1326 locations->AddTemp(Location::RequiresRegister());
1327 locations->AddTemp(Location::RequiresRegister());
1328 }
1329}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// Ordered puts need no extra code: they only require an AnyStore barrier, which the x86-64
// memory model already provides.
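// For reference, the store sequence emitted below is simply:
//   mov[l|q] [base + offset], value
//   mfence                            (volatile only: StoreLoad barrier after the store)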
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value,
                        value_can_be_null);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // expected value must be in EAX/RAX.
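  // (lock cmpxchg implicitly compares its memory operand against RAX and, on failure, writes the
  // old memory value back into RAX.)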
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

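// Common code for Unsafe.compareAndSwap{Int,Long,Object}: atomically replace the field at
// base + offset with `value` if it still holds `expected`, and produce 1 in `out` iff the swap
// happened.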
static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object assuming new value is stored.
      bool value_can_be_null = true;  // TODO: Worth finding out this information?
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value,
                          value_can_be_null);
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // locked cmpxchg has full barrier semantics, and we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

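// Swaps the bit groups selected by `mask` with the groups `shift` bits above them, i.e. computes
//   reg = ((reg >> shift) & mask) | ((reg & mask) << shift).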
static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of swapping bits:
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
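  // For example, this sequence turns 0x12345678 into 0x1E6A2C48 (== Integer.reverse(0x12345678)).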
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

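// Same bit-group swap as above, but for 64-bit values the mask does not fit a sign-extended 32-bit
// immediate, so it is first materialized into `temp_mask`.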
static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a long number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of swapping bits:
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

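// An empty pair of Visit methods means no intrinsified LocationSummary is created for the call,
// so these invokes simply go through the regular invoke path.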
#define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}                                                                                       \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
}

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art