/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(
          invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathX86_64"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
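      // bswap reverses all four bytes of the 32-bit register, leaving the reversed short in the
      // high half; the arithmetic shift moves it back down and sign-extends it.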
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

    // TODO: Can mask directly with constant area using pand if we can guarantee
    // that the literal is aligned on a 16 byte boundary.  This will avoid a
    // temporary.
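    // Each literal is all ones except for the IEEE-754 sign bit, so the "and" clears just the
    // sign and leaves the magnitude (including any NaN payload) unchanged.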
    if (is64bit) {
      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
// Once assembler support is available, in-memory operations look like this:
//    if (is64bit) {
//      DCHECK(output.IsDoubleStackSlot());
//      // No 64b and with literal.
//      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
//      __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
//    } else {
//      DCHECK(output.IsStackSlot());
//      // Can use and with a literal directly.
//      __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
//    }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

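// Branchless absolute value: mask is 0 for non-negative inputs and all ones for negative ones,
// so (x + mask) ^ mask yields x when x >= 0 and -x otherwise.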
static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  // (out := op1)
  // out <=? op2
  // if NaN jmp NaN_label
  // if out is min jmp done
  // if op2 is min jmp op2_label
  // handle -0/+0
  // jmp done
  // NaN_label:
  // out := NaN
  // op2_label:
  // out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

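  // ucomiss/ucomisd set the parity flag exactly when at least one operand is NaN (unordered).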
  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
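  // The operands compared equal here, so they can only differ in sign: OR keeps a negative sign
  // bit (min prefers -0.0), while AND keeps it only if both are set (max prefers +0.0).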
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  // (out := op1)
  // out <=? op2
  // if out is min jmp done
  // out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

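  // out already holds op1; conditionally overwrite it with op2 when op2 is the smaller (min) or
  // larger (max) of the two values.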
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
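    // The SSE4.1 rounding-mode immediate: 0 = round to nearest (even), 1 = round toward negative
    // infinity (floor), 2 = round toward positive infinity (ceil).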
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations =
        new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor it to an integral value (rounding mode 1 = toward negative infinity).
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Pre-load the saturated result: inputs at or above maxInt must produce maxInt.
  __ movl(out, Immediate(kPrimIntMax));

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And floor it to an integral value (rounding mode 1 = toward negative infinity).
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Pre-load the saturated result: inputs at or above maxLong must produce maxLong.
  __ movq(out, Immediate(kPrimLongMax));

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber RDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj.AsRegister(), RDI);
  DCHECK_EQ(search_value.AsRegister(), RAX);
  DCHECK_EQ(counter.AsRegister(), RCX);
  DCHECK_EQ(out.AsRegister(), RDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCodeX86_64* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a length check.
  // TODO: Support jecxz.
  Label not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addq(string_obj, Immediate(value_offset));
  } else {
    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0;
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.

    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
    __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

    // Now update ecx, the work counter: it's gonna be string.length - start_index.
    __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
    __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
  }

  // Everything is set up for repne scasw:
  //   * Comparison address in RDI.
  //   * Counter in ECX.
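  // repne scasw compares AX with the 16-bit word at [RDI], advancing RDI and decrementing RCX
  // after every comparison; it stops on a match (ZF set) or when RCX reaches zero.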
  __ repne_scasw();

  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched. Compute the index of the result.
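  // scasw has already stepped past the matching character, so string_length - counter is one
  // more than the index of the match.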
  __ subl(string_length, counter);
  __ leal(out, Address(string_length, -1));

  Label done;
  __ jmp(&done);

  // Failed to match; return -1.
  __ Bind(&not_found_label);
  __ movl(out, Immediate(-1));

  // And join up at the end.
  __ Bind(&done);
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, true);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, false);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(byte_array, byte_array);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  Location value = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimInt:
      if (value.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimLong:
      if (value.IsConstant()) {
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
        DCHECK(IsInt<32>(v));
        int32_t v_32 = v;
        __ movq(Address(address, 0), Immediate(v_32));
      } else {
        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
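  // The GS segment base points at the native Thread object; the java.lang.Thread peer reference
  // lives at PeerOffset within it.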
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

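// On x86-64 an ordinary load already has acquire semantics, so a volatile get needs no extra
// fence; is_volatile is therefore unused below.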
Andreas Gampe878d58c2015-01-15 23:24:00 -08001244static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001245 bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
1246 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
1247 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
1248 CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
1249
Andreas Gampe878d58c2015-01-15 23:24:00 -08001250 switch (type) {
1251 case Primitive::kPrimInt:
1252 case Primitive::kPrimNot:
1253 __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
Roland Levillain4d027112015-07-01 15:41:14 +01001254 if (type == Primitive::kPrimNot) {
1255 __ MaybeUnpoisonHeapReference(trg);
1256 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001257 break;
1258
1259 case Primitive::kPrimLong:
1260 __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
1261 break;
1262
1263 default:
1264 LOG(FATAL) << "Unsupported op size " << type;
1265 UNREACHABLE();
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001266 }
1267}
1268
1269static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
1270 LocationSummary* locations = new (arena) LocationSummary(invoke,
1271 LocationSummary::kNoCall,
1272 kIntrinsified);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001273 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001274 locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}


void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                        Primitive::Type type,
                                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
    locations->AddTemp(Location::RequiresRegister());
  }
}
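// Note (illustrative, not part of the original source): the (receiver, base object,
// offset, value) layout above mirrors the Java-level signature these intrinsics are
// expected to implement, e.g. sun.misc.Unsafe.putInt(Object o, long offset, int x);
// input 0 is the Unsafe receiver itself, which the generated code never reads.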

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// We do not need to handle ordered puts specially: they only require an AnyStore
// barrier, which the x86 memory model already provides.
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
    __ movl(temp, value);
    __ PoisonHeapReference(temp);
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    bool value_can_be_null = true;  // TODO: Is it worth finding this out statically?
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value,
                        value_can_be_null);
  }
}
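// A rough sketch (illustrative only; the actual registers are chosen by the register
// allocator) of what GenUnsafePut emits for a volatile int put when heap-reference
// poisoning is disabled:
//
//   movl [base + offset], value   // plain 32-bit store
//   mfence                        // volatile put: full barrier after the store
//
// Long puts use movq instead of movl, and object puts additionally mark the GC card
// for the written-to object.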

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // The expected value must be in EAX/RAX, because cmpxchg implicitly uses that register.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}
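// For reference, a sketch (added here, not part of the original source) of the
// LOCK CMPXCHG [mem], value semantics that the fixed-RAX constraint above relies on:
//
//   if (RAX == [mem]) { [mem] = value; ZF = 1; }
//   else              { RAX = [mem];   ZF = 0; }
//
// GenCAS below turns ZF into the boolean result with setcc/movzxb.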

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark the card for the object, assuming the new value is stored.
      bool value_can_be_null = true;  // TODO: Is it worth finding this out statically?
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value,
                          value_can_be_null);

      if (kPoisonHeapReferences) {
        __ PoisonHeapReference(expected);
        __ PoisonHeapReference(value);
      }
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // Locked cmpxchg has full barrier semantics, so no additional scheduling barriers
  // are needed here.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    __ UnpoisonHeapReference(value);
    __ UnpoisonHeapReference(expected);
  }
}
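// Roughly, the sequence GenCAS emits for an int CAS is (illustrative only; register
// names are placeholders chosen by the register allocator):
//
//   lock cmpxchgl [base + offset], value   // expected value is implicitly in EAX
//   setz   out                             // out = 1 if the swap happened
//   movzxb out, out                        // widen the byte result to 32 bits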

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}
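// In C terms, SwapBits computes (a sketch added for clarity, not in the original
// source):
//
//   reg = ((reg >> shift) & mask) | ((reg & mask) << shift);
//
// With the alternating masks used below, this swaps adjacent groups of `shift` bits.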

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse the byte order first, and then use 3 rounds of
   * bit swapping to reverse the bits of x. Using bswap saves instructions compared to
   * the generic libcore (luni) implementation, which needs 5 rounds of bit swapping.
   *   x = bswap x
   *   x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   *   x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   *   x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}
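// Quick sanity check of the scheme above (worked by hand, not from the original
// source): reversing 0x00000001 gives bswap -> 0x01000000, and the 1-, 2- and 4-bit
// swap rounds then move the set bit through 0x02000000 and 0x08000000 to the final
// 0x80000000, which matches Integer.reverse(1).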

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}
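// Note (added for clarity, not in the original source): unlike the 32-bit SwapBits,
// the 64-bit masks used here do not fit in the sign-extended 32-bit immediate that
// andq accepts, so each mask is first materialized with movq into temp_mask, the
// second temp register requested in the locations builder above.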

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse the byte order first, and then use 3 rounds of
   * bit swapping to reverse the bits of the long value x. Using bswap saves instructions
   * compared to the generic libcore (luni) implementation, which needs 5 rounds of bit
   * swapping.
   *   x = bswap x
   *   x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   *   x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   *   x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name) \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
} \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
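// As an illustration (the expansion below is implied by the macro, not spelled out in
// the source), UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) expands to two empty
// visitors, so no intrinsified locations or code are ever generated for that call:
//
//   void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(
//       HInvoke* invoke ATTRIBUTE_UNUSED) {}
//   void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(
//       HInvoke* invoke ATTRIBUTE_UNUSED) {}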

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

#undef UNIMPLEMENTED_INTRINSIC

#undef __

}  // namespace x86_64
}  // namespace art