/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

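// Tries to recognize |invoke| as an intrinsic. On success, Dispatch() has installed an
// intrinsified LocationSummary on the invoke; the return value reports whether that happened.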
bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(
          invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(RDI));
    }
    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathX86_64"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

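// The 16-bit reversal below swaps bytes in the low half of a 32-bit register and then shifts the
// result back down: the swapped short ends up in bits 31..16, and the arithmetic shift both
// repositions and sign-extends it. For example, for the short 0x0080 in the register: bswapl
// yields 0x80000000 and sarl 16 yields 0xFFFF8000, i.e. (short)0x8000 sign-extended as required.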
static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

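// Floating-point abs() only has to clear the IEEE-754 sign bit, i.e. the most significant bit of
// the representation. Masking with 0x7FFFFFFF (float) or 0x7FFFFFFFFFFFFFFF (double) through
// andps/andpd does exactly that; NaNs and infinities need no special handling since their
// payload bits are left untouched.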
static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

  // TODO: Can mask directly with constant area using pand if we can guarantee
  // that the literal is aligned on a 16 byte boundary. This will avoid a
  // temporary.
  if (is64bit) {
    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  } else {
    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

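// Branch-free integer abs(): mask = x >> 31 (or 63) is 0 for non-negative x and all-ones for
// negative x, so (x + mask) ^ mask is the identity for non-negative values and computes -x for
// negative ones. E.g. for x = -5: mask = -1, x + mask = -6, -6 ^ -1 = 5. Like Math.abs, this
// returns Integer/Long.MIN_VALUE unchanged, as its negation overflows.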
static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

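// Note: ucomisd/ucomiss set the parity flag on an unordered result, i.e. when either operand is
// NaN; that is what the kParityEven branch below keys off. The explicit -0.0/+0.0 step is needed
// because the two zeros compare equal under ucomis, yet Math.min(-0.0, 0.0) must return -0.0 and
// Math.max must return 0.0: or-ing the operands forces the sign bit on for min, and-ing forces
// it off for max.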
static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

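// Integer min/max via conditional move: with out pre-loaded with op1, the cmov replaces it with
// op2 exactly when op2 is the better choice, avoiding a branch. E.g. min(7, 3): the compare sees
// out = 7 greater than op2 = 3, so the kGreater cmov fires and out becomes 3.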
static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

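// The roundsd/roundss immediate selects the SSE4.1 rounding mode: 0 rounds to nearest (even),
// 1 rounds toward negative infinity and 2 toward positive infinity. Hence Rint passes 0, Floor
// passes 1 and Ceil passes 2 below.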
static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

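// Math.round(float) is specified as (int) Math.floor(x + 0.5f), with NaN mapping to 0 and
// out-of-range results clamping to Integer.MIN_VALUE/MAX_VALUE (analogously for double/long).
// The fast paths below mirror that: add 0.5, round toward negative infinity, pre-load the
// positive clamp, then branch on the comiss/comisd flags (kAboveEqual covers positive overflow,
// kUnordered covers NaN). The negative clamp falls out of cvttss2si/cvttsd2si, which produce
// MIN_VALUE on out-of-range input.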
void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And truncate to an integer.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxInt into out.
  codegen_->Load64BitValue(out, kPrimIntMax);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And truncate to an integer.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxLong into out.
  codegen_->Load64BitValue(out, kPrimLongMax);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be >= 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}

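// Checks one side of the copy range: pos must be non-negative and no larger than length(input),
// and length(input) - pos must cover the requested element count; any violation jumps to the
// slow path. When pos is a constant, the pos >= 0 half was already handled in the
// LocationsBuilder above, so only the in-bounds checks are emitted here.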
static void CheckPosition(X86_64Assembler* assembler,
                          Location pos,
                          CpuRegister input,
                          CpuRegister length,
                          SlowPathCodeX86_64* slow_path,
                          CpuRegister input_len,
                          CpuRegister temp) {
  // Where is the length in the String?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      // Check that length(input) >= length.
      __ cmpl(Address(input, length_offset), length);
      __ j(kLess, slow_path->GetEntryLabel());
    } else {
      // Check that length(input) >= pos.
      __ movl(input_len, Address(input, length_offset));
      __ cmpl(input_len, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      __ leal(temp, Address(input_len, -pos_const));
      __ cmpl(temp, length);
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    __ cmpl(temp, length);
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location srcPos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location destPos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Validity checks: source.
  CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);

  // Validity checks: dest.
  CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);

  // Okay, everything checks out. Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (srcPos.IsConstant()) {
    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, srcPos.AsRegister<CpuRegister>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (destPos.IsConstant()) {
    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, destPos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

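// Note: the gs()->call(Address::Absolute(...)) pattern used here (and in the NewStringFrom*
// intrinsics below) is a %gs-relative absolute call; on x86-64, ART keeps the current Thread in
// the gs segment, so this dispatches through the thread-local quick entrypoint table without
// materializing the thread pointer in a register.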
void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction.
  locations->AddTemp(Location::RegisterLocation(RCX));
  locations->AddTemp(Location::RegisterLocation(RDI));

  // Set output, RSI needed for repe_cmpsq instruction anyways.
  locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
}

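// The comparison below runs eight bytes (four UTF-16 chars) at a time with repe_cmpsq, which is
// why the fixed RSI/RDI/RCX registers were requested. Rounding the char count up via
// (rcx + 3) >> 2 can read past the last char, but never past the object: as the static_assert
// documents, string contents are zero-padded up to the 8-byte object alignment, so the padding
// compares equal too.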
void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Check if input is null, return false if it is.
  __ testl(arg, arg);
  __ j(kEqual, &return_false);

  // Instanceof check for the argument by comparing class fields.
  // All string objects must have the same type since String cannot be subclassed.
  // Receiver must be a string object, so its class field is equal to all strings' class fields.
  // If the argument is a string object, its class field must be equal to receiver's class field.
  __ movl(rcx, Address(str, class_offset));
  __ cmpl(rcx, Address(arg, class_offset));
  __ j(kNotEqual, &return_false);

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length of receiver string.
  __ movl(rcx, Address(str, count_offset));
  // Check if lengths are equal, return false if they're not.
  __ cmpl(rcx, Address(arg, count_offset));
  __ j(kNotEqual, &return_false);
  // Return true if both strings are empty.
  __ jrcxz(&return_true);

  // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
  __ leal(rsi, Address(str, value_offset));
  __ leal(rdi, Address(arg, value_offset));

  // Divide string length by 4 and adjust for lengths not divisible by 4.
  __ addl(rcx, Immediate(3));
  __ shrl(rcx, Immediate(2));

  // Assertions that must hold in order to compare strings 4 characters at a time.
  DCHECK_ALIGNED(value_offset, 8);
  static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");

  // Loop to compare strings four characters at a time starting at the beginning of the string.
  __ repe_cmpsq();
  // If strings are not equal, zero flag will be cleared.
  __ j(kNotEqual, &return_false);

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ movl(rsi, Immediate(1));
  __ jmp(&end);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ xorl(rsi, rsi);
  __ Bind(&end);
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber RDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

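// The scan proper is repne scasw: RAX holds the char, RDI walks the data and RCX is decremented
// once per char examined, including the matching one. The index is then recovered as
// string_length - counter - 1. E.g. scanning 5 chars with a hit at index 2 stops with RCX = 2,
// giving 5 - 2 - 1 = 2.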
static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj.AsRegister(), RDI);
  DCHECK_EQ(search_value.AsRegister(), RAX);
  DCHECK_EQ(counter.AsRegister(), RCX);
  DCHECK_EQ(out.AsRegister(), RDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCodeX86_64* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a length check.
  // TODO: Support jecxz.
  NearLabel not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addq(string_obj, Immediate(value_offset));
  } else {
    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0;
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.

    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
    __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

    // Now update ecx, the work counter: it's gonna be string.length - start_index.
    __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
    __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
  }

  // Everything is set up for repne scasw:
  //   * Comparison address in RDI.
  //   * Counter in ECX.
  __ repne_scasw();

  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched. Compute the index of the result.
  __ subl(string_length, counter);
  __ leal(out, Address(string_length, -1));

  NearLabel done;
  __ jmp(&done);

  // Failed to match; return -1.
  __ Bind(&not_found_label);
  __ movl(out, Immediate(-1));

  // And join up at the end.
  __ Bind(&done);
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, true);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, false);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(byte_array, byte_array);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

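// Memory.peekByte/peekShort return signed values, so the sub-word loads below use the
// sign-extending movsxb/movsxw rather than their zero-extending counterparts.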
static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
}

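// Note on the constant path in GenPoke below: x86-64 has no form of movq that
// stores a full 64-bit immediate to memory; the immediate is 32 bits wide and
// is sign-extended by the CPU. Hence the locations above only admit
// int32-representable long constants, and GenPoke DCHECKs IsInt<32> before
// truncating the value.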
static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  Location value = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimInt:
      if (value.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimLong:
      if (value.IsConstant()) {
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
        DCHECK(IsInt<32>(v));
        int32_t v_32 = v;
        __ movq(Address(address, 0), Immediate(v_32));
      } else {
        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

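// Thread.currentThread() below compiles to a single thread-local load: on
// x86-64 the runtime's Thread state is addressed through the GS segment, and
// the Java peer is a 32-bit heap reference at PeerOffset, i.e. roughly
//   movl out, gs:[PeerOffset]   // sketch; offset per Thread::PeerOffset<kX86_64WordSize>()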
void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

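// A minimal sketch of the Unsafe.get* lowering below (the Java-side signature
// getInt(Object base, long offset) is an assumption): the whole intrinsic is
//   movl trg, [base + offset]   // movq for longs
// plus an unpoison step for object loads when heap-reference poisoning is on.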
static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      if (type == Primitive::kPrimNot) {
        __ MaybeUnpoisonHeapReference(trg);
      }
      break;

    case Primitive::kPrimLong:
      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}

static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// We don't care about ordered: it requires an AnyStore barrier, which is already given by the
// x86 memory model.
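// Under x86-64 TSO, stores are not reordered with earlier loads or stores, so
// plain and ordered puts need no fence at all. Only volatile puts emit the
// trailing mfence below, which supplies the StoreLoad ordering that TSO does
// not guarantee.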
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
    __ movl(temp, value);
    __ PoisonHeapReference(temp);
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value,
                        value_can_be_null);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // Expected value must be in EAX/RAX.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

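// Reference semantics of the lock cmpxchg emitted by GenCAS below:
//   if ([base + offset] == RAX) { [base + offset] = value; ZF = 1; }
//   else                        { RAX = [base + offset];   ZF = 0; }
// The trailing setcc(kZero)/movzxb pair converts ZF into the boolean result.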
static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object assuming new value is stored.
      bool value_can_be_null = true;  // TODO: Worth finding out this information?
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value,
                          value_can_be_null);

      if (kPoisonHeapReferences) {
        __ PoisonHeapReference(expected);
        __ PoisonHeapReference(value);
      }
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // Locked cmpxchg has full barrier semantics, and we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    __ UnpoisonHeapReference(value);
    __ UnpoisonHeapReference(expected);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

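// One SwapBits round computes reg = ((x & mask) << shift) | ((x >> shift) & mask).
// With shift = 1 and mask = 0x55555555 this swaps every adjacent bit pair;
// together with the rounds for shifts 2 and 4 after the bswap, all 32 bits
// end up reversed, e.g. Integer.reverse(0x00000001) == 0x80000000.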
static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a long number x. Using bswap saves instructions
   * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

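// Worked example for the BSR lowering below: for input 0x00000010, BSR yields
// the index of the highest set bit, 4, and CLZ(0x10) = 31 - 4 = 27. Since the
// index always lies in [0, 31] (or [0, 63] for longs), 31 - idx == idx ^ 31,
// so the correction is the single xorl with zero_value_result - 1.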
static void GenLeadingZeros(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    if (value == 0) {
      __ xorl(out, out);
    } else {
      __ movl(out, Immediate(value));
    }
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsrq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsrl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSR sets ZF if the input was zero; in that case the output is undefined.
  NearLabel is_zero, done;
  __ j(kEqual, &is_zero);

  // Correct the result from BSR to get the CLZ result.
  __ xorl(out, Immediate(zero_value_result - 1));
  __ jmp(&done);

  // Fix the zero case with the expected result.
  __ Bind(&is_zero);
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
  GenLeadingZeros(assembler, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
  GenLeadingZeros(assembler, invoke, /* is_long */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

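// BSF needs no such correction: the index of the lowest set bit already is
// the trailing-zero count (e.g. input 0x18 -> BSF = 3 = CTZ). Only the
// all-zero input takes the fix-up branch in GenTrailingZeros below.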
static void GenTrailingZeros(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    if (value == 0) {
      __ xorl(out, out);
    } else {
      __ movl(out, Immediate(value));
    }
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsfq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsfl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSF sets ZF if the input was zero; in that case the output is undefined.
  NearLabel done;
  __ j(kNotEqual, &done);

  // Fix the zero case with the expected result.
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
  GenTrailingZeros(assembler, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
  GenTrailingZeros(assembler, invoke, /* is_long */ true);
}

static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  // The shift count needs to be in CL or a constant.
  locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, invoke->InputAt(1)));
  locations->SetOut(Location::SameAsFirstInput());
}

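// GenRotate below masks constant rotation counts with kMaxIntShiftValue (31)
// or kMaxLongShiftValue (63). This matches the hardware, which only uses the
// low 5 (or 6) bits of the count, and Java semantics, where for example
// Integer.rotateLeft(x, 33) == Integer.rotateLeft(x, 1).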
static void GenRotate(X86_64Assembler* assembler, HInvoke* invoke, bool is_long, bool is_left) {
  LocationSummary* locations = invoke->GetLocations();
  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
  Location second = locations->InAt(1);

  if (is_long) {
    if (second.IsRegister()) {
      CpuRegister second_reg = second.AsRegister<CpuRegister>();
      if (is_left) {
        __ rolq(first_reg, second_reg);
      } else {
        __ rorq(first_reg, second_reg);
      }
    } else {
      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
      if (is_left) {
        __ rolq(first_reg, imm);
      } else {
        __ rorq(first_reg, imm);
      }
    }
  } else {
    if (second.IsRegister()) {
      CpuRegister second_reg = second.AsRegister<CpuRegister>();
      if (is_left) {
        __ roll(first_reg, second_reg);
      } else {
        __ rorl(first_reg, second_reg);
      }
    } else {
      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
      if (is_left) {
        __ roll(first_reg, imm);
      } else {
        __ rorl(first_reg, imm);
      }
    }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
  CreateRotateLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
  GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
  CreateRotateLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
  GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongRotateLeft(HInvoke* invoke) {
  CreateRotateLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongRotateLeft(HInvoke* invoke) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
  GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitLongRotateRight(HInvoke* invoke) {
  CreateRotateLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongRotateRight(HInvoke* invoke) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
  GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ false);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name)                                                    \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}                                                                                        \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
}

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

#undef UNIMPLEMENTED_INTRINSIC

#undef __

}  // namespace x86_64
}  // namespace art