Mark Mendell09ed1a32015-03-25 08:30:06 -04001/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "intrinsics_x86.h"
18
Mark Mendellfb8d2792015-03-31 22:16:59 -040019#include "arch/x86/instruction_set_features_x86.h"
Mark Mendell09ed1a32015-03-25 08:30:06 -040020#include "code_generator_x86.h"
21#include "entrypoints/quick/quick_entrypoints.h"
22#include "intrinsics.h"
23#include "mirror/array-inl.h"
24#include "mirror/art_method.h"
25#include "mirror/string.h"
26#include "thread.h"
27#include "utils/x86/assembler_x86.h"
28#include "utils/x86/constants_x86.h"
29
30namespace art {
31
32namespace x86 {
33
34static constexpr int kDoubleNaNHigh = 0x7FF80000;
35static constexpr int kDoubleNaNLow = 0x00000000;
36static constexpr int kFloatNaN = 0x7FC00000;
37
Mark Mendellfb8d2792015-03-31 22:16:59 -040038IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
39 : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
40}
41
42
Mark Mendell09ed1a32015-03-25 08:30:06 -040043X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
44 return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
45}
46
47ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
48 return codegen_->GetGraph()->GetArena();
49}
50
51bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
52 Dispatch(invoke);
53 LocationSummary* res = invoke->GetLocations();
54 return res != nullptr && res->Intrinsified();
55}
56
57#define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->
58
59// TODO: target as memory.
60static void MoveFromReturnRegister(Location target,
61 Primitive::Type type,
62 CodeGeneratorX86* codegen) {
63 if (!target.IsValid()) {
64 DCHECK(type == Primitive::kPrimVoid);
65 return;
66 }
67
68 switch (type) {
69 case Primitive::kPrimBoolean:
70 case Primitive::kPrimByte:
71 case Primitive::kPrimChar:
72 case Primitive::kPrimShort:
73 case Primitive::kPrimInt:
74 case Primitive::kPrimNot: {
75 Register target_reg = target.AsRegister<Register>();
76 if (target_reg != EAX) {
77 __ movl(target_reg, EAX);
78 }
79 break;
80 }
81 case Primitive::kPrimLong: {
82 Register target_reg_lo = target.AsRegisterPairLow<Register>();
83 Register target_reg_hi = target.AsRegisterPairHigh<Register>();
84 if (target_reg_lo != EAX) {
85 __ movl(target_reg_lo, EAX);
86 }
87 if (target_reg_hi != EDX) {
88 __ movl(target_reg_hi, EDX);
89 }
90 break;
91 }
92
93 case Primitive::kPrimVoid:
94 LOG(FATAL) << "Unexpected void type for valid location " << target;
95 UNREACHABLE();
96
97 case Primitive::kPrimDouble: {
98 XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
99 if (target_reg != XMM0) {
100 __ movsd(target_reg, XMM0);
101 }
102 break;
103 }
104 case Primitive::kPrimFloat: {
105 XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
106 if (target_reg != XMM0) {
107 __ movss(target_reg, XMM0);
108 }
109 break;
110 }
111 }
112}
113
114static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
115 if (invoke->InputCount() == 0) {
116 return;
117 }
118
119 LocationSummary* locations = invoke->GetLocations();
120 InvokeDexCallingConventionVisitor calling_convention_visitor;
121
122 // We're moving potentially two or more locations to locations that could overlap, so we need
123 // a parallel move resolver.
124 HParallelMove parallel_move(arena);
125
126 for (size_t i = 0; i < invoke->InputCount(); i++) {
127 HInstruction* input = invoke->InputAt(i);
128 Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
129 Location actual_loc = locations->InAt(i);
130
131 parallel_move.AddMove(actual_loc, cc_loc, nullptr);
132 }
133
134 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
135}
136
137// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
138// call. This will copy the arguments into the positions for a regular call.
139//
140// Note: The actual parameters are required to be in the locations given by the invoke's location
141// summary. If an intrinsic modifies those locations before a slowpath call, they must be
142// restored!
143class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
144 public:
145 explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp)
146 : invoke_(invoke) {
147 // The temporary register has to be EAX for x86 invokes.
148 DCHECK_EQ(temp, EAX);
149 }
150
151 void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
152 CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in);
153 __ Bind(GetEntryLabel());
154
155 SaveLiveRegisters(codegen, invoke_->GetLocations());
156
157 MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
158
159 if (invoke_->IsInvokeStaticOrDirect()) {
160 codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
Mingyao Yange90db122015-04-03 17:56:54 -0700161 RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
Mark Mendell09ed1a32015-03-25 08:30:06 -0400162 } else {
163 UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
164 UNREACHABLE();
165 }
166
167 // Copy the result back to the expected output.
168 Location out = invoke_->GetLocations()->Out();
169 if (out.IsValid()) {
170 DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory.
171 DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
172 MoveFromReturnRegister(out, invoke_->GetType(), codegen);
173 }
174
175 RestoreLiveRegisters(codegen, invoke_->GetLocations());
176 __ jmp(GetExitLabel());
177 }
178
179 private:
180 // The instruction where this slow path is happening.
181 HInvoke* const invoke_;
182
183 DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86);
184};
185
186#undef __
187#define __ assembler->
188
189static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
190 LocationSummary* locations = new (arena) LocationSummary(invoke,
191 LocationSummary::kNoCall,
192 kIntrinsified);
193 locations->SetInAt(0, Location::RequiresFpuRegister());
194 locations->SetOut(Location::RequiresRegister());
195 if (is64bit) {
196 locations->AddTemp(Location::RequiresFpuRegister());
197 }
198}
199
200static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
201 LocationSummary* locations = new (arena) LocationSummary(invoke,
202 LocationSummary::kNoCall,
203 kIntrinsified);
204 locations->SetInAt(0, Location::RequiresRegister());
205 locations->SetOut(Location::RequiresFpuRegister());
206 if (is64bit) {
207 locations->AddTemp(Location::RequiresFpuRegister());
208 locations->AddTemp(Location::RequiresFpuRegister());
209 }
210}
211
212static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
213 Location input = locations->InAt(0);
214 Location output = locations->Out();
215 if (is64bit) {
216 // Need to use the temporary.
217 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
218 __ movsd(temp, input.AsFpuRegister<XmmRegister>());
219 __ movd(output.AsRegisterPairLow<Register>(), temp);
220 __ psrlq(temp, Immediate(32));
221 __ movd(output.AsRegisterPairHigh<Register>(), temp);
222 } else {
223 __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
224 }
225}
226
227static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
228 Location input = locations->InAt(0);
229 Location output = locations->Out();
230 if (is64bit) {
231 // Need to use the temporary.
232 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
233 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
234 __ movd(temp1, input.AsRegisterPairLow<Register>());
235 __ movd(temp2, input.AsRegisterPairHigh<Register>());
236 __ punpckldq(temp1, temp2);
237 __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
238 } else {
239 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
240 }
241}
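// For reference, a rough C-level sketch of what the 64-bit paths above compute (illustrative
// only, not part of the build): the raw bits are transferred unchanged between a long register
// pair and a double, i.e. Double.doubleToRawLongBits / Double.longBitsToDouble.
//
//   // FP -> int pair:
//   uint64_t bits; memcpy(&bits, &value, sizeof(bits));
//   uint32_t lo = (uint32_t) bits, hi = (uint32_t) (bits >> 32);
//
//   // int pair -> FP:
//   uint64_t bits = ((uint64_t) hi << 32) | lo;
//   double value; memcpy(&value, &bits, sizeof(value));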
242
243void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
244 CreateFPToIntLocations(arena_, invoke, true);
245}
246void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
247 CreateIntToFPLocations(arena_, invoke, true);
248}
249
250void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
251 MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
252}
253void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
254 MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
255}
256
257void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
258 CreateFPToIntLocations(arena_, invoke, false);
259}
260void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
261 CreateIntToFPLocations(arena_, invoke, false);
262}
263
264void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
265 MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
266}
267void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
268 MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
269}
270
271static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
272 LocationSummary* locations = new (arena) LocationSummary(invoke,
273 LocationSummary::kNoCall,
274 kIntrinsified);
275 locations->SetInAt(0, Location::RequiresRegister());
276 locations->SetOut(Location::SameAsFirstInput());
277}
278
279static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
280 LocationSummary* locations = new (arena) LocationSummary(invoke,
281 LocationSummary::kNoCall,
282 kIntrinsified);
283 locations->SetInAt(0, Location::RequiresRegister());
284 locations->SetOut(Location::RequiresRegister());
285}
286
287static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
288 LocationSummary* locations = new (arena) LocationSummary(invoke,
289 LocationSummary::kNoCall,
290 kIntrinsified);
291 locations->SetInAt(0, Location::RequiresRegister());
292 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
293}
294
295static void GenReverseBytes(LocationSummary* locations,
296 Primitive::Type size,
297 X86Assembler* assembler) {
298 Register out = locations->Out().AsRegister<Register>();
299
300 switch (size) {
301 case Primitive::kPrimShort:
302 // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
303 __ bswapl(out);
304 __ sarl(out, Immediate(16));
305 break;
306 case Primitive::kPrimInt:
307 __ bswapl(out);
308 break;
309 default:
310 LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
311 UNREACHABLE();
312 }
313}
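// Illustrative sketch (not compiled) of the kPrimShort case above: bswapl reverses all four
// bytes, leaving the two interesting bytes in the upper half, and the arithmetic shift brings
// them back down with sign extension, matching Short.reverseBytes:
//
//   int32_t x = value;                                    // short, sign-extended in a register
//   x = (int32_t) __builtin_bswap32((uint32_t) x) >> 16;  // reversed short, sign-extended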
314
315void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
316 CreateIntToIntLocations(arena_, invoke);
317}
318
319void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
320 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
321}
322
Mark Mendell58d25fd2015-04-03 14:52:31 -0400323void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
324 CreateLongToLongLocations(arena_, invoke);
325}
326
327void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
328 LocationSummary* locations = invoke->GetLocations();
329 Location input = locations->InAt(0);
330 Register input_lo = input.AsRegisterPairLow<Register>();
331 Register input_hi = input.AsRegisterPairHigh<Register>();
332 Location output = locations->Out();
333 Register output_lo = output.AsRegisterPairLow<Register>();
334 Register output_hi = output.AsRegisterPairHigh<Register>();
335
336 X86Assembler* assembler = GetAssembler();
337 // Assign the inputs to the outputs, mixing low/high.
338 __ movl(output_lo, input_hi);
339 __ movl(output_hi, input_lo);
340 __ bswapl(output_lo);
341 __ bswapl(output_hi);
342}
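// Rough equivalent in C (illustrative only): reversing the bytes of a long held in a register
// pair is a byte swap of each half with the two halves exchanged.
//
//   uint32_t out_lo = __builtin_bswap32(in_hi);
//   uint32_t out_hi = __builtin_bswap32(in_lo);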
343
Mark Mendell09ed1a32015-03-25 08:30:06 -0400344void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
345 CreateIntToIntLocations(arena_, invoke);
346}
347
348void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
349 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
350}
351
352
353// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
354// need is 64b.
355
356static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
357 // TODO: Enable memory operations when the assembler supports them.
358 LocationSummary* locations = new (arena) LocationSummary(invoke,
359 LocationSummary::kNoCall,
360 kIntrinsified);
361 locations->SetInAt(0, Location::RequiresFpuRegister());
362 // TODO: Allow x86 to work with memory. This requires assembler support, see below.
363 // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
364 locations->SetOut(Location::SameAsFirstInput());
365}
366
367static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
368 Location output = locations->Out();
369
370 if (output.IsFpuRegister()) {
371 // Create the right constant on an aligned stack.
372 if (is64bit) {
373 __ subl(ESP, Immediate(8));
374 __ pushl(Immediate(0x7FFFFFFF));
375 __ pushl(Immediate(0xFFFFFFFF));
376 __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
377 } else {
378 __ subl(ESP, Immediate(12));
379 __ pushl(Immediate(0x7FFFFFFF));
380 __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
381 }
382 __ addl(ESP, Immediate(16));
383 } else {
384 // TODO: update when assembler support is available.
385 UNIMPLEMENTED(FATAL) << "Needs assembler support.";
386// Once assembler support is available, in-memory operations look like this:
387// if (is64bit) {
388// DCHECK(output.IsDoubleStackSlot());
389// __ andl(Address(Register(RSP), output.GetHighStackIndex(kX86WordSize)),
390// Immediate(0x7FFFFFFF));
391// } else {
392// DCHECK(output.IsStackSlot());
393// // Can use and with a literal directly.
394// __ andl(Address(Register(RSP), output.GetStackIndex()), Immediate(0x7FFFFFFF));
395// }
396 }
397}
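// For reference (illustrative sketch, not compiled): the register path above simply clears the
// IEEE-754 sign bit, which is what Math.abs requires for floats and doubles (it also maps -0.0
// to +0.0 and leaves NaN payloads alone).
//
//   float_bits  &= 0x7FFFFFFFu;                // andps with the mask built on the stack
//   double_bits &= 0x7FFFFFFFFFFFFFFFull;      // andpd with the 64-bit mask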
398
399void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
400 CreateFloatToFloat(arena_, invoke);
401}
402
403void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
404 MathAbsFP(invoke->GetLocations(), true, GetAssembler());
405}
406
407void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
408 CreateFloatToFloat(arena_, invoke);
409}
410
411void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
412 MathAbsFP(invoke->GetLocations(), false, GetAssembler());
413}
414
415static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
416 LocationSummary* locations = new (arena) LocationSummary(invoke,
417 LocationSummary::kNoCall,
418 kIntrinsified);
419 locations->SetInAt(0, Location::RegisterLocation(EAX));
420 locations->SetOut(Location::SameAsFirstInput());
421 locations->AddTemp(Location::RegisterLocation(EDX));
422}
423
424static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
425 Location output = locations->Out();
426 Register out = output.AsRegister<Register>();
427 DCHECK_EQ(out, EAX);
428 Register temp = locations->GetTemp(0).AsRegister<Register>();
429 DCHECK_EQ(temp, EDX);
430
431 // Sign extend EAX into EDX.
432 __ cdq();
433
434 // XOR EAX with sign.
435 __ xorl(EAX, EDX);
436
437 // Subtract out sign to correct.
438 __ subl(EAX, EDX);
439
440 // The result is in EAX.
441}
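// The cdq/xorl/subl sequence above is the classic branch-free abs. Roughly, in C (illustrative
// only):
//
//   int32_t sign = x >> 31;            // cdq: 0 for non-negative, -1 for negative
//   int32_t abs  = (x ^ sign) - sign;
//
// Like Math.abs(int), this leaves Integer.MIN_VALUE unchanged, since its negation overflows.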
442
443static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
444 LocationSummary* locations = new (arena) LocationSummary(invoke,
445 LocationSummary::kNoCall,
446 kIntrinsified);
447 locations->SetInAt(0, Location::RequiresRegister());
448 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
449 locations->AddTemp(Location::RequiresRegister());
450}
451
452static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
453 Location input = locations->InAt(0);
454 Register input_lo = input.AsRegisterPairLow<Register>();
455 Register input_hi = input.AsRegisterPairHigh<Register>();
456 Location output = locations->Out();
457 Register output_lo = output.AsRegisterPairLow<Register>();
458 Register output_hi = output.AsRegisterPairHigh<Register>();
459 Register temp = locations->GetTemp(0).AsRegister<Register>();
460
461 // Compute the sign into the temporary.
462 __ movl(temp, input_hi);
463 __ sarl(temp, Immediate(31));
464
465 // Store the sign into the output.
466 __ movl(output_lo, temp);
467 __ movl(output_hi, temp);
468
469 // XOR the input to the output.
470 __ xorl(output_lo, input_lo);
471 __ xorl(output_hi, input_hi);
472
473 // Subtract the sign.
474 __ subl(output_lo, temp);
475 __ sbbl(output_hi, temp);
476}
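// Same identity as the 32-bit case, widened to a register pair (illustrative C sketch):
//
//   int64_t sign = x >> 63;            // high word shifted right by 31, copied into both halves
//   int64_t abs  = (x ^ sign) - sign;  // xorl on both halves, then subl/sbbl for the borrow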
477
478void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
479 CreateAbsIntLocation(arena_, invoke);
480}
481
482void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
483 GenAbsInteger(invoke->GetLocations(), GetAssembler());
484}
485
486void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
487 CreateAbsLongLocation(arena_, invoke);
488}
489
490void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
491 GenAbsLong(invoke->GetLocations(), GetAssembler());
492}
493
494static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
495 X86Assembler* assembler) {
496 Location op1_loc = locations->InAt(0);
497 Location op2_loc = locations->InAt(1);
498 Location out_loc = locations->Out();
499 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
500
501 // Shortcut for same input locations.
502 if (op1_loc.Equals(op2_loc)) {
503 DCHECK(out_loc.Equals(op1_loc));
504 return;
505 }
506
507 // (out := op1)
508 // out <=? op2
509 // if Nan jmp Nan_label
510 // if out is min jmp done
511 // if op2 is min jmp op2_label
512 // handle -0/+0
513 // jmp done
514 // Nan_label:
515 // out := NaN
516 // op2_label:
517 // out := op2
518 // done:
519 //
520 // This removes one jmp, but needs to copy one input (op1) to out.
521 //
522 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
523
524 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
525
526 Label nan, done, op2_label;
527 if (is_double) {
528 __ ucomisd(out, op2);
529 } else {
530 __ ucomiss(out, op2);
531 }
532
533 __ j(Condition::kParityEven, &nan);
534
535 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
536 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
537
538 // Handle 0.0/-0.0.
539 if (is_min) {
540 if (is_double) {
541 __ orpd(out, op2);
542 } else {
543 __ orps(out, op2);
544 }
545 } else {
546 if (is_double) {
547 __ andpd(out, op2);
548 } else {
549 __ andps(out, op2);
550 }
551 }
552 __ jmp(&done);
553
554 // NaN handling.
555 __ Bind(&nan);
556 if (is_double) {
557 __ pushl(Immediate(kDoubleNaNHigh));
558 __ pushl(Immediate(kDoubleNaNLow));
559 __ movsd(out, Address(ESP, 0));
560 __ addl(ESP, Immediate(8));
561 } else {
562 __ pushl(Immediate(kFloatNaN));
563 __ movss(out, Address(ESP, 0));
564 __ addl(ESP, Immediate(4));
565 }
566 __ jmp(&done);
567
568 // out := op2;
569 __ Bind(&op2_label);
570 if (is_double) {
571 __ movsd(out, op2);
572 } else {
573 __ movss(out, op2);
574 }
575
576 // Done.
577 __ Bind(&done);
578}
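// The sequence above implements the Java Math.min/max floating-point contract rather than a
// plain compare-and-select. Roughly (illustrative sketch, shown for min):
//
//   if (isNaN(a) || isNaN(b)) return NaN;  // ucomis* sets PF; the nan label loads a canonical NaN
//   if (a == b) return or_bits(a, b);      // only +0.0/-0.0 need this; min ORs, max ANDs the bits
//   return (a < b) ? a : b;                // otherwise the branches pick the right operand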
579
580static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
581 LocationSummary* locations = new (arena) LocationSummary(invoke,
582 LocationSummary::kNoCall,
583 kIntrinsified);
584 locations->SetInAt(0, Location::RequiresFpuRegister());
585 locations->SetInAt(1, Location::RequiresFpuRegister());
586 // The following is sub-optimal, but all we can do for now. It would be fine to also accept
587 // the second input to be the output (we can simply swap inputs).
588 locations->SetOut(Location::SameAsFirstInput());
589}
590
591void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
592 CreateFPFPToFPLocations(arena_, invoke);
593}
594
595void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
596 GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
597}
598
599void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
600 CreateFPFPToFPLocations(arena_, invoke);
601}
602
603void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
604 GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
605}
606
607void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
608 CreateFPFPToFPLocations(arena_, invoke);
609}
610
611void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
612 GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
613}
614
615void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
616 CreateFPFPToFPLocations(arena_, invoke);
617}
618
619void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
620 GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
621}
622
623static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
624 X86Assembler* assembler) {
625 Location op1_loc = locations->InAt(0);
626 Location op2_loc = locations->InAt(1);
627
628 // Shortcut for same input locations.
629 if (op1_loc.Equals(op2_loc)) {
630 // Can return immediately, as op1_loc == out_loc.
631 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
632 // a copy here.
633 DCHECK(locations->Out().Equals(op1_loc));
634 return;
635 }
636
637 if (is_long) {
638 // Need to perform a subtract to get the sign right.
639 // op1 is already in the same location as the output.
640 Location output = locations->Out();
641 Register output_lo = output.AsRegisterPairLow<Register>();
642 Register output_hi = output.AsRegisterPairHigh<Register>();
643
644 Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
645 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
646
647 // Spare register to compute the subtraction to set condition code.
648 Register temp = locations->GetTemp(0).AsRegister<Register>();
649
650 // Subtract off op2_low.
651 __ movl(temp, output_lo);
652 __ subl(temp, op2_lo);
653
654 // Now use the same temp and the borrow to finish the subtraction of op2_hi.
655 __ movl(temp, output_hi);
656 __ sbbl(temp, op2_hi);
657
658 // Now the condition code is correct.
659 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
660 __ cmovl(cond, output_lo, op2_lo);
661 __ cmovl(cond, output_hi, op2_hi);
662 } else {
663 Register out = locations->Out().AsRegister<Register>();
664 Register op2 = op2_loc.AsRegister<Register>();
665
666 // (out := op1)
667 // out <=? op2
668 // if out is min jmp done
669 // out := op2
670 // done:
671
672 __ cmpl(out, op2);
673 Condition cond = is_min ? Condition::kGreater : Condition::kLess;
674 __ cmovl(cond, out, op2);
675 }
676}
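// Rough scalar equivalent of the selection above (illustrative only):
//
//   // 32-bit: out already holds op1.
//   if (is_min ? (out > op2) : (out < op2)) out = op2;   // cmpl + cmovl
//
// For the 64-bit case the flags are produced by subtracting op2 from a scratch copy of op1
// (subl then sbbl); the subtraction result is discarded and both halves of the pair are then
// selected with two cmovs on the same condition.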
677
678static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
679 LocationSummary* locations = new (arena) LocationSummary(invoke,
680 LocationSummary::kNoCall,
681 kIntrinsified);
682 locations->SetInAt(0, Location::RequiresRegister());
683 locations->SetInAt(1, Location::RequiresRegister());
684 locations->SetOut(Location::SameAsFirstInput());
685}
686
687static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
688 LocationSummary* locations = new (arena) LocationSummary(invoke,
689 LocationSummary::kNoCall,
690 kIntrinsified);
691 locations->SetInAt(0, Location::RequiresRegister());
692 locations->SetInAt(1, Location::RequiresRegister());
693 locations->SetOut(Location::SameAsFirstInput());
694 // Register to use to perform a long subtract to set cc.
695 locations->AddTemp(Location::RequiresRegister());
696}
697
698void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
699 CreateIntIntToIntLocations(arena_, invoke);
700}
701
702void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
703 GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
704}
705
706void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
707 CreateLongLongToLongLocations(arena_, invoke);
708}
709
710void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
711 GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
712}
713
714void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
715 CreateIntIntToIntLocations(arena_, invoke);
716}
717
718void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
719 GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
720}
721
722void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
723 CreateLongLongToLongLocations(arena_, invoke);
724}
725
726void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
727 GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
728}
729
730static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
731 LocationSummary* locations = new (arena) LocationSummary(invoke,
732 LocationSummary::kNoCall,
733 kIntrinsified);
734 locations->SetInAt(0, Location::RequiresFpuRegister());
735 locations->SetOut(Location::RequiresFpuRegister());
736}
737
738void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
739 CreateFPToFPLocations(arena_, invoke);
740}
741
742void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
743 LocationSummary* locations = invoke->GetLocations();
744 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
745 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
746
747 GetAssembler()->sqrtsd(out, in);
748}
749
Mark Mendellfb8d2792015-03-31 22:16:59 -0400750static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
751 MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
752
753 DCHECK(invoke->IsInvokeStaticOrDirect());
754 codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
Mingyao Yange90db122015-04-03 17:56:54 -0700755 codegen->RecordPcInfo(invoke, invoke->GetDexPc());
Mark Mendellfb8d2792015-03-31 22:16:59 -0400756
757 // Copy the result back to the expected output.
758 Location out = invoke->GetLocations()->Out();
759 if (out.IsValid()) {
760 DCHECK(out.IsRegister());
761 MoveFromReturnRegister(out, invoke->GetType(), codegen);
762 }
763}
764
765static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
766 HInvoke* invoke,
767 CodeGeneratorX86* codegen) {
768 // Do we have instruction support?
769 if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
770 CreateFPToFPLocations(arena, invoke);
771 return;
772 }
773
774 // We have to fall back to a call to the intrinsic.
775 LocationSummary* locations = new (arena) LocationSummary(invoke,
776 LocationSummary::kCall);
777 InvokeRuntimeCallingConvention calling_convention;
778 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
779 locations->SetOut(Location::FpuRegisterLocation(XMM0));
780 // Needs to be EAX for the invoke.
781 locations->AddTemp(Location::RegisterLocation(EAX));
782}
783
784static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
785 HInvoke* invoke,
786 X86Assembler* assembler,
787 int round_mode) {
788 LocationSummary* locations = invoke->GetLocations();
789 if (locations->WillCall()) {
790 InvokeOutOfLineIntrinsic(codegen, invoke);
791 } else {
792 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
793 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
794 __ roundsd(out, in, Immediate(round_mode));
795 }
796}
797
798void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
799 CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
800}
801
802void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
803 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
804}
805
806void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
807 CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
808}
809
810void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
811 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
812}
813
814void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
815 CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
816}
817
818void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
819 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
820}
821
822// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
823// as it needs 64 bit instructions.
824void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
825 // Do we have instruction support?
826 if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
827 LocationSummary* locations = new (arena_) LocationSummary(invoke,
828 LocationSummary::kNoCall,
829 kIntrinsified);
830 locations->SetInAt(0, Location::RequiresFpuRegister());
831 locations->SetOut(Location::RequiresRegister());
832 locations->AddTemp(Location::RequiresFpuRegister());
833 locations->AddTemp(Location::RequiresFpuRegister());
834 return;
835 }
836
837 // We have to fall back to a call to the intrinsic.
838 LocationSummary* locations = new (arena_) LocationSummary(invoke,
839 LocationSummary::kCall);
840 InvokeRuntimeCallingConvention calling_convention;
841 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
842 locations->SetOut(Location::RegisterLocation(EAX));
843 // Needs to be EAX for the invoke.
844 locations->AddTemp(Location::RegisterLocation(EAX));
845}
846
847void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
848 LocationSummary* locations = invoke->GetLocations();
849 if (locations->WillCall()) {
850 InvokeOutOfLineIntrinsic(codegen_, invoke);
851 return;
852 }
853
854 // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
855 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
856 Register out = locations->Out().AsRegister<Register>();
857 XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
858 XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
859 Label done, nan;
860 X86Assembler* assembler = GetAssembler();
861
862 // Generate 0.5 into inPlusPointFive.
863 __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
864 __ movd(inPlusPointFive, out);
865
866 // Add in the input.
867 __ addss(inPlusPointFive, in);
868
869 // And floor the sum (roundss with immediate 1 rounds toward negative infinity).
870 __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
871
872 __ movl(out, Immediate(kPrimIntMax));
873 // maxInt = int-to-float(out)
874 __ cvtsi2ss(maxInt, out);
875
876 // if inPlusPointFive >= maxInt goto done
877 __ comiss(inPlusPointFive, maxInt);
878 __ j(kAboveEqual, &done);
879
880 // if input == NaN goto nan
881 __ j(kUnordered, &nan);
882
883 // output = float-to-int-truncate(input)
884 __ cvttss2si(out, inPlusPointFive);
885 __ jmp(&done);
886 __ Bind(&nan);
887
888 // output = 0
889 __ xorl(out, out);
890 __ Bind(&done);
891}
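// For reference, the fast path above computes roughly the following (illustrative sketch):
//
//   float t = in + 0.5f;
//   if (t >= (float) Integer.MAX_VALUE) return Integer.MAX_VALUE;  // comiss + jae, out preloaded
//   if (isNaN(in)) return 0;                                       // unordered compare
//   return (int) Math.floor(t);                                    // roundss(1) + cvttss2si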
892
Mark Mendell09ed1a32015-03-25 08:30:06 -0400893void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
894 // The inputs plus one temp.
895 LocationSummary* locations = new (arena_) LocationSummary(invoke,
896 LocationSummary::kCallOnSlowPath,
897 kIntrinsified);
898 locations->SetInAt(0, Location::RequiresRegister());
899 locations->SetInAt(1, Location::RequiresRegister());
900 locations->SetOut(Location::SameAsFirstInput());
901 // Needs to be EAX for the invoke.
902 locations->AddTemp(Location::RegisterLocation(EAX));
903}
904
905void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
906 LocationSummary* locations = invoke->GetLocations();
907
908 // Location of reference to data array
909 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
910 // Location of count
911 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
912 // Starting offset within data array
913 const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
914 // Start of char data within array_
915 const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
916
917 Register obj = locations->InAt(0).AsRegister<Register>();
918 Register idx = locations->InAt(1).AsRegister<Register>();
919 Register out = locations->Out().AsRegister<Register>();
920 Location temp_loc = locations->GetTemp(0);
921 Register temp = temp_loc.AsRegister<Register>();
922
923 // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
924 // the cost.
925 // TODO: For simplicity, the index parameter is requested in a register, so unlike Quick
926 // we do not optimize the code for constants (which would save a register).
927
928 SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke, temp);
929 codegen_->AddSlowPath(slow_path);
930
931 X86Assembler* assembler = GetAssembler();
932
933 __ cmpl(idx, Address(obj, count_offset));
934 codegen_->MaybeRecordImplicitNullCheck(invoke);
935 __ j(kAboveEqual, slow_path->GetEntryLabel());
936
937 // Get the actual element.
938 __ movl(temp, idx); // temp := idx.
939 __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx.
940 __ movl(out, Address(obj, value_offset)); // out := obj.array.
941 // out = out[2*temp].
942 __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
943
944 __ Bind(slow_path->GetExitLabel());
945}
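// Rough Java-level view of the fast path above (illustrative only; field names follow the
// String layout used here, with separate value/offset/count fields):
//
//   if (index >= str.count) slowPath();        // unsigned compare also catches index < 0
//   return str.value[str.offset + index];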
946
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +0000947void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
948 // The inputs plus one temp.
949 LocationSummary* locations = new (arena_) LocationSummary(invoke,
950 LocationSummary::kCall,
951 kIntrinsified);
952 InvokeRuntimeCallingConvention calling_convention;
953 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
954 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
955 locations->SetOut(Location::RegisterLocation(EAX));
956 // Needs to be EAX for the invoke.
957 locations->AddTemp(Location::RegisterLocation(EAX));
958}
959
960void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
961 X86Assembler* assembler = GetAssembler();
962 LocationSummary* locations = invoke->GetLocations();
963
Nicolas Geoffray512e04d2015-03-27 17:21:24 +0000964 // Note that the null check must have been done earlier.
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +0000965 DCHECK(!invoke->CanDoImplicitNullCheck());
966
967 Register argument = locations->InAt(1).AsRegister<Register>();
968 __ testl(argument, argument);
969 SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
970 invoke, locations->GetTemp(0).AsRegister<Register>());
971 codegen_->AddSlowPath(slow_path);
972 __ j(kEqual, slow_path->GetEntryLabel());
973
974 __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pStringCompareTo)));
975 __ Bind(slow_path->GetExitLabel());
976}
977
Mark Mendell09ed1a32015-03-25 08:30:06 -0400978static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
979 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
980 Location out_loc = locations->Out();
981 // x86 allows unaligned access. We do not have to check the input or use specific instructions
982 // to avoid a SIGBUS.
983 switch (size) {
984 case Primitive::kPrimByte:
985 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
986 break;
987 case Primitive::kPrimShort:
988 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
989 break;
990 case Primitive::kPrimInt:
991 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
992 break;
993 case Primitive::kPrimLong:
994 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
995 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
996 break;
997 default:
998 LOG(FATAL) << "Type not recognized for peek: " << size;
999 UNREACHABLE();
1000 }
1001}
1002
1003void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1004 CreateLongToIntLocations(arena_, invoke);
1005}
1006
1007void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1008 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1009}
1010
1011void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1012 CreateLongToIntLocations(arena_, invoke);
1013}
1014
1015void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1016 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1017}
1018
1019void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1020 CreateLongToLongLocations(arena_, invoke);
1021}
1022
1023void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1024 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1025}
1026
1027void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1028 CreateLongToIntLocations(arena_, invoke);
1029}
1030
1031void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1032 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1033}
1034
1035static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
1036 HInvoke* invoke) {
1037 LocationSummary* locations = new (arena) LocationSummary(invoke,
1038 LocationSummary::kNoCall,
1039 kIntrinsified);
1040 locations->SetInAt(0, Location::RequiresRegister());
1041 HInstruction *value = invoke->InputAt(1);
1042 if (size == Primitive::kPrimByte) {
1043 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1044 } else {
1045 locations->SetInAt(1, Location::RegisterOrConstant(value));
1046 }
1047}
1048
1049static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1050 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1051 Location value_loc = locations->InAt(1);
1052 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1053 // to avoid a SIGBUS.
1054 switch (size) {
1055 case Primitive::kPrimByte:
1056 if (value_loc.IsConstant()) {
1057 __ movb(Address(address, 0),
1058 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1059 } else {
1060 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1061 }
1062 break;
1063 case Primitive::kPrimShort:
1064 if (value_loc.IsConstant()) {
1065 __ movw(Address(address, 0),
1066 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1067 } else {
1068 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1069 }
1070 break;
1071 case Primitive::kPrimInt:
1072 if (value_loc.IsConstant()) {
1073 __ movl(Address(address, 0),
1074 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1075 } else {
1076 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1077 }
1078 break;
1079 case Primitive::kPrimLong:
1080 if (value_loc.IsConstant()) {
1081 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1082 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1083 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1084 } else {
1085 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1086 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1087 }
1088 break;
1089 default:
1090 LOG(FATAL) << "Type not recognized for poke: " << size;
1091 UNREACHABLE();
1092 }
1093}
1094
1095void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1096 CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
1097}
1098
1099void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1100 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1101}
1102
1103void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1104 CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
1105}
1106
1107void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1108 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1109}
1110
1111void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1112 CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
1113}
1114
1115void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1116 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1117}
1118
1119void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1120 CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
1121}
1122
1123void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1124 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1125}
1126
1127void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1128 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1129 LocationSummary::kNoCall,
1130 kIntrinsified);
1131 locations->SetOut(Location::RequiresRegister());
1132}
1133
1134void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1135 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
1136 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
1137}
1138
1139static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
1140 bool is_volatile, X86Assembler* assembler) {
1141 Register base = locations->InAt(1).AsRegister<Register>();
1142 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1143 Location output = locations->Out();
1144
1145 switch (type) {
1146 case Primitive::kPrimInt:
1147 case Primitive::kPrimNot:
1148 __ movl(output.AsRegister<Register>(), Address(base, offset, ScaleFactor::TIMES_1, 0));
1149 break;
1150
1151 case Primitive::kPrimLong: {
1152 Register output_lo = output.AsRegisterPairLow<Register>();
1153 Register output_hi = output.AsRegisterPairHigh<Register>();
1154 if (is_volatile) {
1155 // Need to use an XMM register to read atomically.
1156 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1157 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1158 __ movd(output_lo, temp);
1159 __ psrlq(temp, Immediate(32));
1160 __ movd(output_hi, temp);
1161 } else {
1162 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1163 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1164 }
1165 }
1166 break;
1167
1168 default:
1169 LOG(FATAL) << "Unsupported op size " << type;
1170 UNREACHABLE();
1171 }
1172}
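// Illustrative sketch (not compiled) of the cases above:
//
//   int32_t v   = *(int32_t*) (base + offset);   // Int/Object: a single 32-bit load
//   int64_t v64 = *(int64_t*) (base + offset);   // Long: one atomic movsd via XMM if volatile,
//                                                //       two plain 32-bit loads otherwise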
1173
1174static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
1175 bool is_long, bool is_volatile) {
1176 LocationSummary* locations = new (arena) LocationSummary(invoke,
1177 LocationSummary::kNoCall,
1178 kIntrinsified);
1179 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1180 locations->SetInAt(1, Location::RequiresRegister());
1181 locations->SetInAt(2, Location::RequiresRegister());
1182 if (is_long) {
1183 if (is_volatile) {
1184 // Need an XMM register to read the volatile long atomically.
1185 locations->AddTemp(Location::RequiresFpuRegister());
1186 locations->SetOut(Location::RequiresRegister());
1187 } else {
1188 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1189 }
1190 } else {
1191 locations->SetOut(Location::RequiresRegister());
1192 }
1193}
1194
1195void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1196 CreateIntIntIntToIntLocations(arena_, invoke, false, false);
1197}
1198void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1199 CreateIntIntIntToIntLocations(arena_, invoke, false, true);
1200}
1201void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1202 CreateIntIntIntToIntLocations(arena_, invoke, true, false);
1203}
1204void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1205 CreateIntIntIntToIntLocations(arena_, invoke, true, true);
1206}
1207void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1208 CreateIntIntIntToIntLocations(arena_, invoke, false, false);
1209}
1210void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1211 CreateIntIntIntToIntLocations(arena_, invoke, false, true);
1212}
1213
1214
1215void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1216 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
1217}
1218void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1219 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
1220}
1221void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1222 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
1223}
1224void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1225 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
1226}
1227void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1228 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
1229}
1230void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1231 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
1232}
1233
1234
1235static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
1236 Primitive::Type type,
1237 HInvoke* invoke,
1238 bool is_volatile) {
1239 LocationSummary* locations = new (arena) LocationSummary(invoke,
1240 LocationSummary::kNoCall,
1241 kIntrinsified);
1242 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1243 locations->SetInAt(1, Location::RequiresRegister());
1244 locations->SetInAt(2, Location::RequiresRegister());
1245 locations->SetInAt(3, Location::RequiresRegister());
1246 if (type == Primitive::kPrimNot) {
1247 // Need temp registers for card-marking.
1248 locations->AddTemp(Location::RequiresRegister());
1249 // Ensure the value is in a byte register.
1250 locations->AddTemp(Location::RegisterLocation(ECX));
1251 } else if (type == Primitive::kPrimLong && is_volatile) {
1252 locations->AddTemp(Location::RequiresFpuRegister());
1253 locations->AddTemp(Location::RequiresFpuRegister());
1254 }
1255}
1256
1257void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1258 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
1259}
1260void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1261 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
1262}
1263void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1264 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, true);
1265}
1266void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1267 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
1268}
1269void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1270 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
1271}
1272void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1273 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, true);
1274}
1275void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1276 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
1277}
1278void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1279 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
1280}
1281void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1282 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, true);
1283}
1284
1285 // Ordered puts need no special handling: they require an AnyStore barrier, which the x86
1286 // memory model already provides.
1287static void GenUnsafePut(LocationSummary* locations,
1288 Primitive::Type type,
1289 bool is_volatile,
1290 CodeGeneratorX86* codegen) {
1291 X86Assembler* assembler = reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
1292 Register base = locations->InAt(1).AsRegister<Register>();
1293 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1294 Location value_loc = locations->InAt(3);
1295
1296 if (type == Primitive::kPrimLong) {
1297 Register value_lo = value_loc.AsRegisterPairLow<Register>();
1298 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
1299 if (is_volatile) {
1300 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1301 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
1302 __ movd(temp1, value_lo);
1303 __ movd(temp2, value_hi);
1304 __ punpckldq(temp1, temp2);
1305 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
1306 } else {
1307 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
1308 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
1309 }
1310 } else {
1311 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
1312 }
1313
1314 if (is_volatile) {
1315 __ mfence();
1316 }
1317
1318 if (type == Primitive::kPrimNot) {
1319 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
1320 locations->GetTemp(1).AsRegister<Register>(),
1321 base,
1322 value_loc.AsRegister<Register>());
1323 }
1324}
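// Illustrative sketch (not compiled) of the store paths above:
//
//   *(int32_t*) (base + offset) = value;      // Int/Object; Object also marks the GC card
//   *(int64_t*) (base + offset) = value64;    // Long: punpckldq + movsd keeps a volatile store
//                                             //       atomic; otherwise two 32-bit stores
//   if (is_volatile) mfence();                // trailing StoreLoad barrier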
1325
1326void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
1327 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
1328}
1329void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1330 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
1331}
1332void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1333 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
1334}
1335void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
1336 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
1337}
1338void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1339 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
1340}
1341void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1342 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
1343}
1344void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
1345 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
1346}
1347void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1348 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
1349}
1350void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1351 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
1352}
1353
Mark Mendell58d25fd2015-04-03 14:52:31 -04001354static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
1355 HInvoke* invoke) {
1356 LocationSummary* locations = new (arena) LocationSummary(invoke,
1357 LocationSummary::kNoCall,
1358 kIntrinsified);
1359 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1360 locations->SetInAt(1, Location::RequiresRegister());
1361 // Offset is a long, but in 32 bit mode, we only need the low word.
1362 // Can we update the invoke here to remove a TypeConvert to Long?
1363 locations->SetInAt(2, Location::RequiresRegister());
1364 // Expected value must be in EAX or EDX:EAX.
1365 // For long, new value must be in ECX:EBX.
1366 if (type == Primitive::kPrimLong) {
1367 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
1368 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
1369 } else {
1370 locations->SetInAt(3, Location::RegisterLocation(EAX));
1371 locations->SetInAt(4, Location::RequiresRegister());
1372 }
1373
1374 // Force a byte register for the output.
1375 locations->SetOut(Location::RegisterLocation(EAX));
1376 if (type == Primitive::kPrimNot) {
1377 // Need temp registers for card-marking.
1378 locations->AddTemp(Location::RequiresRegister());
1379 // Need a byte register for marking.
1380 locations->AddTemp(Location::RegisterLocation(ECX));
1381 }
1382}
1383
1384void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
1385 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
1386}
1387
1388void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
1389 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
1390}
1391
1392void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
1393 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
1394}
1395
1396static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
1397 X86Assembler* assembler =
1398 reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
1399 LocationSummary* locations = invoke->GetLocations();
1400
1401 Register base = locations->InAt(1).AsRegister<Register>();
1402 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1403 Location out = locations->Out();
1404 DCHECK_EQ(out.AsRegister<Register>(), EAX);
1405
1406 if (type == Primitive::kPrimLong) {
1407 DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
1408 DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
1409 DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
1410 DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
1411 __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
1412 } else {
1413 // Integer or object.
1414 DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
1415 Register value = locations->InAt(4).AsRegister<Register>();
1416 if (type == Primitive::kPrimNot) {
1417 // Mark card for object assuming new value is stored.
1418 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
1419 locations->GetTemp(1).AsRegister<Register>(),
1420 base,
1421 value);
1422 }
1423
1424 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
1425 }
1426
1427 // locked cmpxchg has full barrier semantics, and we don't need scheduling
1428 // barriers at this time.
1429
1430 // Convert ZF into the boolean result.
1431 __ setb(kZero, out.AsRegister<Register>());
1432 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
1433}
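// The lock cmpxchg above implements the usual compare-and-swap contract. Roughly (illustrative
// sketch):
//
//   // atomically:
//   if (*addr != expected) return false;
//   *addr = new_value;
//   return true;
//
// EAX (EDX:EAX for the 8-byte form) must hold 'expected'; success is reported in ZF and
// materialized into the boolean result with setb/movzxb.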
1434
1435void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
1436 GenCAS(Primitive::kPrimInt, invoke, codegen_);
1437}
1438
1439void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
1440 GenCAS(Primitive::kPrimLong, invoke, codegen_);
1441}
1442
1443void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
1444 GenCAS(Primitive::kPrimNot, invoke, codegen_);
1445}
1446
1447void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
1448 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1449 LocationSummary::kNoCall,
1450 kIntrinsified);
1451 locations->SetInAt(0, Location::RequiresRegister());
1452 locations->SetOut(Location::SameAsFirstInput());
1453 locations->AddTemp(Location::RequiresRegister());
1454}
1455
1456static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
1457 X86Assembler* assembler) {
1458 Immediate imm_shift(shift);
1459 Immediate imm_mask(mask);
1460 __ movl(temp, reg);
1461 __ shrl(reg, imm_shift);
1462 __ andl(temp, imm_mask);
1463 __ andl(reg, imm_mask);
1464 __ shll(temp, imm_shift);
1465 __ orl(reg, temp);
1466}
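// One SwapBits round computes, roughly (illustrative C):
//
//   uint32_t swap_bits(uint32_t x, int shift, uint32_t mask) {
//     return ((x >> shift) & mask) | ((x & mask) << shift);
//   }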
1467
1468void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
1469 X86Assembler* assembler =
1470 reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
1471 LocationSummary* locations = invoke->GetLocations();
1472
1473 Register reg = locations->InAt(0).AsRegister<Register>();
1474 Register temp = locations->GetTemp(0).AsRegister<Register>();
1475
1476 /*
1477 * Use one bswap instruction to reverse the byte order first, then use 3 rounds of bit
1478 * swapping to reverse the bits of x. Using bswap saves instructions compared to the
1479 * generic luni implementation, which needs 5 rounds of bit swapping.
1480 * x = bswap x
1481 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
1482 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
1483 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
1484 */
1485 __ bswapl(reg);
1486 SwapBits(reg, temp, 1, 0x55555555, assembler);
1487 SwapBits(reg, temp, 2, 0x33333333, assembler);
1488 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
1489}
1490
1491void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
1492 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1493 LocationSummary::kNoCall,
1494 kIntrinsified);
1495 locations->SetInAt(0, Location::RequiresRegister());
1496 locations->SetOut(Location::SameAsFirstInput());
1497 locations->AddTemp(Location::RequiresRegister());
1498}
1499
1500void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
1501 X86Assembler* assembler =
1502 reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
1503 LocationSummary* locations = invoke->GetLocations();
1504
1505 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
1506 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
1507 Register temp = locations->GetTemp(0).AsRegister<Register>();
1508
1509 // We want to swap high/low, then bswap each one, and then do the same
1510 // as a 32 bit reverse.
1511 // Exchange high and low.
1512 __ movl(temp, reg_low);
1513 __ movl(reg_low, reg_high);
1514 __ movl(reg_high, temp);
1515
1516 // bit-reverse low
1517 __ bswapl(reg_low);
1518 SwapBits(reg_low, temp, 1, 0x55555555, assembler);
1519 SwapBits(reg_low, temp, 2, 0x33333333, assembler);
1520 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
1521
1522 // bit-reverse high
1523 __ bswapl(reg_high);
1524 SwapBits(reg_high, temp, 1, 0x55555555, assembler);
1525 SwapBits(reg_high, temp, 2, 0x33333333, assembler);
1526 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
1527}
1528
Mark Mendell09ed1a32015-03-25 08:30:06 -04001529// Unimplemented intrinsics.
1530
1531#define UNIMPLEMENTED_INTRINSIC(Name) \
1532void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
1533} \
1534void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
1535}
1536
Mark Mendell09ed1a32015-03-25 08:30:06 -04001537UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
Mark Mendell09ed1a32015-03-25 08:30:06 -04001538UNIMPLEMENTED_INTRINSIC(StringIndexOf)
1539UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
1540UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
Mark Mendell09ed1a32015-03-25 08:30:06 -04001541UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
1542
1543} // namespace x86
1544} // namespace art