blob: 5bb19c193ca41e11a53476d087b559c7f8e0c4fb [file] [log] [blame]
Aart Bikf8f5a162017-02-06 15:35:29 -08001/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "code_generator_x86.h"
18#include "mirror/array-inl.h"
19
20namespace art {
21namespace x86 {
22
23// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
24#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
25
26void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
27 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
28 switch (instruction->GetPackedType()) {
29 case Primitive::kPrimLong:
30 // Long needs extra temporary to load the register pair.
31 locations->AddTemp(Location::RequiresFpuRegister());
32 FALLTHROUGH_INTENDED;
33 case Primitive::kPrimBoolean:
34 case Primitive::kPrimByte:
35 case Primitive::kPrimChar:
36 case Primitive::kPrimShort:
37 case Primitive::kPrimInt:
38 locations->SetInAt(0, Location::RequiresRegister());
39 locations->SetOut(Location::RequiresFpuRegister());
40 break;
41 case Primitive::kPrimFloat:
42 case Primitive::kPrimDouble:
43 locations->SetInAt(0, Location::RequiresFpuRegister());
44 locations->SetOut(Location::SameAsFirstInput());
45 break;
46 default:
47 LOG(FATAL) << "Unsupported SIMD type";
48 UNREACHABLE();
49 }
50}
51
52void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
53 LocationSummary* locations = instruction->GetLocations();
54 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
55 switch (instruction->GetPackedType()) {
56 case Primitive::kPrimBoolean:
57 case Primitive::kPrimByte:
58 DCHECK_EQ(16u, instruction->GetVectorLength());
59 __ movd(reg, locations->InAt(0).AsRegister<Register>());
60 __ punpcklbw(reg, reg);
61 __ punpcklwd(reg, reg);
62 __ pshufd(reg, reg, Immediate(0));
63 break;
64 case Primitive::kPrimChar:
65 case Primitive::kPrimShort:
66 DCHECK_EQ(8u, instruction->GetVectorLength());
67 __ movd(reg, locations->InAt(0).AsRegister<Register>());
68 __ punpcklwd(reg, reg);
69 __ pshufd(reg, reg, Immediate(0));
70 break;
71 case Primitive::kPrimInt:
72 DCHECK_EQ(4u, instruction->GetVectorLength());
73 __ movd(reg, locations->InAt(0).AsRegister<Register>());
74 __ pshufd(reg, reg, Immediate(0));
75 break;
76 case Primitive::kPrimLong: {
77 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
78 DCHECK_EQ(2u, instruction->GetVectorLength());
79 __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
80 __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
81 __ punpckldq(reg, tmp);
82 __ punpcklqdq(reg, reg);
83 break;
84 }
85 case Primitive::kPrimFloat:
86 DCHECK(locations->InAt(0).Equals(locations->Out()));
87 DCHECK_EQ(4u, instruction->GetVectorLength());
88 __ shufps(reg, reg, Immediate(0));
89 break;
90 case Primitive::kPrimDouble:
91 DCHECK(locations->InAt(0).Equals(locations->Out()));
92 DCHECK_EQ(2u, instruction->GetVectorLength());
93 __ shufpd(reg, reg, Immediate(0));
94 break;
95 default:
96 LOG(FATAL) << "Unsupported SIMD type";
97 UNREACHABLE();
98 }
99}
100
101void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
102 LOG(FATAL) << "No SIMD for " << instruction->GetId();
103}
104
105void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
106 LOG(FATAL) << "No SIMD for " << instruction->GetId();
107}
108
109void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
110 LOG(FATAL) << "No SIMD for " << instruction->GetId();
111}
112
113void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
114 LOG(FATAL) << "No SIMD for " << instruction->GetId();
115}
116
117// Helper to set up locations for vector unary operations.
118static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
119 LocationSummary* locations = new (arena) LocationSummary(instruction);
120 switch (instruction->GetPackedType()) {
121 case Primitive::kPrimBoolean:
122 case Primitive::kPrimByte:
123 case Primitive::kPrimChar:
124 case Primitive::kPrimShort:
125 case Primitive::kPrimInt:
126 case Primitive::kPrimLong:
127 case Primitive::kPrimFloat:
128 case Primitive::kPrimDouble:
129 locations->SetInAt(0, Location::RequiresFpuRegister());
130 locations->SetOut(Location::RequiresFpuRegister());
131 break;
132 default:
133 LOG(FATAL) << "Unsupported SIMD type";
134 UNREACHABLE();
135 }
136}
137
138void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
139 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
140}
141
142void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
143 LocationSummary* locations = instruction->GetLocations();
144 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
145 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
146 Primitive::Type from = instruction->GetInputType();
147 Primitive::Type to = instruction->GetResultType();
148 if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
149 DCHECK_EQ(4u, instruction->GetVectorLength());
150 __ cvtdq2ps(dst, src);
151 } else {
152 LOG(FATAL) << "Unsupported SIMD type";
153 }
154}
155
156void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
157 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
158}
159
160void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
161 LocationSummary* locations = instruction->GetLocations();
162 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
163 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
164 switch (instruction->GetPackedType()) {
165 case Primitive::kPrimByte:
166 DCHECK_EQ(16u, instruction->GetVectorLength());
167 __ pxor(dst, dst);
168 __ psubb(dst, src);
169 break;
170 case Primitive::kPrimChar:
171 case Primitive::kPrimShort:
172 DCHECK_EQ(8u, instruction->GetVectorLength());
173 __ pxor(dst, dst);
174 __ psubw(dst, src);
175 break;
176 case Primitive::kPrimInt:
177 DCHECK_EQ(4u, instruction->GetVectorLength());
178 __ pxor(dst, dst);
179 __ psubd(dst, src);
180 break;
181 case Primitive::kPrimLong:
182 DCHECK_EQ(2u, instruction->GetVectorLength());
183 __ pxor(dst, dst);
184 __ psubq(dst, src);
185 break;
186 case Primitive::kPrimFloat:
187 DCHECK_EQ(4u, instruction->GetVectorLength());
188 __ xorps(dst, dst);
189 __ subps(dst, src);
190 break;
191 case Primitive::kPrimDouble:
192 DCHECK_EQ(2u, instruction->GetVectorLength());
193 __ xorpd(dst, dst);
194 __ subpd(dst, src);
195 break;
196 default:
197 LOG(FATAL) << "Unsupported SIMD type";
198 UNREACHABLE();
199 }
200}
201
Aart Bik6daebeb2017-04-03 14:35:41 -0700202void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
203 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
Aart Bik472821b2017-04-27 17:23:51 -0700204 // Integral-abs requires a temporary for the comparison.
Aart Bik6daebeb2017-04-03 14:35:41 -0700205 if (instruction->GetPackedType() == Primitive::kPrimInt) {
206 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
207 }
208}
209
210void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
211 LocationSummary* locations = instruction->GetLocations();
212 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
213 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
214 switch (instruction->GetPackedType()) {
215 case Primitive::kPrimInt: {
216 DCHECK_EQ(4u, instruction->GetVectorLength());
217 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
218 __ movaps(dst, src);
219 __ pxor(tmp, tmp);
220 __ pcmpgtd(tmp, dst);
221 __ pxor(dst, tmp);
222 __ psubd(dst, tmp);
223 break;
224 }
225 case Primitive::kPrimFloat:
226 DCHECK_EQ(4u, instruction->GetVectorLength());
227 __ pcmpeqb(dst, dst); // all ones
228 __ psrld(dst, Immediate(1));
229 __ andps(dst, src);
230 break;
231 case Primitive::kPrimDouble:
232 DCHECK_EQ(2u, instruction->GetVectorLength());
233 __ pcmpeqb(dst, dst); // all ones
234 __ psrlq(dst, Immediate(1));
235 __ andpd(dst, src);
236 break;
237 default:
238 LOG(FATAL) << "Unsupported SIMD type";
239 UNREACHABLE();
240 }
241}
242
Aart Bikf8f5a162017-02-06 15:35:29 -0800243void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
244 CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
245 // Boolean-not requires a temporary to construct the 16 x one.
246 if (instruction->GetPackedType() == Primitive::kPrimBoolean) {
247 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
248 }
249}
250
251void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
252 LocationSummary* locations = instruction->GetLocations();
253 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
254 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
255 switch (instruction->GetPackedType()) {
256 case Primitive::kPrimBoolean: { // special case boolean-not
257 DCHECK_EQ(16u, instruction->GetVectorLength());
258 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
259 __ pxor(dst, dst);
260 __ pcmpeqb(tmp, tmp); // all ones
261 __ psubb(dst, tmp); // 16 x one
262 __ pxor(dst, src);
263 break;
264 }
265 case Primitive::kPrimByte:
266 case Primitive::kPrimChar:
267 case Primitive::kPrimShort:
268 case Primitive::kPrimInt:
269 case Primitive::kPrimLong:
270 DCHECK_LE(2u, instruction->GetVectorLength());
271 DCHECK_LE(instruction->GetVectorLength(), 16u);
272 __ pcmpeqb(dst, dst); // all ones
273 __ pxor(dst, src);
274 break;
275 case Primitive::kPrimFloat:
276 DCHECK_EQ(4u, instruction->GetVectorLength());
277 __ pcmpeqb(dst, dst); // all ones
278 __ xorps(dst, src);
279 break;
280 case Primitive::kPrimDouble:
281 DCHECK_EQ(2u, instruction->GetVectorLength());
282 __ pcmpeqb(dst, dst); // all ones
283 __ xorpd(dst, src);
284 break;
285 default:
286 LOG(FATAL) << "Unsupported SIMD type";
287 UNREACHABLE();
288 }
289}
290
291// Helper to set up locations for vector binary operations.
292static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
293 LocationSummary* locations = new (arena) LocationSummary(instruction);
294 switch (instruction->GetPackedType()) {
295 case Primitive::kPrimBoolean:
296 case Primitive::kPrimByte:
297 case Primitive::kPrimChar:
298 case Primitive::kPrimShort:
299 case Primitive::kPrimInt:
300 case Primitive::kPrimLong:
301 case Primitive::kPrimFloat:
302 case Primitive::kPrimDouble:
303 locations->SetInAt(0, Location::RequiresFpuRegister());
304 locations->SetInAt(1, Location::RequiresFpuRegister());
305 locations->SetOut(Location::SameAsFirstInput());
306 break;
307 default:
308 LOG(FATAL) << "Unsupported SIMD type";
309 UNREACHABLE();
310 }
311}
312
313void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
314 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
315}
316
317void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
318 LocationSummary* locations = instruction->GetLocations();
319 DCHECK(locations->InAt(0).Equals(locations->Out()));
320 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
321 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
322 switch (instruction->GetPackedType()) {
323 case Primitive::kPrimByte:
324 DCHECK_EQ(16u, instruction->GetVectorLength());
325 __ paddb(dst, src);
326 break;
327 case Primitive::kPrimChar:
328 case Primitive::kPrimShort:
329 DCHECK_EQ(8u, instruction->GetVectorLength());
330 __ paddw(dst, src);
331 break;
332 case Primitive::kPrimInt:
333 DCHECK_EQ(4u, instruction->GetVectorLength());
334 __ paddd(dst, src);
335 break;
336 case Primitive::kPrimLong:
337 DCHECK_EQ(2u, instruction->GetVectorLength());
338 __ paddq(dst, src);
339 break;
340 case Primitive::kPrimFloat:
341 DCHECK_EQ(4u, instruction->GetVectorLength());
342 __ addps(dst, src);
343 break;
344 case Primitive::kPrimDouble:
345 DCHECK_EQ(2u, instruction->GetVectorLength());
346 __ addpd(dst, src);
347 break;
348 default:
349 LOG(FATAL) << "Unsupported SIMD type";
350 UNREACHABLE();
351 }
352}
353
Aart Bikf3e61ee2017-04-12 17:09:20 -0700354void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
355 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
356}
357
358void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
359 LocationSummary* locations = instruction->GetLocations();
360 DCHECK(locations->InAt(0).Equals(locations->Out()));
361 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
362 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
363
364 DCHECK(instruction->IsRounded());
365 DCHECK(instruction->IsUnsigned());
366
367 switch (instruction->GetPackedType()) {
368 case Primitive::kPrimByte:
369 DCHECK_EQ(16u, instruction->GetVectorLength());
370 __ pavgb(dst, src);
371 return;
372 case Primitive::kPrimChar:
373 case Primitive::kPrimShort:
374 DCHECK_EQ(8u, instruction->GetVectorLength());
375 __ pavgw(dst, src);
376 return;
377 default:
378 LOG(FATAL) << "Unsupported SIMD type";
379 UNREACHABLE();
380 }
381}
382
Aart Bikf8f5a162017-02-06 15:35:29 -0800383void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
384 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
385}
386
387void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
388 LocationSummary* locations = instruction->GetLocations();
389 DCHECK(locations->InAt(0).Equals(locations->Out()));
390 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
391 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
392 switch (instruction->GetPackedType()) {
393 case Primitive::kPrimByte:
394 DCHECK_EQ(16u, instruction->GetVectorLength());
395 __ psubb(dst, src);
396 break;
397 case Primitive::kPrimChar:
398 case Primitive::kPrimShort:
399 DCHECK_EQ(8u, instruction->GetVectorLength());
400 __ psubw(dst, src);
401 break;
402 case Primitive::kPrimInt:
403 DCHECK_EQ(4u, instruction->GetVectorLength());
404 __ psubd(dst, src);
405 break;
406 case Primitive::kPrimLong:
407 DCHECK_EQ(2u, instruction->GetVectorLength());
408 __ psubq(dst, src);
409 break;
410 case Primitive::kPrimFloat:
411 DCHECK_EQ(4u, instruction->GetVectorLength());
412 __ subps(dst, src);
413 break;
414 case Primitive::kPrimDouble:
415 DCHECK_EQ(2u, instruction->GetVectorLength());
416 __ subpd(dst, src);
417 break;
418 default:
419 LOG(FATAL) << "Unsupported SIMD type";
420 UNREACHABLE();
421 }
422}
423
424void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
425 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
426}
427
428void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
429 LocationSummary* locations = instruction->GetLocations();
430 DCHECK(locations->InAt(0).Equals(locations->Out()));
431 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
432 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
433 switch (instruction->GetPackedType()) {
434 case Primitive::kPrimChar:
435 case Primitive::kPrimShort:
436 DCHECK_EQ(8u, instruction->GetVectorLength());
437 __ pmullw(dst, src);
438 break;
439 case Primitive::kPrimInt:
440 DCHECK_EQ(4u, instruction->GetVectorLength());
441 __ pmulld(dst, src);
442 break;
443 case Primitive::kPrimFloat:
444 DCHECK_EQ(4u, instruction->GetVectorLength());
445 __ mulps(dst, src);
446 break;
447 case Primitive::kPrimDouble:
448 DCHECK_EQ(2u, instruction->GetVectorLength());
449 __ mulpd(dst, src);
450 break;
451 default:
452 LOG(FATAL) << "Unsupported SIMD type";
453 UNREACHABLE();
454 }
455}
456
457void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
458 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
459}
460
461void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
462 LocationSummary* locations = instruction->GetLocations();
463 DCHECK(locations->InAt(0).Equals(locations->Out()));
464 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
465 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
466 switch (instruction->GetPackedType()) {
467 case Primitive::kPrimFloat:
468 DCHECK_EQ(4u, instruction->GetVectorLength());
469 __ divps(dst, src);
470 break;
471 case Primitive::kPrimDouble:
472 DCHECK_EQ(2u, instruction->GetVectorLength());
473 __ divpd(dst, src);
474 break;
475 default:
476 LOG(FATAL) << "Unsupported SIMD type";
477 UNREACHABLE();
478 }
479}
480
Aart Bikf3e61ee2017-04-12 17:09:20 -0700481void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
482 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
483}
484
485void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
486 LOG(FATAL) << "No SIMD for " << instruction->GetId();
487}
488
489void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
490 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
491}
492
493void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
494 LOG(FATAL) << "No SIMD for " << instruction->GetId();
495}
496
Aart Bikf8f5a162017-02-06 15:35:29 -0800497void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
498 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
499}
500
501void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
502 LocationSummary* locations = instruction->GetLocations();
503 DCHECK(locations->InAt(0).Equals(locations->Out()));
504 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
505 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
506 switch (instruction->GetPackedType()) {
507 case Primitive::kPrimBoolean:
508 case Primitive::kPrimByte:
509 case Primitive::kPrimChar:
510 case Primitive::kPrimShort:
511 case Primitive::kPrimInt:
512 case Primitive::kPrimLong:
513 DCHECK_LE(2u, instruction->GetVectorLength());
514 DCHECK_LE(instruction->GetVectorLength(), 16u);
515 __ pand(dst, src);
516 break;
517 case Primitive::kPrimFloat:
518 DCHECK_EQ(4u, instruction->GetVectorLength());
519 __ andps(dst, src);
520 break;
521 case Primitive::kPrimDouble:
522 DCHECK_EQ(2u, instruction->GetVectorLength());
523 __ andpd(dst, src);
524 break;
525 default:
526 LOG(FATAL) << "Unsupported SIMD type";
527 UNREACHABLE();
528 }
529}
530
531void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
532 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
533}
534
535void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
536 LocationSummary* locations = instruction->GetLocations();
537 DCHECK(locations->InAt(0).Equals(locations->Out()));
538 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
539 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
540 switch (instruction->GetPackedType()) {
541 case Primitive::kPrimBoolean:
542 case Primitive::kPrimByte:
543 case Primitive::kPrimChar:
544 case Primitive::kPrimShort:
545 case Primitive::kPrimInt:
546 case Primitive::kPrimLong:
547 DCHECK_LE(2u, instruction->GetVectorLength());
548 DCHECK_LE(instruction->GetVectorLength(), 16u);
549 __ pandn(dst, src);
550 break;
551 case Primitive::kPrimFloat:
552 DCHECK_EQ(4u, instruction->GetVectorLength());
553 __ andnps(dst, src);
554 break;
555 case Primitive::kPrimDouble:
556 DCHECK_EQ(2u, instruction->GetVectorLength());
557 __ andnpd(dst, src);
558 break;
559 default:
560 LOG(FATAL) << "Unsupported SIMD type";
561 UNREACHABLE();
562 }
563}
564
565void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
566 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
567}
568
569void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
570 LocationSummary* locations = instruction->GetLocations();
571 DCHECK(locations->InAt(0).Equals(locations->Out()));
572 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
573 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
574 switch (instruction->GetPackedType()) {
575 case Primitive::kPrimBoolean:
576 case Primitive::kPrimByte:
577 case Primitive::kPrimChar:
578 case Primitive::kPrimShort:
579 case Primitive::kPrimInt:
580 case Primitive::kPrimLong:
581 DCHECK_LE(2u, instruction->GetVectorLength());
582 DCHECK_LE(instruction->GetVectorLength(), 16u);
583 __ por(dst, src);
584 break;
585 case Primitive::kPrimFloat:
586 DCHECK_EQ(4u, instruction->GetVectorLength());
587 __ orps(dst, src);
588 break;
589 case Primitive::kPrimDouble:
590 DCHECK_EQ(2u, instruction->GetVectorLength());
591 __ orpd(dst, src);
592 break;
593 default:
594 LOG(FATAL) << "Unsupported SIMD type";
595 UNREACHABLE();
596 }
597}
598
599void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
600 CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
601}
602
603void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
604 LocationSummary* locations = instruction->GetLocations();
605 DCHECK(locations->InAt(0).Equals(locations->Out()));
606 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
607 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
608 switch (instruction->GetPackedType()) {
609 case Primitive::kPrimBoolean:
610 case Primitive::kPrimByte:
611 case Primitive::kPrimChar:
612 case Primitive::kPrimShort:
613 case Primitive::kPrimInt:
614 case Primitive::kPrimLong:
615 DCHECK_LE(2u, instruction->GetVectorLength());
616 DCHECK_LE(instruction->GetVectorLength(), 16u);
617 __ pxor(dst, src);
618 break;
619 case Primitive::kPrimFloat:
620 DCHECK_EQ(4u, instruction->GetVectorLength());
621 __ xorps(dst, src);
622 break;
623 case Primitive::kPrimDouble:
624 DCHECK_EQ(2u, instruction->GetVectorLength());
625 __ xorpd(dst, src);
626 break;
627 default:
628 LOG(FATAL) << "Unsupported SIMD type";
629 UNREACHABLE();
630 }
631}
632
633// Helper to set up locations for vector shift operations.
634static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
635 LocationSummary* locations = new (arena) LocationSummary(instruction);
636 switch (instruction->GetPackedType()) {
637 case Primitive::kPrimChar:
638 case Primitive::kPrimShort:
639 case Primitive::kPrimInt:
640 case Primitive::kPrimLong:
641 locations->SetInAt(0, Location::RequiresFpuRegister());
642 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
643 locations->SetOut(Location::SameAsFirstInput());
644 break;
645 default:
646 LOG(FATAL) << "Unsupported SIMD type";
647 UNREACHABLE();
648 }
649}
650
651void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
652 CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
653}
654
655void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
656 LocationSummary* locations = instruction->GetLocations();
657 DCHECK(locations->InAt(0).Equals(locations->Out()));
658 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
659 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
660 switch (instruction->GetPackedType()) {
661 case Primitive::kPrimChar:
662 case Primitive::kPrimShort:
663 DCHECK_EQ(8u, instruction->GetVectorLength());
664 __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
665 break;
666 case Primitive::kPrimInt:
667 DCHECK_EQ(4u, instruction->GetVectorLength());
668 __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
669 break;
670 case Primitive::kPrimLong:
671 DCHECK_EQ(2u, instruction->GetVectorLength());
672 __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
673 break;
674 default:
675 LOG(FATAL) << "Unsupported SIMD type";
676 UNREACHABLE();
677 }
678}
679
680void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
681 CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
682}
683
684void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
685 LocationSummary* locations = instruction->GetLocations();
686 DCHECK(locations->InAt(0).Equals(locations->Out()));
687 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
688 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
689 switch (instruction->GetPackedType()) {
690 case Primitive::kPrimChar:
691 case Primitive::kPrimShort:
692 DCHECK_EQ(8u, instruction->GetVectorLength());
693 __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
694 break;
695 case Primitive::kPrimInt:
696 DCHECK_EQ(4u, instruction->GetVectorLength());
697 __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
698 break;
699 default:
700 LOG(FATAL) << "Unsupported SIMD type";
701 UNREACHABLE();
702 }
703}
704
705void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
706 CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
707}
708
709void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
710 LocationSummary* locations = instruction->GetLocations();
711 DCHECK(locations->InAt(0).Equals(locations->Out()));
712 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
713 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
714 switch (instruction->GetPackedType()) {
715 case Primitive::kPrimChar:
716 case Primitive::kPrimShort:
717 DCHECK_EQ(8u, instruction->GetVectorLength());
718 __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
719 break;
720 case Primitive::kPrimInt:
721 DCHECK_EQ(4u, instruction->GetVectorLength());
722 __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
723 break;
724 case Primitive::kPrimLong:
725 DCHECK_EQ(2u, instruction->GetVectorLength());
726 __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
727 break;
728 default:
729 LOG(FATAL) << "Unsupported SIMD type";
730 UNREACHABLE();
731 }
732}
733
Artem Serovf34dd202017-04-10 17:41:46 +0100734void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
735 LOG(FATAL) << "No SIMD for " << instr->GetId();
736}
737
738void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
739 LOG(FATAL) << "No SIMD for " << instr->GetId();
740}
741
Aart Bikf8f5a162017-02-06 15:35:29 -0800742// Helper to set up locations for vector memory operations.
743static void CreateVecMemLocations(ArenaAllocator* arena,
744 HVecMemoryOperation* instruction,
745 bool is_load) {
746 LocationSummary* locations = new (arena) LocationSummary(instruction);
747 switch (instruction->GetPackedType()) {
748 case Primitive::kPrimBoolean:
749 case Primitive::kPrimByte:
750 case Primitive::kPrimChar:
751 case Primitive::kPrimShort:
752 case Primitive::kPrimInt:
753 case Primitive::kPrimLong:
754 case Primitive::kPrimFloat:
755 case Primitive::kPrimDouble:
756 locations->SetInAt(0, Location::RequiresRegister());
757 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
758 if (is_load) {
759 locations->SetOut(Location::RequiresFpuRegister());
760 } else {
761 locations->SetInAt(2, Location::RequiresFpuRegister());
762 }
763 break;
764 default:
765 LOG(FATAL) << "Unsupported SIMD type";
766 UNREACHABLE();
767 }
768}
769
Aart Bik472821b2017-04-27 17:23:51 -0700770// Helper to construct address for vector memory operations.
771static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
Aart Bikf8f5a162017-02-06 15:35:29 -0800772 Location base = locations->InAt(0);
773 Location index = locations->InAt(1);
Aart Bikf8f5a162017-02-06 15:35:29 -0800774 ScaleFactor scale = TIMES_1;
775 switch (size) {
776 case 2: scale = TIMES_2; break;
777 case 4: scale = TIMES_4; break;
778 case 8: scale = TIMES_8; break;
779 default: break;
780 }
Aart Bik472821b2017-04-27 17:23:51 -0700781 uint32_t offset = is_string_char_at
782 ? mirror::String::ValueOffset().Uint32Value()
783 : mirror::Array::DataOffset(size).Uint32Value();
Aart Bikf8f5a162017-02-06 15:35:29 -0800784 return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
785}
786
787void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
788 CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
Aart Bik472821b2017-04-27 17:23:51 -0700789 // String load requires a temporary for the compressed load.
790 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
791 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
792 }
Aart Bikf8f5a162017-02-06 15:35:29 -0800793}
794
795void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
Aart Bik472821b2017-04-27 17:23:51 -0700796 LocationSummary* locations = instruction->GetLocations();
797 size_t size = Primitive::ComponentSize(instruction->GetPackedType());
798 Address address = VecAddress(locations, size, instruction->IsStringCharAt());
799 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
Aart Bikf8f5a162017-02-06 15:35:29 -0800800 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
801 switch (instruction->GetPackedType()) {
Aart Bik472821b2017-04-27 17:23:51 -0700802 case Primitive::kPrimChar:
803 DCHECK_EQ(8u, instruction->GetVectorLength());
804 // Special handling of compressed/uncompressed string load.
805 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
806 NearLabel done, not_compressed;
807 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
808 // Test compression bit.
809 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
810 "Expecting 0=compressed, 1=uncompressed");
811 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
812 __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
813 __ j(kNotZero, &not_compressed);
814 // Zero extend 8 compressed bytes into 8 chars.
815 __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
816 __ pxor(tmp, tmp);
817 __ punpcklbw(reg, tmp);
818 __ jmp(&done);
819 // Load 4 direct uncompressed chars.
820 __ Bind(&not_compressed);
821 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
822 __ Bind(&done);
823 return;
824 }
825 FALLTHROUGH_INTENDED;
Aart Bikf8f5a162017-02-06 15:35:29 -0800826 case Primitive::kPrimBoolean:
827 case Primitive::kPrimByte:
Aart Bikf8f5a162017-02-06 15:35:29 -0800828 case Primitive::kPrimShort:
829 case Primitive::kPrimInt:
830 case Primitive::kPrimLong:
831 DCHECK_LE(2u, instruction->GetVectorLength());
832 DCHECK_LE(instruction->GetVectorLength(), 16u);
833 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
834 break;
835 case Primitive::kPrimFloat:
836 DCHECK_EQ(4u, instruction->GetVectorLength());
837 is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
838 break;
839 case Primitive::kPrimDouble:
840 DCHECK_EQ(2u, instruction->GetVectorLength());
841 is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
842 break;
843 default:
844 LOG(FATAL) << "Unsupported SIMD type";
845 UNREACHABLE();
846 }
847}
848
849void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
850 CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
851}
852
853void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
Aart Bik472821b2017-04-27 17:23:51 -0700854 LocationSummary* locations = instruction->GetLocations();
855 size_t size = Primitive::ComponentSize(instruction->GetPackedType());
856 Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
857 XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
Aart Bikf8f5a162017-02-06 15:35:29 -0800858 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
859 switch (instruction->GetPackedType()) {
860 case Primitive::kPrimBoolean:
861 case Primitive::kPrimByte:
862 case Primitive::kPrimChar:
863 case Primitive::kPrimShort:
864 case Primitive::kPrimInt:
865 case Primitive::kPrimLong:
866 DCHECK_LE(2u, instruction->GetVectorLength());
867 DCHECK_LE(instruction->GetVectorLength(), 16u);
868 is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
869 break;
870 case Primitive::kPrimFloat:
871 DCHECK_EQ(4u, instruction->GetVectorLength());
872 is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
873 break;
874 case Primitive::kPrimDouble:
875 DCHECK_EQ(2u, instruction->GetVectorLength());
876 is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
877 break;
878 default:
879 LOG(FATAL) << "Unsupported SIMD type";
880 UNREACHABLE();
881 }
882}
883
884#undef __
885
886} // namespace x86
887} // namespace art