/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm64.h"

#include "arch/arm64/callee_save_frame_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "data_type-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "mirror/var_handle.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art {

namespace arm64 {

using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::IsConstantZeroBitPattern;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::HRegisterFrom;
using helpers::InputRegisterAt;
using helpers::OutputRegister;

namespace {

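// Builds a MemOperand for an absolute address held in a core register; used below by the
// Memory.peek*/poke* intrinsics, which operate on raw native addresses.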
ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
  return MemOperand(XRegisterFrom(location), offset);
}

}  // namespace

MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
  return codegen_->GetVIXLAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

using IntrinsicSlowPathARM64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM64,
                                                 SlowPathCodeARM64,
                                                 Arm64Assembler>;

#define __ codegen->GetVIXLAssembler()->

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
      : SlowPathCodeARM64(instruction), tmp_(tmp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen_in) override {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = DataType::Size(DataType::Type::kReference);

    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    Register tmp_reg = WRegisterFrom(tmp_);

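    // Copy loop: load each reference from the source, mark it through the read barrier
    // entrypoint, then store it to the destination until src_curr_addr reaches src_stop_addr.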
    __ Bind(GetEntryLabel());
    vixl::aarch64::Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp_.reg(), LR);
    DCHECK_NE(tmp_.reg(), WSP);
    DCHECK_NE(tmp_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    DCHECK_NE(tmp_.reg(), IP0);
    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(&slow_copy_loop, ne);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
};
#undef __

bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ masm->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
}

void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

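// Byte reversal uses REV16/REV; kInt16 additionally sign-extends the result, and the
// kFloat32/kFloat64 cases reverse in the core register before moving into the FP register,
// clobbering the input register.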
static void GenerateReverseBytes(MacroAssembler* masm,
                                 DataType::Type type,
                                 CPURegister in,
                                 CPURegister out) {
  switch (type) {
    case DataType::Type::kUint16:
      __ Rev16(out.W(), in.W());
      break;
    case DataType::Type::kInt16:
      __ Rev16(out.W(), in.W());
      __ Sxth(out.W(), out.W());
      break;
    case DataType::Type::kInt32:
      __ Rev(out.W(), in.W());
      break;
    case DataType::Type::kInt64:
      __ Rev(out.X(), in.X());
      break;
    case DataType::Type::kFloat32:
      __ Rev(in.W(), in.W());  // Note: Clobbers `in`.
      __ Fmov(out.S(), in.W());
      break;
    case DataType::Type::kFloat64:
      __ Rev(in.X(), in.X());  // Note: Clobbers `in`.
      __ Fmov(out.D(), in.X());
      break;
    default:
      LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
      UNREACHABLE();
  }
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type type,
                            MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();
  GenerateReverseBytes(masm, type, CPURegisterFrom(in, type), CPURegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    DataType::Type type,
                                    MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     DataType::Type type,
                                     MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenReverse(LocationSummary* locations,
                       DataType::Type type,
                       MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
  DCHECK(DataType::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
  DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(instr, 0);
  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
  VRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();

  __ Fmov(fpr, src);
  __ Cnt(fpr.V8B(), fpr.V8B());
  __ Addv(fpr.B(), fpr.V8B());
  __ Fmov(dst, fpr);
}

void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
  size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
  size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;

  __ Clz(temp, src);
  __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
  __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
  __ Lsr(dst, dst, temp);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();

  __ Neg(temp, src);
  __ And(dst, temp, src);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
  // Java 8 API definition for Math.round():
  // Return the closest long or int to the argument, with ties rounding to positive infinity.
  //
  // There is no single instruction in ARMv8 that implements the above definition.
  // We choose FCVTAS here because it has the closest semantics.
  // FCVTAS rounds to the nearest integer, with ties away from zero.
  // For most inputs (positive values, zero, or NaN), this instruction is enough.
  // We only need a little extra handling after FCVTAS when the input is a negative half value.
  //
  // The reason we didn't choose the FCVTPS instruction is that although it rounds toward
  // positive infinity, it doesn't round to nearest.
  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
  // If we used that instruction, more handling code would be needed for most inputs.
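  //
  // Example: Math.round(-2.5) == -2, but FCVTAS(-2.5) == -3; the negative-tie fix-up below
  // detects this case and adds one to the result.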
  LocationSummary* l = invoke->GetLocations();
  VRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  VRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}

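// Unsafe.get* loads a value of the given type from `base + offset`. Volatile loads use a
// load-acquire; reference loads may additionally need a read barrier (Baker read barrier fast
// path, or a slow-path read barrier call otherwise).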
static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK((type == DataType::Type::kInt32) ||
         (type == DataType::Type::kInt64) ||
         (type == DataType::Type::kReference));
  Location base_loc = locations->InAt(1);
  Register base = WRegisterFrom(base_loc);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);  // Long offset.
  Location trg_loc = locations->Out();
  Register trg = RegisterFrom(trg_loc, type);

  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    Register temp = WRegisterFrom(locations->GetTemp(0));
    MacroAssembler* masm = codegen->GetVIXLAssembler();
    // Piggy-back on the field load path using introspection for the Baker read barrier.
    __ Add(temp, base, offset.W());  // Offset should not exceed 32 bits.
    codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                   trg_loc,
                                                   base,
                                                   MemOperand(temp.X()),
                                                   /* needs_null_check= */ false,
                                                   is_volatile);
  } else {
    // Other cases.
    MemOperand mem_op(base.X(), offset);
    if (is_volatile) {
      codegen->LoadAcquire(invoke, type, trg, mem_op, /* needs_null_check= */ true);
    } else {
      codegen->Load(type, trg, mem_op);
    }

    if (type == DataType::Type::kReference) {
      DCHECK(trg.IsW());
      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    }
  }
}

static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
  switch (intrinsic) {
    case Intrinsics::kUnsafeGetObject:
    case Intrinsics::kUnsafeGetObjectVolatile:
    case Intrinsics::kJdkUnsafeGetObject:
    case Intrinsics::kJdkUnsafeGetObjectVolatile:
    case Intrinsics::kJdkUnsafeGetObjectAcquire:
      return true;
    default:
      break;
  }
  return false;
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier load in order to use
    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
    locations->AddTemp(FixedTempLocation());
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
  VisitJdkUnsafeGet(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  VisitJdkUnsafeGetLong(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  VisitJdkUnsafeGetObject(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetObjectVolatile(invoke);
}

void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
  VisitJdkUnsafeGet(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  VisitJdkUnsafeGetLong(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  VisitJdkUnsafeGetObject(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetObjectVolatile(invoke);
}

void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
  VisitJdkUnsafePut(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutOrdered(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
  VisitJdkUnsafePutObject(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutObjectOrdered(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutObjectVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
  VisitJdkUnsafePutLong(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutLongOrdered(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutLongVolatile(invoke);
}

void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}

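// Unsafe.put* stores the value at `base + offset`. Volatile and ordered stores use a
// store-release; reference stores may also poison the heap reference and mark the GC card
// for the destination object.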
Artem Serov914d7a82017-02-07 14:33:49 +0000945static void GenUnsafePut(HInvoke* invoke,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100946 DataType::Type type,
Andreas Gampe878d58c2015-01-15 23:24:00 -0800947 bool is_volatile,
948 bool is_ordered,
949 CodeGeneratorARM64* codegen) {
Artem Serov914d7a82017-02-07 14:33:49 +0000950 LocationSummary* locations = invoke->GetLocations();
Alexandre Rames087930f2016-08-02 13:45:28 +0100951 MacroAssembler* masm = codegen->GetVIXLAssembler();
Andreas Gampe878d58c2015-01-15 23:24:00 -0800952
953 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
954 Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
955 Register value = RegisterFrom(locations->InAt(3), type);
Roland Levillain4d027112015-07-01 15:41:14 +0100956 Register source = value;
Andreas Gampe878d58c2015-01-15 23:24:00 -0800957 MemOperand mem_op(base.X(), offset);
958
Roland Levillain4d027112015-07-01 15:41:14 +0100959 {
960 // We use a block to end the scratch scope before the write barrier, thus
961 // freeing the temporary registers so they can be used in `MarkGCCard`.
962 UseScratchRegisterScope temps(masm);
963
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100964 if (kPoisonHeapReferences && type == DataType::Type::kReference) {
Roland Levillain4d027112015-07-01 15:41:14 +0100965 DCHECK(value.IsW());
966 Register temp = temps.AcquireW();
967 __ Mov(temp.W(), value.W());
968 codegen->GetAssembler()->PoisonHeapReference(temp.W());
969 source = temp;
Andreas Gampe878d58c2015-01-15 23:24:00 -0800970 }
Roland Levillain4d027112015-07-01 15:41:14 +0100971
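    // Both the volatile and the ordered case take the store-release path below, so on ARM64 an
    // ordered put provides the same release semantics as a volatile put.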
972 if (is_volatile || is_ordered) {
Andreas Gampe3db70682018-12-26 15:12:03 -0800973 codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false);
Roland Levillain4d027112015-07-01 15:41:14 +0100974 } else {
975 codegen->Store(type, source, mem_op);
976 }
Andreas Gampe878d58c2015-01-15 23:24:00 -0800977 }
978
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100979 if (type == DataType::Type::kReference) {
Nicolas Geoffray07276db2015-05-18 14:22:09 +0100980 bool value_can_be_null = true; // TODO: Worth finding out whether the value can be null?
981 codegen->MarkGCCard(base, value, value_can_be_null);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800982 }
983}
984
985void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +0000986 VisitJdkUnsafePut(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800987}
988void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +0000989 VisitJdkUnsafePutOrdered(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800990}
991void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +0000992 VisitJdkUnsafePutVolatile(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800993}
994void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +0000995 VisitJdkUnsafePutObject(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800996}
997void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +0000998 VisitJdkUnsafePutObjectOrdered(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800999}
1000void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001001 VisitJdkUnsafePutObjectVolatile(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001002}
1003void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001004 VisitJdkUnsafePutLong(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001005}
1006void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001007 VisitJdkUnsafePutLongOrdered(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001008}
1009void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001010 VisitJdkUnsafePutLongVolatile(invoke);
1011}
1012
1013void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePut(HInvoke* invoke) {
1014 GenUnsafePut(invoke,
1015 DataType::Type::kInt32,
1016 /*is_volatile=*/ false,
1017 /*is_ordered=*/ false,
1018 codegen_);
1019}
1020void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
1021 GenUnsafePut(invoke,
1022 DataType::Type::kInt32,
1023 /*is_volatile=*/ false,
1024 /*is_ordered=*/ true,
1025 codegen_);
1026}
1027void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
1028 GenUnsafePut(invoke,
1029 DataType::Type::kInt32,
1030 /*is_volatile=*/ true,
1031 /*is_ordered=*/ false,
1032 codegen_);
1033}
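// Note: the JdkUnsafePut...Release variants below are generated exactly like the corresponding
// volatile variants (the helper is called with is_volatile=true); on ARM64 both end up as a
// store-release.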
Sorin Basca0069ad72021-09-17 17:33:09 +00001034void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
1035 GenUnsafePut(invoke,
1036 DataType::Type::kInt32,
1037 /*is_volatile=*/ true,
1038 /*is_ordered=*/ false,
1039 codegen_);
1040}
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001041void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObject(HInvoke* invoke) {
1042 GenUnsafePut(invoke,
1043 DataType::Type::kReference,
1044 /*is_volatile=*/ false,
1045 /*is_ordered=*/ false,
1046 codegen_);
1047}
1048void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
1049 GenUnsafePut(invoke,
1050 DataType::Type::kReference,
1051 /*is_volatile=*/ false,
1052 /*is_ordered=*/ true,
1053 codegen_);
1054}
1055void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
1056 GenUnsafePut(invoke,
1057 DataType::Type::kReference,
1058 /*is_volatile=*/ true,
1059 /*is_ordered=*/ false,
1060 codegen_);
1061}
Sorin Basca507cf902021-10-06 12:04:56 +00001062void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
1063 GenUnsafePut(invoke,
1064 DataType::Type::kReference,
1065 /*is_volatile=*/ true,
1066 /*is_ordered=*/ false,
1067 codegen_);
1068}
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001069void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLong(HInvoke* invoke) {
Artem Serov914d7a82017-02-07 14:33:49 +00001070 GenUnsafePut(invoke,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001071 DataType::Type::kInt64,
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001072 /*is_volatile=*/ false,
1073 /*is_ordered=*/ false,
1074 codegen_);
1075}
1076void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
1077 GenUnsafePut(invoke,
1078 DataType::Type::kInt64,
1079 /*is_volatile=*/ false,
1080 /*is_ordered=*/ true,
1081 codegen_);
1082}
1083void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
1084 GenUnsafePut(invoke,
1085 DataType::Type::kInt64,
1086 /*is_volatile=*/ true,
1087 /*is_ordered=*/ false,
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001088 codegen_);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001089}
Sorin Basca507cf902021-10-06 12:04:56 +00001090void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
1091 GenUnsafePut(invoke,
1092 DataType::Type::kInt64,
1093 /*is_volatile=*/ true,
1094 /*is_ordered=*/ false,
1095 codegen_);
1096}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001097
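// Locations shared by all Unsafe/JdkUnsafe CAS intrinsics: the object, offset, expected value
// and new value are passed in registers and the boolean result gets its own register. A
// slow-path call can be required only for the reference CAS when read barriers are emitted.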
Vladimir Markoe17530a2020-11-11 17:02:26 +00001098static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
Roland Levillaina1aa3b12016-10-26 13:03:38 +01001099 bool can_call = kEmitCompilerReadBarrier &&
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001100 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject ||
1101 invoke->GetIntrinsic() == Intrinsics::kJdkUnsafeCASObject);
Vladimir Markoca6fff82017-10-03 14:49:14 +01001102 LocationSummary* locations =
1103 new (allocator) LocationSummary(invoke,
1104 can_call
1105 ? LocationSummary::kCallOnSlowPath
1106 : LocationSummary::kNoCall,
1107 kIntrinsified);
Vladimir Markoc8178f52020-11-24 10:38:16 +00001108 if (can_call && kUseBakerReadBarrier) {
Vladimir Marko94796f82018-08-08 15:15:33 +01001109 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1110 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001111 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1112 locations->SetInAt(1, Location::RequiresRegister());
1113 locations->SetInAt(2, Location::RequiresRegister());
1114 locations->SetInAt(3, Location::RequiresRegister());
1115 locations->SetInAt(4, Location::RequiresRegister());
1116
Vladimir Marko94796f82018-08-08 15:15:33 +01001117 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001118}
1119
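// EmitLoadExclusive() and EmitStoreExclusive() below wrap the LDXR/LDAXR and STXR/STLXR families
// for the supported operand sizes. The load helper also sign-extends narrow signed values and
// unpoisons loaded references; the store helper poisons the reference before the store and
// unpoisons it again afterwards, as the caller may still need the unpoisoned value.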
1120static void EmitLoadExclusive(CodeGeneratorARM64* codegen,
1121 DataType::Type type,
1122 Register ptr,
1123 Register old_value,
1124 bool use_load_acquire) {
1125 Arm64Assembler* assembler = codegen->GetAssembler();
1126 MacroAssembler* masm = assembler->GetVIXLAssembler();
1127 switch (type) {
1128 case DataType::Type::kBool:
Vladimir Marko98873af2020-12-16 12:10:03 +00001129 case DataType::Type::kUint8:
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001130 case DataType::Type::kInt8:
1131 if (use_load_acquire) {
1132 __ Ldaxrb(old_value, MemOperand(ptr));
1133 } else {
1134 __ Ldxrb(old_value, MemOperand(ptr));
1135 }
1136 break;
1137 case DataType::Type::kUint16:
1138 case DataType::Type::kInt16:
1139 if (use_load_acquire) {
1140 __ Ldaxrh(old_value, MemOperand(ptr));
1141 } else {
1142 __ Ldxrh(old_value, MemOperand(ptr));
1143 }
1144 break;
1145 case DataType::Type::kInt32:
1146 case DataType::Type::kInt64:
1147 case DataType::Type::kReference:
1148 if (use_load_acquire) {
1149 __ Ldaxr(old_value, MemOperand(ptr));
1150 } else {
1151 __ Ldxr(old_value, MemOperand(ptr));
1152 }
1153 break;
1154 default:
1155 LOG(FATAL) << "Unexpected type: " << type;
1156 UNREACHABLE();
1157 }
1158 switch (type) {
1159 case DataType::Type::kInt8:
1160 __ Sxtb(old_value, old_value);
1161 break;
1162 case DataType::Type::kInt16:
1163 __ Sxth(old_value, old_value);
1164 break;
1165 case DataType::Type::kReference:
1166 assembler->MaybeUnpoisonHeapReference(old_value);
1167 break;
1168 default:
1169 break;
Roland Levillaina1aa3b12016-10-26 13:03:38 +01001170 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001171}
1172
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001173static void EmitStoreExclusive(CodeGeneratorARM64* codegen,
1174 DataType::Type type,
1175 Register ptr,
1176 Register store_result,
1177 Register new_value,
1178 bool use_store_release) {
1179 Arm64Assembler* assembler = codegen->GetAssembler();
1180 MacroAssembler* masm = assembler->GetVIXLAssembler();
1181 if (type == DataType::Type::kReference) {
1182 assembler->MaybePoisonHeapReference(new_value);
1183 }
1184 switch (type) {
1185 case DataType::Type::kBool:
Vladimir Marko98873af2020-12-16 12:10:03 +00001186 case DataType::Type::kUint8:
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001187 case DataType::Type::kInt8:
1188 if (use_store_release) {
1189 __ Stlxrb(store_result, new_value, MemOperand(ptr));
1190 } else {
1191 __ Stxrb(store_result, new_value, MemOperand(ptr));
1192 }
1193 break;
1194 case DataType::Type::kUint16:
1195 case DataType::Type::kInt16:
1196 if (use_store_release) {
1197 __ Stlxrh(store_result, new_value, MemOperand(ptr));
1198 } else {
1199 __ Stxrh(store_result, new_value, MemOperand(ptr));
1200 }
1201 break;
1202 case DataType::Type::kInt32:
1203 case DataType::Type::kInt64:
1204 case DataType::Type::kReference:
1205 if (use_store_release) {
1206 __ Stlxr(store_result, new_value, MemOperand(ptr));
1207 } else {
1208 __ Stxr(store_result, new_value, MemOperand(ptr));
1209 }
1210 break;
1211 default:
1212 LOG(FATAL) << "Unexpected type: " << type;
1213 UNREACHABLE();
1214 }
1215 if (type == DataType::Type::kReference) {
1216 assembler->MaybeUnpoisonHeapReference(new_value);
1217 }
1218}
Vladimir Marko94796f82018-08-08 15:15:33 +01001219
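// Emits the load/store-exclusive loop for a compare-and-set. The std::memory_order argument is
// mapped onto the exclusive instructions: acquire and seq_cst orders use a load-acquire
// exclusive, release and seq_cst orders use a store-release exclusive, and relaxed uses neither.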
Vladimir Markoe17530a2020-11-11 17:02:26 +00001220static void GenerateCompareAndSet(CodeGeneratorARM64* codegen,
1221 DataType::Type type,
1222 std::memory_order order,
1223 bool strong,
1224 vixl::aarch64::Label* cmp_failure,
1225 Register ptr,
1226 Register new_value,
1227 Register old_value,
1228 Register store_result,
1229 Register expected,
1230 Register expected2 = Register()) {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001231 // The `expected2` is valid only for the reference slow path and represents the unmarked old value
1232 // from the main path's CAS attempt, where the marked old value matched `expected`.
1233 DCHECK(type == DataType::Type::kReference || !expected2.IsValid());
1234
1235 DCHECK(ptr.IsX());
1236 DCHECK_EQ(new_value.IsX(), type == DataType::Type::kInt64);
1237 DCHECK_EQ(old_value.IsX(), type == DataType::Type::kInt64);
1238 DCHECK(store_result.IsW());
1239 DCHECK_EQ(expected.IsX(), type == DataType::Type::kInt64);
1240 DCHECK(!expected2.IsValid() || expected2.IsW());
1241
1242 Arm64Assembler* assembler = codegen->GetAssembler();
1243 MacroAssembler* masm = assembler->GetVIXLAssembler();
1244
1245 bool use_load_acquire =
1246 (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
1247 bool use_store_release =
1248 (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
1249 DCHECK(use_load_acquire || use_store_release || order == std::memory_order_relaxed);
1250
1251 // repeat: {
1252 // old_value = [ptr]; // Load exclusive.
1253 // if (old_value != expected && old_value != expected2) goto cmp_failure;
1254 // store_result = failed([ptr] <- new_value); // Store exclusive.
1255 // }
1256 // if (strong) {
1257 // if (store_result) goto repeat; // Repeat until compare fails or store exclusive succeeds.
1258 // } else {
1259 // store_result = store_result ^ 1; // Report success as 1, failure as 0.
1260 // }
1261 //
1262 // Flag Z indicates whether `old_value == expected || old_value == expected2`.
1263 // (If `expected2` is not valid, the `old_value == expected2` part is not emitted.)
1264
1265 vixl::aarch64::Label loop_head;
1266 if (strong) {
1267 __ Bind(&loop_head);
1268 }
1269 EmitLoadExclusive(codegen, type, ptr, old_value, use_load_acquire);
1270 __ Cmp(old_value, expected);
1271 if (expected2.IsValid()) {
1272 __ Ccmp(old_value, expected2, ZFlag, ne);
1273 }
Vladimir Markoc8178f52020-11-24 10:38:16 +00001274 // If the comparison failed, the Z flag is cleared as we branch to the `cmp_failure` label.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001275 // If the comparison succeeded, the Z flag is set and remains set after the end of the
1276 // code emitted here, unless we retry the whole operation.
1277 __ B(cmp_failure, ne);
1278 EmitStoreExclusive(codegen, type, ptr, store_result, new_value, use_store_release);
1279 if (strong) {
1280 __ Cbnz(store_result, &loop_head);
1281 } else {
1282 // Flip the `store_result` register to indicate success by 1 and failure by 0.
1283 __ Eor(store_result, store_result, 1);
1284 }
1285}
1286
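// Slow path for a reference CAS with read barriers. The old value loaded in the main path may be
// a from-space reference, so it is marked here (with a Baker read barrier or a runtime call) and
// compared against `expected` again; on a match the CAS loop is retried, this time accepting both
// the to-space and the from-space reference for the same object.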
1287class ReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
1288 public:
1289 ReadBarrierCasSlowPathARM64(HInvoke* invoke,
1290 std::memory_order order,
1291 bool strong,
1292 Register base,
1293 Register offset,
1294 Register expected,
1295 Register new_value,
1296 Register old_value,
1297 Register old_value_temp,
1298 Register store_result,
1299 bool update_old_value,
1300 CodeGeneratorARM64* arm64_codegen)
1301 : SlowPathCodeARM64(invoke),
1302 order_(order),
1303 strong_(strong),
1304 base_(base),
1305 offset_(offset),
1306 expected_(expected),
1307 new_value_(new_value),
1308 old_value_(old_value),
1309 old_value_temp_(old_value_temp),
1310 store_result_(store_result),
1311 update_old_value_(update_old_value),
1312 mark_old_value_slow_path_(nullptr),
1313 update_old_value_slow_path_(nullptr) {
1314 if (!kUseBakerReadBarrier) {
1315 // We need to add the slow path now, it is too late when emitting slow path code.
1316 mark_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1317 invoke,
1318 Location::RegisterLocation(old_value_temp.GetCode()),
1319 Location::RegisterLocation(old_value.GetCode()),
1320 Location::RegisterLocation(base.GetCode()),
1321 /*offset=*/ 0u,
1322 /*index=*/ Location::RegisterLocation(offset.GetCode()));
1323 if (update_old_value_) {
1324 update_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1325 invoke,
1326 Location::RegisterLocation(old_value.GetCode()),
1327 Location::RegisterLocation(old_value_temp.GetCode()),
1328 Location::RegisterLocation(base.GetCode()),
1329 /*offset=*/ 0u,
1330 /*index=*/ Location::RegisterLocation(offset.GetCode()));
1331 }
1332 }
1333 }
1334
1335 const char* GetDescription() const override { return "ReadBarrierCasSlowPathARM64"; }
Vladimir Marko94796f82018-08-08 15:15:33 +01001336
Roland Levillainbbc6e7e2018-08-24 16:58:47 +01001337 void EmitNativeCode(CodeGenerator* codegen) override {
Vladimir Marko94796f82018-08-08 15:15:33 +01001338 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1339 Arm64Assembler* assembler = arm64_codegen->GetAssembler();
1340 MacroAssembler* masm = assembler->GetVIXLAssembler();
1341 __ Bind(GetEntryLabel());
1342
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001343 // Mark the `old_value_` from the main path and compare with `expected_`.
1344 if (kUseBakerReadBarrier) {
1345 DCHECK(mark_old_value_slow_path_ == nullptr);
Vladimir Markoc8178f52020-11-24 10:38:16 +00001346 arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001347 } else {
1348 DCHECK(mark_old_value_slow_path_ != nullptr);
1349 __ B(mark_old_value_slow_path_->GetEntryLabel());
1350 __ Bind(mark_old_value_slow_path_->GetExitLabel());
1351 }
1352 __ Cmp(old_value_temp_, expected_);
1353 if (update_old_value_) {
1354 // Update the old value if we're going to return from the slow path.
1355 __ Csel(old_value_, old_value_temp_, old_value_, ne);
1356 }
Vladimir Marko94796f82018-08-08 15:15:33 +01001357 __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure.
1358
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001359 // The `old_value` we have read did not match `expected` (which is always a to-space
1360 // reference) but after the read barrier the marked to-space value matched, so the
1361 // `old_value` must be a from-space reference to the same object. Do the same CAS loop
1362 // as the main path but check for both `expected` and the unmarked old value
1363 // representing the to-space and from-space references for the same object.
Vladimir Marko94796f82018-08-08 15:15:33 +01001364
1365 UseScratchRegisterScope temps(masm);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001366 DCHECK(!store_result_.IsValid() || !temps.IsAvailable(store_result_));
Vladimir Marko94796f82018-08-08 15:15:33 +01001367 Register tmp_ptr = temps.AcquireX();
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001368 Register store_result = store_result_.IsValid() ? store_result_ : temps.AcquireW();
Vladimir Marko94796f82018-08-08 15:15:33 +01001369
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001370 // Recalculate the `tmp_ptr` from main path clobbered by the read barrier above.
1371 __ Add(tmp_ptr, base_.X(), Operand(offset_));
Vladimir Marko94796f82018-08-08 15:15:33 +01001372
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001373 vixl::aarch64::Label mark_old_value;
Vladimir Markoe17530a2020-11-11 17:02:26 +00001374 GenerateCompareAndSet(arm64_codegen,
1375 DataType::Type::kReference,
1376 order_,
1377 strong_,
1378 /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
1379 tmp_ptr,
1380 new_value_,
1381 /*old_value=*/ old_value_temp_,
1382 store_result,
1383 expected_,
1384 /*expected2=*/ old_value_);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001385 if (update_old_value_) {
1386 // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
1387 // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
1388 __ Mov(old_value_, expected_);
1389 }
Vladimir Marko94796f82018-08-08 15:15:33 +01001390
Vladimir Markoe17530a2020-11-11 17:02:26 +00001391 // Z=true from the CMP+CCMP in GenerateCompareAndSet() above indicates comparison success.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001392 // For strong CAS, that's the overall success. For weak CAS, the code also needs
1393 // to check the `store_result` after returning from the slow path.
Vladimir Marko94796f82018-08-08 15:15:33 +01001394 __ B(GetExitLabel());
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001395
1396 if (update_old_value_) {
1397 __ Bind(&mark_old_value);
1398 if (kUseBakerReadBarrier) {
1399 DCHECK(update_old_value_slow_path_ == nullptr);
Vladimir Markoc8178f52020-11-24 10:38:16 +00001400 arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001401 } else {
1402 // Note: We could redirect the `failure` above directly to the entry label and bind
1403 // the exit label in the main path, but the main path would need to access the
1404 // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
1405 DCHECK(update_old_value_slow_path_ != nullptr);
1406 __ B(update_old_value_slow_path_->GetEntryLabel());
1407 __ Bind(update_old_value_slow_path_->GetExitLabel());
1408 }
1409 __ B(GetExitLabel());
1410 }
Vladimir Marko94796f82018-08-08 15:15:33 +01001411 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001412
1413 private:
1414 std::memory_order order_;
1415 bool strong_;
1416 Register base_;
1417 Register offset_;
1418 Register expected_;
1419 Register new_value_;
1420 Register old_value_;
1421 Register old_value_temp_;
1422 Register store_result_;
1423 bool update_old_value_;
1424 SlowPathCodeARM64* mark_old_value_slow_path_;
1425 SlowPathCodeARM64* update_old_value_slow_path_;
Vladimir Marko94796f82018-08-08 15:15:33 +01001426};
1427
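// Code generation shared by the Unsafe/JdkUnsafe CAS intrinsics: a strong, sequentially
// consistent compare-and-set whose boolean result is produced from the Z flag with CSET. For a
// reference CAS, the GC card is marked up front and, with read barriers, the slow path above
// handles the case where the loaded old value is a stale from-space reference.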
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001428static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
1429 MacroAssembler* masm = codegen->GetVIXLAssembler();
Roland Levillaina1aa3b12016-10-26 13:03:38 +01001430 LocationSummary* locations = invoke->GetLocations();
Andreas Gampe878d58c2015-01-15 23:24:00 -08001431
Vladimir Marko94796f82018-08-08 15:15:33 +01001432 Register out = WRegisterFrom(locations->Out()); // Boolean result.
1433 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
1434 Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
1435 Register expected = RegisterFrom(locations->InAt(3), type); // Expected.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001436 Register new_value = RegisterFrom(locations->InAt(4), type); // New value.
Andreas Gampe878d58c2015-01-15 23:24:00 -08001437
1438 // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001439 if (type == DataType::Type::kReference) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001440 // Mark card for object assuming new value is stored.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001441 bool new_value_can_be_null = true; // TODO: Worth finding out whether the new value can be null?
1442 codegen->MarkGCCard(base, new_value, new_value_can_be_null);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001443 }
1444
1445 UseScratchRegisterScope temps(masm);
1446 Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
Vladimir Marko94796f82018-08-08 15:15:33 +01001447 Register old_value; // Value in memory.
Andreas Gampe878d58c2015-01-15 23:24:00 -08001448
Vladimir Marko94796f82018-08-08 15:15:33 +01001449 vixl::aarch64::Label exit_loop_label;
1450 vixl::aarch64::Label* exit_loop = &exit_loop_label;
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001451 vixl::aarch64::Label* cmp_failure = &exit_loop_label;
Vladimir Marko94796f82018-08-08 15:15:33 +01001452
1453 if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001454 // We need to store the `old_value` in a non-scratch register to make sure
1455 // the read barrier in the slow path does not clobber it.
1456 old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path.
1457 // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
1458 // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
1459 Register old_value_temp = WRegisterFrom(locations->GetTemp(1));
1460 ReadBarrierCasSlowPathARM64* slow_path =
1461 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
1462 invoke,
1463 std::memory_order_seq_cst,
1464 /*strong=*/ true,
1465 base,
1466 offset,
1467 expected,
1468 new_value,
1469 old_value,
1470 old_value_temp,
1471 /*store_result=*/ Register(), // Use a scratch register.
1472 /*update_old_value=*/ false,
1473 codegen);
Vladimir Marko94796f82018-08-08 15:15:33 +01001474 codegen->AddSlowPath(slow_path);
1475 exit_loop = slow_path->GetExitLabel();
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001476 cmp_failure = slow_path->GetEntryLabel();
Vladimir Marko94796f82018-08-08 15:15:33 +01001477 } else {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001478 old_value = temps.AcquireSameSizeAs(new_value);
Vladimir Marko94796f82018-08-08 15:15:33 +01001479 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001480
1481 __ Add(tmp_ptr, base.X(), Operand(offset));
1482
Vladimir Markoe17530a2020-11-11 17:02:26 +00001483 GenerateCompareAndSet(codegen,
1484 type,
1485 std::memory_order_seq_cst,
1486 /*strong=*/ true,
1487 cmp_failure,
1488 tmp_ptr,
1489 new_value,
1490 old_value,
1491 /*store_result=*/ old_value.W(), // Reuse `old_value` for ST*XR* result.
1492 expected);
Vladimir Marko94796f82018-08-08 15:15:33 +01001493 __ Bind(exit_loop);
1494 __ Cset(out, eq);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001495}
1496
1497void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001498 VisitJdkUnsafeCASInt(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001499}
1500void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001501 VisitJdkUnsafeCASLong(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001502}
1503void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001504 VisitJdkUnsafeCASObject(invoke);
1505}
1506
1507void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
1508 CreateUnsafeCASLocations(allocator_, invoke);
1509}
1510void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
1511 CreateUnsafeCASLocations(allocator_, invoke);
1512}
1513void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
Roland Levillaina1aa3b12016-10-26 13:03:38 +01001514 // The only read barrier implementation supporting the
Vladimir Marko436977d2020-11-12 12:41:06 +00001515 // UnsafeCASObject intrinsic is the Baker-style read barrier. b/173104084
Roland Levillaina1aa3b12016-10-26 13:03:38 +01001516 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
Roland Levillain985ff702015-10-23 13:25:35 +01001517 return;
1518 }
1519
Vladimir Markoe17530a2020-11-11 17:02:26 +00001520 CreateUnsafeCASLocations(allocator_, invoke);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001521 if (kEmitCompilerReadBarrier) {
1522 // We need two non-scratch temporary registers for read barrier.
1523 LocationSummary* locations = invoke->GetLocations();
1524 if (kUseBakerReadBarrier) {
1525 locations->AddTemp(Location::RequiresRegister());
1526 locations->AddTemp(Location::RequiresRegister());
1527 } else {
1528 // To preserve the old value across the non-Baker read barrier
1529 // slow path, use a fixed callee-save register.
Vladimir Markoc8178f52020-11-24 10:38:16 +00001530 constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
1531 locations->AddTemp(Location::RegisterLocation(first_callee_save));
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001532 // To reduce the number of moves, request x0 as the second temporary.
1533 DCHECK(InvokeRuntimeCallingConvention().GetReturnLocation(DataType::Type::kReference).Equals(
1534 Location::RegisterLocation(x0.GetCode())));
1535 locations->AddTemp(Location::RegisterLocation(x0.GetCode()));
1536 }
1537 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001538}
Sorin Basca0069ad72021-09-17 17:33:09 +00001539void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
1540 CreateUnsafeCASLocations(allocator_, invoke);
1541}
Sorin Basca507cf902021-10-06 12:04:56 +00001542void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
1543 CreateUnsafeCASLocations(allocator_, invoke);
1544}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001545
1546void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001547 VisitJdkUnsafeCASInt(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001548}
1549void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001550 VisitJdkUnsafeCASLong(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001551}
1552void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001553 VisitJdkUnsafeCASObject(invoke);
1554}
1555
1556void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
1557 GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
1558}
1559void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
1560 GenUnsafeCas(invoke, DataType::Type::kInt64, codegen_);
1561}
1562void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001563 GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001564}
Sorin Basca0069ad72021-09-17 17:33:09 +00001565void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
1566 GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
1567}
Sorin Basca507cf902021-10-06 12:04:56 +00001568void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
1569 GenUnsafeCas(invoke, DataType::Type::kInt64, codegen_);
1570}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001571
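// Update operations emitted by GenerateGetAndUpdate() below as a load/store-exclusive loop:
// plain swap, add (integer or floating-point) and the bitwise and/or/xor updates.
// kAddWithByteSwap is presumably intended for byte-swapped accesses: the loaded value has its
// bytes reversed before the addition and the sum is reversed back before the store.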
Vladimir Markoe1510d42020-11-13 11:07:13 +00001572enum class GetAndUpdateOp {
1573 kSet,
Vladimir Markoc2d5c702020-11-13 15:28:33 +00001574 kAdd,
Vladimir Marko98873af2020-12-16 12:10:03 +00001575 kAddWithByteSwap,
Vladimir Markoc2d5c702020-11-13 15:28:33 +00001576 kAnd,
1577 kOr,
1578 kXor
Vladimir Markoe1510d42020-11-13 11:07:13 +00001579};
1580
1581static void GenerateGetAndUpdate(CodeGeneratorARM64* codegen,
1582 GetAndUpdateOp get_and_update_op,
1583 DataType::Type load_store_type,
1584 std::memory_order order,
1585 Register ptr,
1586 CPURegister arg,
1587 CPURegister old_value) {
Vladimir Marko32c2eb82020-11-10 16:58:47 +00001588 MacroAssembler* masm = codegen->GetVIXLAssembler();
1589 UseScratchRegisterScope temps(masm);
1590 Register store_result = temps.AcquireW();
1591
Vladimir Markoe1510d42020-11-13 11:07:13 +00001592 DCHECK_EQ(old_value.GetSizeInBits(), arg.GetSizeInBits());
1593 Register old_value_reg;
1594 Register new_value;
1595 switch (get_and_update_op) {
1596 case GetAndUpdateOp::kSet:
1597 old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1598 new_value = arg.IsX() ? arg.X() : arg.W();
1599 break;
Vladimir Marko98873af2020-12-16 12:10:03 +00001600 case GetAndUpdateOp::kAddWithByteSwap:
Vladimir Markoe1510d42020-11-13 11:07:13 +00001601 case GetAndUpdateOp::kAdd:
1602 if (arg.IsVRegister()) {
1603 old_value_reg = arg.IsD() ? temps.AcquireX() : temps.AcquireW();
1604 new_value = old_value_reg; // Use the same temporary.
Vladimir Markoc2d5c702020-11-13 15:28:33 +00001605 break;
Vladimir Markoe1510d42020-11-13 11:07:13 +00001606 }
Vladimir Markoc2d5c702020-11-13 15:28:33 +00001607 FALLTHROUGH_INTENDED;
1608 case GetAndUpdateOp::kAnd:
1609 case GetAndUpdateOp::kOr:
1610 case GetAndUpdateOp::kXor:
1611 old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1612 new_value = old_value.IsX() ? temps.AcquireX() : temps.AcquireW();
Vladimir Markoe1510d42020-11-13 11:07:13 +00001613 break;
1614 }
1615
Vladimir Marko32c2eb82020-11-10 16:58:47 +00001616 bool use_load_acquire =
1617 (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
1618 bool use_store_release =
1619 (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
1620 DCHECK(use_load_acquire || use_store_release);
1621
1622 vixl::aarch64::Label loop_label;
1623 __ Bind(&loop_label);
Vladimir Markoe1510d42020-11-13 11:07:13 +00001624 EmitLoadExclusive(codegen, load_store_type, ptr, old_value_reg, use_load_acquire);
1625 switch (get_and_update_op) {
1626 case GetAndUpdateOp::kSet:
1627 break;
Vladimir Marko98873af2020-12-16 12:10:03 +00001628 case GetAndUpdateOp::kAddWithByteSwap:
1629 // To avoid unnecessary sign extension before REV16, the caller must specify `kUint16`
1630 // instead of `kInt16` and do the sign-extension explicitly afterwards.
1631 DCHECK_NE(load_store_type, DataType::Type::kInt16);
1632 GenerateReverseBytes(masm, load_store_type, old_value_reg, old_value_reg);
1633 FALLTHROUGH_INTENDED;
Vladimir Markoe1510d42020-11-13 11:07:13 +00001634 case GetAndUpdateOp::kAdd:
1635 if (arg.IsVRegister()) {
1636 VRegister old_value_vreg = old_value.IsD() ? old_value.D() : old_value.S();
1637 VRegister sum = temps.AcquireSameSizeAs(old_value_vreg);
1638 __ Fmov(old_value_vreg, old_value_reg);
1639 __ Fadd(sum, old_value_vreg, arg.IsD() ? arg.D() : arg.S());
1640 __ Fmov(new_value, sum);
1641 } else {
1642 __ Add(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1643 }
Vladimir Marko98873af2020-12-16 12:10:03 +00001644 if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
1645 GenerateReverseBytes(masm, load_store_type, new_value, new_value);
1646 }
Vladimir Markoe1510d42020-11-13 11:07:13 +00001647 break;
Vladimir Markoc2d5c702020-11-13 15:28:33 +00001648 case GetAndUpdateOp::kAnd:
1649 __ And(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1650 break;
1651 case GetAndUpdateOp::kOr:
1652 __ Orr(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1653 break;
1654 case GetAndUpdateOp::kXor:
1655 __ Eor(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1656 break;
Vladimir Markoe1510d42020-11-13 11:07:13 +00001657 }
1658 EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value, use_store_release);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00001659 __ Cbnz(store_result, &loop_label);
1660}
1661
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001662void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01001663 LocationSummary* locations =
1664 new (allocator_) LocationSummary(invoke,
1665 invoke->InputAt(1)->CanBeNull()
1666 ? LocationSummary::kCallOnSlowPath
1667 : LocationSummary::kNoCall,
1668 kIntrinsified);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001669 locations->SetInAt(0, Location::RequiresRegister());
1670 locations->SetInAt(1, Location::RequiresRegister());
1671 locations->AddTemp(Location::RequiresRegister());
1672 locations->AddTemp(Location::RequiresRegister());
1673 locations->AddTemp(Location::RequiresRegister());
jessicahandojo05765752016-09-09 19:01:32 -07001674 // Need a temporary register for the string compression feature.
1675 if (mirror::kUseStringCompression) {
1676 locations->AddTemp(Location::RequiresRegister());
jessicahandojo05765752016-09-09 19:01:32 -07001677 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001678 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001679}
1680
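// String.compareTo() code generation: after a reference-equality check and a null check of the
// argument, the lengths are compared and the common prefix is scanned 8 bytes at a time; the
// first differing character pair determines the result. With string compression enabled, a
// separate byte-vs-char loop handles operands that use different compression styles.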
1681void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01001682 MacroAssembler* masm = GetVIXLAssembler();
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001683 LocationSummary* locations = invoke->GetLocations();
1684
Alexandre Rames2ea91532016-08-11 17:04:14 +01001685 Register str = InputRegisterAt(invoke, 0);
1686 Register arg = InputRegisterAt(invoke, 1);
1687 DCHECK(str.IsW());
1688 DCHECK(arg.IsW());
Scott Wakeling1f36f412016-04-21 11:13:45 +01001689 Register out = OutputRegister(invoke);
1690
1691 Register temp0 = WRegisterFrom(locations->GetTemp(0));
1692 Register temp1 = WRegisterFrom(locations->GetTemp(1));
1693 Register temp2 = WRegisterFrom(locations->GetTemp(2));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001694 Register temp3;
jessicahandojo05765752016-09-09 19:01:32 -07001695 if (mirror::kUseStringCompression) {
1696 temp3 = WRegisterFrom(locations->GetTemp(3));
jessicahandojo05765752016-09-09 19:01:32 -07001697 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001698
Scott Wakeling97c72b72016-06-24 16:19:36 +01001699 vixl::aarch64::Label loop;
1700 vixl::aarch64::Label find_char_diff;
1701 vixl::aarch64::Label end;
jessicahandojo05765752016-09-09 19:01:32 -07001702 vixl::aarch64::Label different_compression;
Scott Wakeling1f36f412016-04-21 11:13:45 +01001703
1704 // Get offsets of count and value fields within a string object.
1705 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1706 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1707
Nicolas Geoffray512e04d2015-03-27 17:21:24 +00001708 // Note that the null check must have been done earlier.
Calin Juravle641547a2015-04-21 22:08:51 +01001709 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001710
Scott Wakeling1f36f412016-04-21 11:13:45 +01001711 // Take slow path and throw if input can be and is null.
1712 SlowPathCodeARM64* slow_path = nullptr;
1713 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1714 if (can_slow_path) {
Vladimir Marko174b2e22017-10-12 13:34:49 +01001715 slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001716 codegen_->AddSlowPath(slow_path);
1717 __ Cbz(arg, slow_path->GetEntryLabel());
1718 }
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001719
Scott Wakeling1f36f412016-04-21 11:13:45 +01001720 // Reference equality check, return 0 if same reference.
1721 __ Subs(out, str, arg);
1722 __ B(&end, eq);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001723
jessicahandojo05765752016-09-09 19:01:32 -07001724 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001725 // Load `count` fields of this and argument strings.
jessicahandojo05765752016-09-09 19:01:32 -07001726 __ Ldr(temp3, HeapOperand(str, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001727 __ Ldr(temp2, HeapOperand(arg, count_offset));
jessicahandojo05765752016-09-09 19:01:32 -07001728 // Clean out compression flag from lengths.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001729 __ Lsr(temp0, temp3, 1u);
1730 __ Lsr(temp1, temp2, 1u);
jessicahandojo05765752016-09-09 19:01:32 -07001731 } else {
1732 // Load lengths of this and argument strings.
1733 __ Ldr(temp0, HeapOperand(str, count_offset));
1734 __ Ldr(temp1, HeapOperand(arg, count_offset));
1735 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001736 // out = length diff.
1737 __ Subs(out, temp0, temp1);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001738 // temp0 = min(len(str), len(arg)).
1739 __ Csel(temp0, temp1, temp0, ge);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001740 // Shorter string is empty?
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001741 __ Cbz(temp0, &end);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001742
jessicahandojo05765752016-09-09 19:01:32 -07001743 if (mirror::kUseStringCompression) {
1744 // Check whether both strings use the same compression style; only then can this comparison loop be used.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001745 __ Eor(temp2, temp2, Operand(temp3));
1746 // Interleave with the compression flag extraction, which is needed for both paths,
1747 // and also set flags, which is needed only for the different-compression path.
1748 __ Ands(temp3.W(), temp3.W(), Operand(1));
1749 __ Tbnz(temp2, 0, &different_compression); // Does not use flags.
jessicahandojo05765752016-09-09 19:01:32 -07001750 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001751 // Store offset of string value in preparation for comparison loop.
1752 __ Mov(temp1, value_offset);
jessicahandojo05765752016-09-09 19:01:32 -07001753 if (mirror::kUseStringCompression) {
1754 // For string compression, calculate the number of bytes to compare (not chars).
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001755 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1756 __ Lsl(temp0, temp0, temp3);
jessicahandojo05765752016-09-09 19:01:32 -07001757 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001758
1759 UseScratchRegisterScope scratch_scope(masm);
1760 Register temp4 = scratch_scope.AcquireX();
1761
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001762 // Assertions that must hold in order to compare strings 8 bytes at a time.
Scott Wakeling1f36f412016-04-21 11:13:45 +01001763 DCHECK_ALIGNED(value_offset, 8);
1764 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1765
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001766 const size_t char_size = DataType::Size(DataType::Type::kUint16);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001767 DCHECK_EQ(char_size, 2u);
1768
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001769 // Promote temp2 to an X reg, ready for LDR.
1770 temp2 = temp2.X();
Scott Wakeling1f36f412016-04-21 11:13:45 +01001771
1772 // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
1773 __ Bind(&loop);
Alexandre Rames2ea91532016-08-11 17:04:14 +01001774 __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001775 __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
1776 __ Cmp(temp4, temp2);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001777 __ B(ne, &find_char_diff);
1778 __ Add(temp1, temp1, char_size * 4);
jessicahandojo05765752016-09-09 19:01:32 -07001779 // With string compression, we have compared 8 bytes, otherwise 4 chars.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001780 __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
1781 __ B(&loop, hi);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001782 __ B(&end);
1783
1784 // Promote temp1 to an X reg, ready for EOR.
1785 temp1 = temp1.X();
1786
jessicahandojo05765752016-09-09 19:01:32 -07001787 // Find the single character difference.
Scott Wakeling1f36f412016-04-21 11:13:45 +01001788 __ Bind(&find_char_diff);
1789 // Get the bit position of the first character that differs.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001790 __ Eor(temp1, temp2, temp4);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001791 __ Rbit(temp1, temp1);
1792 __ Clz(temp1, temp1);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001793
jessicahandojo05765752016-09-09 19:01:32 -07001794 // If the number of chars remaining <= the index where the difference occurs (0-3), then
Scott Wakeling1f36f412016-04-21 11:13:45 +01001795 // the difference occurs outside the remaining string data, so just return length diff (out).
jessicahandojo05765752016-09-09 19:01:32 -07001796 // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
1797 // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
1798 // unsigned when string compression is disabled.
1799 // When it's enabled, the comparison must be unsigned.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001800 __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
jessicahandojo05765752016-09-09 19:01:32 -07001801 __ B(ls, &end);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001802
Scott Wakeling1f36f412016-04-21 11:13:45 +01001803 // Extract the characters and calculate the difference.
jessicahandojo05765752016-09-09 19:01:32 -07001804 if (mirror::kUseStringCompression) {
jessicahandojo05765752016-09-09 19:01:32 -07001805 __ Bic(temp1, temp1, 0x7);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001806 __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
1807 } else {
1808 __ Bic(temp1, temp1, 0xf);
jessicahandojo05765752016-09-09 19:01:32 -07001809 }
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001810 __ Lsr(temp2, temp2, temp1);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001811 __ Lsr(temp4, temp4, temp1);
jessicahandojo05765752016-09-09 19:01:32 -07001812 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001813 // Prioritize the case of compressed strings and calculate such result first.
1814 __ Uxtb(temp1, temp4);
1815 __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
1816 __ Tbz(temp3, 0u, &end); // If actually compressed, we're done.
jessicahandojo05765752016-09-09 19:01:32 -07001817 }
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001818 __ Uxth(temp4, temp4);
1819 __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
jessicahandojo05765752016-09-09 19:01:32 -07001820
1821 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001822 __ B(&end);
1823 __ Bind(&different_compression);
1824
1825 // Comparison for different compression style.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001826 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
jessicahandojo05765752016-09-09 19:01:32 -07001827 DCHECK_EQ(c_char_size, 1u);
jessicahandojo05765752016-09-09 19:01:32 -07001828 temp1 = temp1.W();
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001829 temp2 = temp2.W();
1830 temp4 = temp4.W();
jessicahandojo05765752016-09-09 19:01:32 -07001831
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001832 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1833 // Note that flags have been set by the `str` compression flag extraction to `temp3`
1834 // before branching to the `different_compression` label.
1835 __ Csel(temp1, str, arg, eq); // Pointer to the compressed string.
1836 __ Csel(temp2, str, arg, ne); // Pointer to the uncompressed string.
jessicahandojo05765752016-09-09 19:01:32 -07001837
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001838 // We want to free up the temp3, currently holding `str` compression flag, for comparison.
1839 // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat
1840 // as unsigned. Start by freeing the bit with a LSL and continue further down by a SUB which
1841 // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1842 __ Lsl(temp0, temp0, 1u);
1843
1844 // Adjust temp1 and temp2 from string pointers to data pointers.
1845 __ Add(temp1, temp1, Operand(value_offset));
1846 __ Add(temp2, temp2, Operand(value_offset));
1847
1848 // Complete the move of the compression flag.
1849 __ Sub(temp0, temp0, Operand(temp3));
1850
1851 vixl::aarch64::Label different_compression_loop;
1852 vixl::aarch64::Label different_compression_diff;
1853
1854 __ Bind(&different_compression_loop);
1855 __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
1856 __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
1857 __ Subs(temp4, temp4, Operand(temp3));
1858 __ B(&different_compression_diff, ne);
1859 __ Subs(temp0, temp0, 2);
1860 __ B(&different_compression_loop, hi);
jessicahandojo05765752016-09-09 19:01:32 -07001861 __ B(&end);
1862
1863 // Calculate the difference.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001864 __ Bind(&different_compression_diff);
1865 __ Tst(temp0, Operand(1));
1866 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1867 "Expecting 0=compressed, 1=uncompressed");
1868 __ Cneg(out, temp4, ne);
jessicahandojo05765752016-09-09 19:01:32 -07001869 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001870
1871 __ Bind(&end);
1872
1873 if (can_slow_path) {
1874 __ Bind(slow_path->GetExitLabel());
1875 }
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001876}
1877
Vladimir Markoe39f14f2017-02-10 15:44:25 +00001878// The cut off for unrolling the loop in String.equals() intrinsic for const strings.
1879// The normal loop plus the pre-header is 9 instructions without string compression and 12
1880// instructions with string compression. We can compare up to 8 bytes in 4 instructions
1881// (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
1882// to 10 instructions for the unrolled loop.
1883constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
1884
1885static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1886 if (candidate->IsLoadString()) {
1887 HLoadString* load_string = candidate->AsLoadString();
1888 const DexFile& dex_file = load_string->GetDexFile();
1889 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1890 }
1891 return nullptr;
1892}
1893
Agi Csakiea34b402015-08-13 17:51:19 -07001894void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01001895 LocationSummary* locations =
1896 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
Agi Csakiea34b402015-08-13 17:51:19 -07001897 locations->SetInAt(0, Location::RequiresRegister());
1898 locations->SetInAt(1, Location::RequiresRegister());
Agi Csakiea34b402015-08-13 17:51:19 -07001899
Vladimir Markoe39f14f2017-02-10 15:44:25 +00001900 // For the generic implementation and for long const strings we need a temporary.
1901 // We do not need it for short const strings, up to 8 bytes, see code generation below.
1902 uint32_t const_string_length = 0u;
1903 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1904 if (const_string == nullptr) {
1905 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1906 }
1907 bool is_compressed =
1908 mirror::kUseStringCompression &&
1909 const_string != nullptr &&
1910 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1911 if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
1912 locations->AddTemp(Location::RequiresRegister());
1913 }
1914
1915 // TODO: If the String.equals() is used only for an immediately following HIf, we can
1916 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1917 // Then we shall need an extra temporary register instead of the output register.
Agi Csakiea34b402015-08-13 17:51:19 -07001918 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1919}
1920
1921void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01001922 MacroAssembler* masm = GetVIXLAssembler();
Agi Csakiea34b402015-08-13 17:51:19 -07001923 LocationSummary* locations = invoke->GetLocations();
1924
1925 Register str = WRegisterFrom(locations->InAt(0));
1926 Register arg = WRegisterFrom(locations->InAt(1));
1927 Register out = XRegisterFrom(locations->Out());
1928
1929 UseScratchRegisterScope scratch_scope(masm);
1930 Register temp = scratch_scope.AcquireW();
Vladimir Markoe39f14f2017-02-10 15:44:25 +00001931 Register temp1 = scratch_scope.AcquireW();
Agi Csakiea34b402015-08-13 17:51:19 -07001932
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001933 vixl::aarch64::Label loop;
Scott Wakeling97c72b72016-06-24 16:19:36 +01001934 vixl::aarch64::Label end;
1935 vixl::aarch64::Label return_true;
1936 vixl::aarch64::Label return_false;
Agi Csakiea34b402015-08-13 17:51:19 -07001937
1938 // Get offsets of count, value, and class fields within a string object.
1939 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1940 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1941 const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1942
1943 // Note that the null check must have been done earlier.
1944 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1945
Vladimir Marko53b52002016-05-24 19:30:45 +01001946 StringEqualsOptimizations optimizations(invoke);
1947 if (!optimizations.GetArgumentNotNull()) {
1948 // Check if input is null, return false if it is.
1949 __ Cbz(arg, &return_false);
1950 }
Agi Csakiea34b402015-08-13 17:51:19 -07001951
1952 // Reference equality check, return true if same reference.
1953 __ Cmp(str, arg);
1954 __ B(&return_true, eq);
1955
Vladimir Marko53b52002016-05-24 19:30:45 +01001956 if (!optimizations.GetArgumentIsString()) {
1957 // Instanceof check for the argument by comparing class fields.
1958 // All string objects must have the same type since String cannot be subclassed.
1959 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1960 // If the argument is a string object, its class field must be equal to receiver's class field.
Roland Levillain1d775d22018-09-07 13:56:57 +01001961 //
1962 // As the String class is expected to be non-movable, we can read the class
1963 // field from String.equals' arguments without read barriers.
1964 AssertNonMovableStringClass();
1965 // /* HeapReference<Class> */ temp = str->klass_
Vladimir Marko53b52002016-05-24 19:30:45 +01001966 __ Ldr(temp, MemOperand(str.X(), class_offset));
Roland Levillain1d775d22018-09-07 13:56:57 +01001967 // /* HeapReference<Class> */ temp1 = arg->klass_
Vladimir Marko53b52002016-05-24 19:30:45 +01001968 __ Ldr(temp1, MemOperand(arg.X(), class_offset));
Roland Levillain1d775d22018-09-07 13:56:57 +01001969 // Also, because we use the previously loaded class references only in the
1970 // following comparison, we don't need to unpoison them.
Vladimir Marko53b52002016-05-24 19:30:45 +01001971 __ Cmp(temp, temp1);
1972 __ B(&return_false, ne);
1973 }
Agi Csakiea34b402015-08-13 17:51:19 -07001974
Vladimir Markoe39f14f2017-02-10 15:44:25 +00001975 // Check if one of the inputs is a const string. Do not special-case both strings
1976 // being const; such cases should be handled by constant folding if needed.
1977 uint32_t const_string_length = 0u;
1978 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1979 if (const_string == nullptr) {
1980 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1981 if (const_string != nullptr) {
1982 std::swap(str, arg); // Make sure the const string is in `str`.
1983 }
1984 }
1985 bool is_compressed =
1986 mirror::kUseStringCompression &&
1987 const_string != nullptr &&
1988 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1989
1990 if (const_string != nullptr) {
1991 // Load `count` field of the argument string and check if it matches the const string.
1992 // This also compares the compression style; return false if it differs.
1993 __ Ldr(temp, MemOperand(arg.X(), count_offset));
Vladimir Marko26ec3ca2017-03-14 13:37:14 +00001994 // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate.
1995 scratch_scope.Release(temp1);
Vladimir Markoe39f14f2017-02-10 15:44:25 +00001996 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
Vladimir Marko26ec3ca2017-03-14 13:37:14 +00001997 temp1 = scratch_scope.AcquireW();
Vladimir Markoe39f14f2017-02-10 15:44:25 +00001998 __ B(&return_false, ne);
1999 } else {
2000 // Load `count` fields of this and argument strings.
2001 __ Ldr(temp, MemOperand(str.X(), count_offset));
2002 __ Ldr(temp1, MemOperand(arg.X(), count_offset));
2003 // Check if the `count` fields are equal; return false if they're not.
2004 // This also compares the compression style; return false if it differs.
2005 __ Cmp(temp, temp1);
2006 __ B(&return_false, ne);
2007 }
Agi Csakiea34b402015-08-13 17:51:19 -07002008
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002009 // Assertions that must hold in order to compare strings 8 bytes at a time.
Vladimir Marko984519c2017-08-23 10:45:29 +01002010 // Ok to do this because strings are zero-padded to kObjectAlignment.
Agi Csakiea34b402015-08-13 17:51:19 -07002011 DCHECK_ALIGNED(value_offset, 8);
2012 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
2013
Vladimir Markoe39f14f2017-02-10 15:44:25 +00002014 if (const_string != nullptr &&
Vladimir Marko984519c2017-08-23 10:45:29 +01002015 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
2016 : kShortConstStringEqualsCutoffInBytes / 2u)) {
Vladimir Markoe39f14f2017-02-10 15:44:25 +00002017 // Load and compare the contents. Though we know the contents of the short const string
2018 // at compile time, materializing constants may be more code than loading from memory.
2019 int32_t offset = value_offset;
2020 size_t remaining_bytes =
2021 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
2022 temp = temp.X();
2023 temp1 = temp1.X();
Vladimir Marko984519c2017-08-23 10:45:29 +01002024 while (remaining_bytes > sizeof(uint64_t)) {
Vladimir Markoe39f14f2017-02-10 15:44:25 +00002025 Register temp2 = XRegisterFrom(locations->GetTemp(0));
2026 __ Ldp(temp, temp1, MemOperand(str.X(), offset));
2027 __ Ldp(temp2, out, MemOperand(arg.X(), offset));
2028 __ Cmp(temp, temp2);
2029 __ Ccmp(temp1, out, NoFlag, eq);
2030 __ B(&return_false, ne);
2031 offset += 2u * sizeof(uint64_t);
2032 remaining_bytes -= 2u * sizeof(uint64_t);
2033 }
2034 if (remaining_bytes != 0u) {
2035 __ Ldr(temp, MemOperand(str.X(), offset));
2036 __ Ldr(temp1, MemOperand(arg.X(), offset));
2037 __ Cmp(temp, temp1);
2038 __ B(&return_false, ne);
2039 }
2040 } else {
2041 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
2042 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2043 "Expecting 0=compressed, 1=uncompressed");
2044 __ Cbz(temp, &return_true);
2045
2046 if (mirror::kUseStringCompression) {
2047 // For string compression, calculate the number of bytes to compare (not chars).
2048 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
2049 __ And(temp1, temp, Operand(1)); // Extract compression flag.
2050 __ Lsr(temp, temp, 1u); // Extract length.
2051 __ Lsl(temp, temp, temp1); // Calculate number of bytes to compare.
2052 }
2053
2054 // Store the offset of the string value in preparation for the comparison loop.
2055 __ Mov(temp1, value_offset);
2056
2057 temp1 = temp1.X();
2058 Register temp2 = XRegisterFrom(locations->GetTemp(0));
2059 // Loop to compare strings 8 bytes at a time starting at the front of the string.
Vladimir Markoe39f14f2017-02-10 15:44:25 +00002060 __ Bind(&loop);
2061 __ Ldr(out, MemOperand(str.X(), temp1));
2062 __ Ldr(temp2, MemOperand(arg.X(), temp1));
2063 __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
2064 __ Cmp(out, temp2);
2065 __ B(&return_false, ne);
2066 // With string compression, we have compared 8 bytes, otherwise 4 chars.
2067 __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
2068 __ B(&loop, hi);
jessicahandojo05765752016-09-09 19:01:32 -07002069 }
2070
Agi Csakiea34b402015-08-13 17:51:19 -07002071 // Return true and exit the function.
2072 // If loop does not result in returning false, we return true.
2073 __ Bind(&return_true);
2074 __ Mov(out, 1);
2075 __ B(&end);
2076
2077 // Return false and exit the function.
2078 __ Bind(&return_false);
2079 __ Mov(out, 0);
2080 __ Bind(&end);
2081}
2082
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002083static void GenerateVisitStringIndexOf(HInvoke* invoke,
Scott Wakeling97c72b72016-06-24 16:19:36 +01002084 MacroAssembler* masm,
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002085 CodeGeneratorARM64* codegen,
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002086 bool start_at_zero) {
2087 LocationSummary* locations = invoke->GetLocations();
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002088
2089 // Note that the null check must have been done earlier.
2090 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
2091
2092 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002093 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002094 SlowPathCodeARM64* slow_path = nullptr;
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002095 HInstruction* code_point = invoke->InputAt(1);
2096 if (code_point->IsIntConstant()) {
Vladimir Markoda051082016-05-17 16:10:20 +01002097 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002098 // Always needs the slow-path. We could directly dispatch to it, but this case should be
2099 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
Vladimir Marko174b2e22017-10-12 13:34:49 +01002100 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002101 codegen->AddSlowPath(slow_path);
2102 __ B(slow_path->GetEntryLabel());
2103 __ Bind(slow_path->GetExitLabel());
2104 return;
2105 }
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002106 } else if (code_point->GetType() != DataType::Type::kUint16) {
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002107 Register char_reg = WRegisterFrom(locations->InAt(1));
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002108 __ Tst(char_reg, 0xFFFF0000);
Vladimir Marko174b2e22017-10-12 13:34:49 +01002109 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002110 codegen->AddSlowPath(slow_path);
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002111 __ B(ne, slow_path->GetEntryLabel());
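    // Tst sets the flags from char_reg & 0xFFFF0000, so `ne` means the code point does not
    // fit in 16 bits and cannot be a char; such values are handled by the slow path.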
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002112 }
2113
2114 if (start_at_zero) {
2115 // Start-index = 0.
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002116 Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002117 __ Mov(tmp_reg, 0);
2118 }
2119
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002120 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
Roland Levillain42ad2882016-02-29 18:26:54 +00002121 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002122
2123 if (slow_path != nullptr) {
2124 __ Bind(slow_path->GetExitLabel());
2125 }
2126}
2127
2128void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002129 LocationSummary* locations = new (allocator_) LocationSummary(
2130 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002131 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2132 // best to align the inputs accordingly.
2133 InvokeRuntimeCallingConvention calling_convention;
2134 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2135 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002136 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002137
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002138 // Need to send start_index=0.
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002139 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2140}
2141
2142void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
Andreas Gampe3db70682018-12-26 15:12:03 -08002143 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002144}
2145
2146void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002147 LocationSummary* locations = new (allocator_) LocationSummary(
2148 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002149 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2150 // best to align the inputs accordingly.
2151 InvokeRuntimeCallingConvention calling_convention;
2152 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2153 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2154 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002155 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002156}
2157
2158void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
Andreas Gampe3db70682018-12-26 15:12:03 -08002159 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002160}
2161
Jeff Hao848f70a2014-01-15 13:49:50 -08002162void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002163 LocationSummary* locations = new (allocator_) LocationSummary(
2164 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
Jeff Hao848f70a2014-01-15 13:49:50 -08002165 InvokeRuntimeCallingConvention calling_convention;
2166 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2167 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2168 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2169 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002170 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
Jeff Hao848f70a2014-01-15 13:49:50 -08002171}
2172
2173void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01002174 MacroAssembler* masm = GetVIXLAssembler();
Jeff Hao848f70a2014-01-15 13:49:50 -08002175 LocationSummary* locations = invoke->GetLocations();
2176
2177 Register byte_array = WRegisterFrom(locations->InAt(0));
2178 __ Cmp(byte_array, 0);
Vladimir Marko174b2e22017-10-12 13:34:49 +01002179 SlowPathCodeARM64* slow_path =
2180 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08002181 codegen_->AddSlowPath(slow_path);
2182 __ B(eq, slow_path->GetEntryLabel());
2183
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002184 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
Roland Levillainf969a202016-03-09 16:14:00 +00002185 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
Jeff Hao848f70a2014-01-15 13:49:50 -08002186 __ Bind(slow_path->GetExitLabel());
2187}
2188
2189void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002190 LocationSummary* locations =
2191 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
Jeff Hao848f70a2014-01-15 13:49:50 -08002192 InvokeRuntimeCallingConvention calling_convention;
2193 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2194 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2195 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002196 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
Jeff Hao848f70a2014-01-15 13:49:50 -08002197}
2198
2199void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
Roland Levillaincc3839c2016-02-29 16:23:48 +00002200 // No need to emit code checking whether `locations->InAt(2)` is a null
2201 // pointer, as callers of the native method
2202 //
2203 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
2204 //
2205 // all include a null check on `data` before calling that method.
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002206 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
Roland Levillainf969a202016-03-09 16:14:00 +00002207 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
Jeff Hao848f70a2014-01-15 13:49:50 -08002208}
2209
2210void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002211 LocationSummary* locations = new (allocator_) LocationSummary(
2212 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
Jeff Hao848f70a2014-01-15 13:49:50 -08002213 InvokeRuntimeCallingConvention calling_convention;
2214 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002215 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
Jeff Hao848f70a2014-01-15 13:49:50 -08002216}
2217
2218void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01002219 MacroAssembler* masm = GetVIXLAssembler();
Jeff Hao848f70a2014-01-15 13:49:50 -08002220 LocationSummary* locations = invoke->GetLocations();
2221
2222 Register string_to_copy = WRegisterFrom(locations->InAt(0));
2223 __ Cmp(string_to_copy, 0);
Vladimir Marko174b2e22017-10-12 13:34:49 +01002224 SlowPathCodeARM64* slow_path =
2225 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08002226 codegen_->AddSlowPath(slow_path);
2227 __ B(eq, slow_path->GetEntryLabel());
2228
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002229 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
Roland Levillainf969a202016-03-09 16:14:00 +00002230 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
Jeff Hao848f70a2014-01-15 13:49:50 -08002231 __ Bind(slow_path->GetExitLabel());
2232}
2233
Vladimir Markoca6fff82017-10-03 14:49:14 +01002234static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002235 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002236 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2237 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002238
Vladimir Markoca6fff82017-10-03 14:49:14 +01002239 LocationSummary* const locations =
2240 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002241 InvokeRuntimeCallingConvention calling_convention;
2242
2243 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2244 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2245}
2246
Vladimir Markoca6fff82017-10-03 14:49:14 +01002247static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002248 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002249 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2250 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
2251 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002252
Vladimir Markoca6fff82017-10-03 14:49:14 +01002253 LocationSummary* const locations =
2254 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002255 InvokeRuntimeCallingConvention calling_convention;
2256
2257 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2258 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
2259 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2260}
2261
Nikita Iashchenko3fa6e462021-09-10 17:30:04 +01002262static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2263 DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
2264 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2265 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
2266 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(2)->GetType()));
2267 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2268
2269 LocationSummary* const locations =
2270 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2271
2272 locations->SetInAt(0, Location::RequiresFpuRegister());
2273 locations->SetInAt(1, Location::RequiresFpuRegister());
2274 locations->SetInAt(2, Location::RequiresFpuRegister());
2275 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2276}
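// Unlike the helpers above, this one requests plain FPU registers and no runtime call, so an
// intrinsic using it (presumably a fused multiply-add style operation, given the three
// floating-point inputs) can be emitted entirely inline.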
2277
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002278static void GenFPToFPCall(HInvoke* invoke,
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002279 CodeGeneratorARM64* codegen,
2280 QuickEntrypointEnum entry) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002281 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002282}
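// The transcendental Math intrinsics below all funnel through GenFPToFPCall: they do not
// inline the math itself but call the matching quick entrypoint (typically a thin wrapper
// around the corresponding libm routine, e.g. kQuickCos for Math.cos).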
2283
2284void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002285 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002286}
2287
2288void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002289 GenFPToFPCall(invoke, codegen_, kQuickCos);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002290}
2291
2292void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002293 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002294}
2295
2296void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002297 GenFPToFPCall(invoke, codegen_, kQuickSin);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002298}
2299
2300void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002301 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002302}
2303
2304void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002305 GenFPToFPCall(invoke, codegen_, kQuickAcos);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002306}
2307
2308void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002309 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002310}
2311
2312void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002313 GenFPToFPCall(invoke, codegen_, kQuickAsin);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002314}
2315
2316void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002317 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002318}
2319
2320void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002321 GenFPToFPCall(invoke, codegen_, kQuickAtan);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002322}
2323
2324void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002325 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002326}
2327
2328void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002329 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002330}
2331
2332void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002333 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002334}
2335
2336void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002337 GenFPToFPCall(invoke, codegen_, kQuickCosh);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002338}
2339
2340void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002341 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002342}
2343
2344void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002345 GenFPToFPCall(invoke, codegen_, kQuickExp);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002346}
2347
2348void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002349 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002350}
2351
2352void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002353 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002354}
2355
2356void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002357 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002358}
2359
2360void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002361 GenFPToFPCall(invoke, codegen_, kQuickLog);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002362}
2363
2364void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002365 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002366}
2367
2368void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002369 GenFPToFPCall(invoke, codegen_, kQuickLog10);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002370}
2371
2372void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002373 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002374}
2375
2376void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002377 GenFPToFPCall(invoke, codegen_, kQuickSinh);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002378}
2379
2380void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002381 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002382}
2383
2384void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002385 GenFPToFPCall(invoke, codegen_, kQuickTan);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002386}
2387
2388void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002389 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002390}
2391
2392void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002393 GenFPToFPCall(invoke, codegen_, kQuickTanh);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002394}
2395
2396void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002397 CreateFPFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002398}
2399
2400void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002401 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002402}
2403
Vladimir Marko4d179872018-01-19 14:50:10 +00002404void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
2405 CreateFPFPToFPCallLocations(allocator_, invoke);
2406}
2407
2408void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
2409 GenFPToFPCall(invoke, codegen_, kQuickPow);
2410}
2411
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002412void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002413 CreateFPFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002414}
2415
2416void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002417 GenFPToFPCall(invoke, codegen_, kQuickHypot);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002418}
2419
2420void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002421 CreateFPFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002422}
2423
2424void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002425 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002426}
2427
Tim Zhang25abd6c2016-01-19 23:39:24 +08002428void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002429 LocationSummary* locations =
2430 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
Tim Zhang25abd6c2016-01-19 23:39:24 +08002431 locations->SetInAt(0, Location::RequiresRegister());
2432 locations->SetInAt(1, Location::RequiresRegister());
2433 locations->SetInAt(2, Location::RequiresRegister());
2434 locations->SetInAt(3, Location::RequiresRegister());
2435 locations->SetInAt(4, Location::RequiresRegister());
2436
2437 locations->AddTemp(Location::RequiresRegister());
2438 locations->AddTemp(Location::RequiresRegister());
Scott Wakelingdf109d92016-04-22 11:35:56 +01002439 locations->AddTemp(Location::RequiresRegister());
Tim Zhang25abd6c2016-01-19 23:39:24 +08002440}
2441
2442void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01002443 MacroAssembler* masm = GetVIXLAssembler();
Tim Zhang25abd6c2016-01-19 23:39:24 +08002444 LocationSummary* locations = invoke->GetLocations();
2445
2446 // Check assumption that sizeof(Char) is 2 (used in scaling below).
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002447 const size_t char_size = DataType::Size(DataType::Type::kUint16);
Tim Zhang25abd6c2016-01-19 23:39:24 +08002448 DCHECK_EQ(char_size, 2u);
2449
2450 // Location of data in char array buffer.
2451 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2452
2453 // Location of char array data in string.
2454 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2455
2456 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2457 // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2458 Register srcObj = XRegisterFrom(locations->InAt(0));
2459 Register srcBegin = XRegisterFrom(locations->InAt(1));
2460 Register srcEnd = XRegisterFrom(locations->InAt(2));
2461 Register dstObj = XRegisterFrom(locations->InAt(3));
2462 Register dstBegin = XRegisterFrom(locations->InAt(4));
2463
2464 Register src_ptr = XRegisterFrom(locations->GetTemp(0));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002465 Register num_chr = XRegisterFrom(locations->GetTemp(1));
2466 Register tmp1 = XRegisterFrom(locations->GetTemp(2));
Tim Zhang25abd6c2016-01-19 23:39:24 +08002467
2468 UseScratchRegisterScope temps(masm);
2469 Register dst_ptr = temps.AcquireX();
Scott Wakelingdf109d92016-04-22 11:35:56 +01002470 Register tmp2 = temps.AcquireX();
Tim Zhang25abd6c2016-01-19 23:39:24 +08002471
jessicahandojo05765752016-09-09 19:01:32 -07002472 vixl::aarch64::Label done;
David Horstmann53d220e2019-07-16 16:00:10 +01002473 vixl::aarch64::Label compressed_string_vector_loop;
2474 vixl::aarch64::Label compressed_string_remainder;
jessicahandojo05765752016-09-09 19:01:32 -07002475 __ Sub(num_chr, srcEnd, srcBegin);
2476 // Early out for valid zero-length retrievals.
2477 __ Cbz(num_chr, &done);
Tim Zhang25abd6c2016-01-19 23:39:24 +08002478
Scott Wakelingdf109d92016-04-22 11:35:56 +01002479 // dst address start to copy to.
Tim Zhang25abd6c2016-01-19 23:39:24 +08002480 __ Add(dst_ptr, dstObj, Operand(data_offset));
2481 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
2482
jessicahandojo05765752016-09-09 19:01:32 -07002483 // src address to copy from.
2484 __ Add(src_ptr, srcObj, Operand(value_offset));
2485 vixl::aarch64::Label compressed_string_preloop;
2486 if (mirror::kUseStringCompression) {
2487 // Location of count in string.
2488 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2489 // Load the string's `count` field (length with the compression flag in bit 0).
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002490 __ Ldr(tmp2, MemOperand(srcObj, count_offset));
2491 __ Tbz(tmp2, 0, &compressed_string_preloop);
jessicahandojo05765752016-09-09 19:01:32 -07002492 }
2493 __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002494
Tim Zhang25abd6c2016-01-19 23:39:24 +08002495 // Do the copy.
Scott Wakeling97c72b72016-06-24 16:19:36 +01002496 vixl::aarch64::Label loop;
Scott Wakeling97c72b72016-06-24 16:19:36 +01002497 vixl::aarch64::Label remainder;
Scott Wakelingdf109d92016-04-22 11:35:56 +01002498
Scott Wakelingdf109d92016-04-22 11:35:56 +01002499 // Save repairing the value of num_chr on the < 8 character path.
2500 __ Subs(tmp1, num_chr, 8);
2501 __ B(lt, &remainder);
2502
2503 // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2504 __ Mov(num_chr, tmp1);
2505
2506 // Main loop, used for longer fetches, loads and stores 8 x 16-bit characters at a time.
2507 // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
Tim Zhang25abd6c2016-01-19 23:39:24 +08002508 __ Bind(&loop);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002509 __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002510 __ Subs(num_chr, num_chr, 8);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002511 __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002512 __ B(ge, &loop);
2513
2514 __ Adds(num_chr, num_chr, 8);
2515 __ B(eq, &done);
2516
2517 // Main loop for < 8 character case and remainder handling. Loads and stores one
2518 // 16-bit Java character at a time.
2519 __ Bind(&remainder);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002520 __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002521 __ Subs(num_chr, num_chr, 1);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002522 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002523 __ B(gt, &remainder);
jessicahandojo05765752016-09-09 19:01:32 -07002524 __ B(&done);
2525
2526 if (mirror::kUseStringCompression) {
David Horstmann53d220e2019-07-16 16:00:10 +01002527 // For compressed strings, acquire a SIMD temporary register.
Evgeny Astigeevich7d48dcd2019-10-16 12:46:28 +01002528 VRegister vtmp1 = temps.AcquireVRegisterOfSize(kQRegSize);
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002529 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
jessicahandojo05765752016-09-09 19:01:32 -07002530 DCHECK_EQ(c_char_size, 1u);
2531 __ Bind(&compressed_string_preloop);
2532 __ Add(src_ptr, src_ptr, Operand(srcBegin));
David Horstmann53d220e2019-07-16 16:00:10 +01002533
2534 // Save repairing the value of num_chr on the < 8 character path.
2535 __ Subs(tmp1, num_chr, 8);
2536 __ B(lt, &compressed_string_remainder);
2537
2538 // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2539 __ Mov(num_chr, tmp1);
2540
2541 // Main loop for compressed src, copying 8 characters (8-bit) to (16-bit) at a time.
2542 // Uses SIMD instructions.
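    // Each iteration widens 8 compressed (8-bit) characters to 8 UTF-16 code units:
    // Ld1 loads 8 bytes, Uxtl zero-extends them to halfwords, and St1 stores the 16 bytes.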
2543 __ Bind(&compressed_string_vector_loop);
2544 __ Ld1(vtmp1.V8B(), MemOperand(src_ptr, c_char_size * 8, PostIndex));
2545 __ Subs(num_chr, num_chr, 8);
2546 __ Uxtl(vtmp1.V8H(), vtmp1.V8B());
2547 __ St1(vtmp1.V8H(), MemOperand(dst_ptr, char_size * 8, PostIndex));
2548 __ B(ge, &compressed_string_vector_loop);
2549
2550 __ Adds(num_chr, num_chr, 8);
2551 __ B(eq, &done);
2552
2553 // Loop for < 8 character case and remainder handling with a compressed src.
2554 // Copies 1 character (8-bit) to (16-bit) at a time.
2555 __ Bind(&compressed_string_remainder);
jessicahandojo05765752016-09-09 19:01:32 -07002556 __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
2557 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2558 __ Subs(num_chr, num_chr, Operand(1));
David Horstmann53d220e2019-07-16 16:00:10 +01002559 __ B(gt, &compressed_string_remainder);
jessicahandojo05765752016-09-09 19:01:32 -07002560 }
Scott Wakelingdf109d92016-04-22 11:35:56 +01002561
Tim Zhang25abd6c2016-01-19 23:39:24 +08002562 __ Bind(&done);
2563}
2564
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002565// Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
2566// implementation there for longer copy lengths.
donghui.baic2ec9ad2016-03-10 14:02:55 +08002567static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
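// At 32 chars the inline copy moves at most 64 bytes; longer (or negative) lengths are left
// to the regular implementation.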
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002568
2569static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
2570 uint32_t at,
2571 HInstruction* input) {
2572 HIntConstant* const_input = input->AsIntConstant();
Scott Wakeling97c72b72016-06-24 16:19:36 +01002573 if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002574 locations->SetInAt(at, Location::RequiresRegister());
2575 } else {
2576 locations->SetInAt(at, Location::RegisterOrConstant(input));
2577 }
2578}
2579
2580void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2581 // Check to see if we have known failures that will cause us to have to bail out
2582 // to the runtime, and just generate the runtime call directly.
2583 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2584 HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
2585
2586 // The positions must be non-negative.
2587 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2588 (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2589 // We will have to fail anyways.
2590 return;
2591 }
2592
2593 // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2594 // native implementation.
2595 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2596 if (length != nullptr) {
2597 int32_t len = length->GetValue();
donghui.baic2ec9ad2016-03-10 14:02:55 +08002598 if (len < 0 || len > kSystemArrayCopyCharThreshold) {
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002599 // Just call as normal.
2600 return;
2601 }
2602 }
2603
Vladimir Markoca6fff82017-10-03 14:49:14 +01002604 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2605 LocationSummary* locations =
2606 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002607 // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2608 locations->SetInAt(0, Location::RequiresRegister());
2609 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2610 locations->SetInAt(2, Location::RequiresRegister());
2611 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2612 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2613
2614 locations->AddTemp(Location::RequiresRegister());
2615 locations->AddTemp(Location::RequiresRegister());
2616 locations->AddTemp(Location::RequiresRegister());
2617}
2618
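// Emits the bounds checks for one side of an arraycopy: `pos` must be non-negative, must not
// exceed length(input), and length(input) - pos must cover the copy length. Any violation
// branches to `slow_path`. When `length_is_input_length` is true the copy spans the whole
// input, so only pos == 0 can succeed.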
Scott Wakeling97c72b72016-06-24 16:19:36 +01002619static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002620 const Location& pos,
2621 const Register& input,
2622 const Location& length,
2623 SlowPathCodeARM64* slow_path,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002624 const Register& temp,
2625 bool length_is_input_length = false) {
2626 const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2627 if (pos.IsConstant()) {
2628 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2629 if (pos_const == 0) {
2630 if (!length_is_input_length) {
2631 // Check that length(input) >= length.
2632 __ Ldr(temp, MemOperand(input, length_offset));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002633 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002634 __ B(slow_path->GetEntryLabel(), lt);
2635 }
2636 } else {
2637 // Check that length(input) >= pos.
Nicolas Geoffrayfea1abd2016-07-06 12:09:12 +01002638 __ Ldr(temp, MemOperand(input, length_offset));
2639 __ Subs(temp, temp, pos_const);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002640 __ B(slow_path->GetEntryLabel(), lt);
2641
2642 // Check that (length(input) - pos) >= length.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002643 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002644 __ B(slow_path->GetEntryLabel(), lt);
2645 }
2646 } else if (length_is_input_length) {
2647 // The only way the copy can succeed is if pos is zero.
2648 __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
2649 } else {
2650 // Check that pos >= 0.
2651 Register pos_reg = WRegisterFrom(pos);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002652 __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002653
2654 // Check that pos <= length(input) && (length(input) - pos) >= length.
2655 __ Ldr(temp, MemOperand(input, length_offset));
2656 __ Subs(temp, temp, pos_reg);
2657 // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt).
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002658 __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002659 __ B(slow_path->GetEntryLabel(), lt);
2660 }
2661}
2662
Roland Levillain9cc0ea82017-03-16 11:25:59 +00002663// Compute base source address, base destination address, and end
2664// source address for System.arraycopy* intrinsics in `src_base`,
2665// `dst_base` and `src_end` respectively.
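// For an element size S and array data offset D this computes:
//   src_base = src + D + src_pos * S
//   dst_base = dst + D + dst_pos * S
//   src_end  = src_base + copy_length * S
// e.g. copying 4 chars (S = 2) starting at index 3 gives src_base = src + D + 6 and
// src_end = src_base + 8.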
Scott Wakeling97c72b72016-06-24 16:19:36 +01002666static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002667 DataType::Type type,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002668 const Register& src,
2669 const Location& src_pos,
2670 const Register& dst,
2671 const Location& dst_pos,
2672 const Location& copy_length,
2673 const Register& src_base,
2674 const Register& dst_base,
2675 const Register& src_end) {
Roland Levillain9cc0ea82017-03-16 11:25:59 +00002676 // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002677 DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
Roland Levillainebea3d22016-04-12 15:42:57 +01002678 << "Unexpected element type: " << type;
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002679 const int32_t element_size = DataType::Size(type);
2680 const int32_t element_size_shift = DataType::SizeShift(type);
Roland Levillain9cc0ea82017-03-16 11:25:59 +00002681 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002682
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002683 if (src_pos.IsConstant()) {
2684 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
Roland Levillainebea3d22016-04-12 15:42:57 +01002685 __ Add(src_base, src, element_size * constant + data_offset);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002686 } else {
Roland Levillainebea3d22016-04-12 15:42:57 +01002687 __ Add(src_base, src, data_offset);
2688 __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002689 }
2690
2691 if (dst_pos.IsConstant()) {
2692 int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
Roland Levillainebea3d22016-04-12 15:42:57 +01002693 __ Add(dst_base, dst, element_size * constant + data_offset);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002694 } else {
Roland Levillainebea3d22016-04-12 15:42:57 +01002695 __ Add(dst_base, dst, data_offset);
2696 __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002697 }
2698
2699 if (copy_length.IsConstant()) {
2700 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
Roland Levillainebea3d22016-04-12 15:42:57 +01002701 __ Add(src_end, src_base, element_size * constant);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002702 } else {
Roland Levillainebea3d22016-04-12 15:42:57 +01002703 __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002704 }
2705}
2706
2707void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01002708 MacroAssembler* masm = GetVIXLAssembler();
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002709 LocationSummary* locations = invoke->GetLocations();
2710 Register src = XRegisterFrom(locations->InAt(0));
2711 Location src_pos = locations->InAt(1);
2712 Register dst = XRegisterFrom(locations->InAt(2));
2713 Location dst_pos = locations->InAt(3);
2714 Location length = locations->InAt(4);
2715
Vladimir Marko174b2e22017-10-12 13:34:49 +01002716 SlowPathCodeARM64* slow_path =
2717 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002718 codegen_->AddSlowPath(slow_path);
2719
2720 // If source and destination are the same, take the slow path. Overlapping copy regions must be
2721 // copied in reverse and we can't know in all cases if it's needed.
2722 __ Cmp(src, dst);
2723 __ B(slow_path->GetEntryLabel(), eq);
2724
2725 // Bail out if the source is null.
2726 __ Cbz(src, slow_path->GetEntryLabel());
2727
2728 // Bail out if the destination is null.
2729 __ Cbz(dst, slow_path->GetEntryLabel());
2730
2731 if (!length.IsConstant()) {
Vladimir Markoc5646202016-11-28 16:03:15 +00002732 // Merge the following two comparisons into one:
2733 // If the length is negative, bail out (delegate to libcore's native implementation).
2734 // If the length > 32 then (currently) prefer libcore's native implementation.
donghui.baic2ec9ad2016-03-10 14:02:55 +08002735 __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
Vladimir Markoc5646202016-11-28 16:03:15 +00002736 __ B(slow_path->GetEntryLabel(), hi);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002737 } else {
2738 // We have already checked in the LocationsBuilder for the constant case.
2739 DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2740 DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
2741 }
2742
2743 Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2744 Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2745 Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2746
2747 CheckSystemArrayCopyPosition(masm,
2748 src_pos,
2749 src,
2750 length,
2751 slow_path,
2752 src_curr_addr,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002753 false);
2754
2755 CheckSystemArrayCopyPosition(masm,
2756 dst_pos,
2757 dst,
2758 length,
2759 slow_path,
2760 src_curr_addr,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002761 false);
2762
2763 src_curr_addr = src_curr_addr.X();
2764 dst_curr_addr = dst_curr_addr.X();
2765 src_stop_addr = src_stop_addr.X();
2766
2767 GenSystemArrayCopyAddresses(masm,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002768 DataType::Type::kUint16,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002769 src,
2770 src_pos,
2771 dst,
2772 dst_pos,
2773 length,
2774 src_curr_addr,
2775 dst_curr_addr,
2776 src_stop_addr);
2777
2778 // Iterate over the arrays and do a raw copy of the chars.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002779 const int32_t char_size = DataType::Size(DataType::Type::kUint16);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002780 UseScratchRegisterScope temps(masm);
2781 Register tmp = temps.AcquireW();
Scott Wakeling97c72b72016-06-24 16:19:36 +01002782 vixl::aarch64::Label loop, done;
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002783 __ Bind(&loop);
2784 __ Cmp(src_curr_addr, src_stop_addr);
2785 __ B(&done, eq);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002786 __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2787 __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002788 __ B(&loop);
2789 __ Bind(&done);
2790
2791 __ Bind(slow_path->GetExitLabel());
2792}
2793
donghui.baic2ec9ad2016-03-10 14:02:55 +08002794// We can choose to use libcore's native implementation for longer copy lengths.
2795static constexpr int32_t kSystemArrayCopyThreshold = 128;
2796
2797 // CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
2798// We want to use two temporary registers in order to reduce the register pressure in arm64.
2799// So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
2800void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
Roland Levillain0b671c02016-08-19 12:02:34 +01002801 // The only read barrier implementation supporting the
2802 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2803 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
Roland Levillain3d312422016-06-23 13:53:42 +01002804 return;
2805 }
2806
donghui.baic2ec9ad2016-03-10 14:02:55 +08002807 // Check to see if we have known failures that will cause us to have to bail out
2808 // to the runtime, and just generate the runtime call directly.
2809 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2810 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2811
2812 // The positions must be non-negative.
2813 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2814 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
2815 // We will have to fail anyways.
2816 return;
2817 }
2818
2819 // The length must be >= 0.
2820 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2821 if (length != nullptr) {
2822 int32_t len = length->GetValue();
2823 if (len < 0 || len >= kSystemArrayCopyThreshold) {
2824 // Just call as normal.
2825 return;
2826 }
2827 }
2828
2829 SystemArrayCopyOptimizations optimizations(invoke);
2830
2831 if (optimizations.GetDestinationIsSource()) {
2832 if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
2833 // We only support backward copying if source and destination are the same.
2834 return;
2835 }
2836 }
2837
2838 if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
2839 // We currently don't intrinsify primitive copying.
2840 return;
2841 }
2842
Vladimir Markoca6fff82017-10-03 14:49:14 +01002843 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2844 LocationSummary* locations =
2845 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002846 // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
2847 locations->SetInAt(0, Location::RequiresRegister());
2848 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2849 locations->SetInAt(2, Location::RequiresRegister());
2850 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2851 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2852
2853 locations->AddTemp(Location::RequiresRegister());
2854 locations->AddTemp(Location::RequiresRegister());
Roland Levillain0b671c02016-08-19 12:02:34 +01002855 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2856 // Temporary register IP0, obtained from the VIXL scratch register
2857 // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
2858 // (because that register is clobbered by ReadBarrierMarkRegX
Roland Levillain54f869e2017-03-06 13:54:11 +00002859 // entry points). It cannot be used in calls to
2860 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
2861 // either. For these reasons, get a third extra temporary register
2862 // from the register allocator.
Roland Levillain0b671c02016-08-19 12:02:34 +01002863 locations->AddTemp(Location::RequiresRegister());
Roland Levillain54f869e2017-03-06 13:54:11 +00002864 } else {
2865 // Cases other than Baker read barriers: the third temporary will
2866 // be acquired from the VIXL scratch register pool.
Roland Levillain0b671c02016-08-19 12:02:34 +01002867 }
donghui.baic2ec9ad2016-03-10 14:02:55 +08002868}
2869
2870void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
Roland Levillain0b671c02016-08-19 12:02:34 +01002871 // The only read barrier implementation supporting the
2872 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2873 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
Roland Levillain3d312422016-06-23 13:53:42 +01002874
Scott Wakeling97c72b72016-06-24 16:19:36 +01002875 MacroAssembler* masm = GetVIXLAssembler();
donghui.baic2ec9ad2016-03-10 14:02:55 +08002876 LocationSummary* locations = invoke->GetLocations();
2877
2878 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2879 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2880 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2881 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
Roland Levillain0b671c02016-08-19 12:02:34 +01002882 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
donghui.baic2ec9ad2016-03-10 14:02:55 +08002883
2884 Register src = XRegisterFrom(locations->InAt(0));
2885 Location src_pos = locations->InAt(1);
2886 Register dest = XRegisterFrom(locations->InAt(2));
2887 Location dest_pos = locations->InAt(3);
2888 Location length = locations->InAt(4);
2889 Register temp1 = WRegisterFrom(locations->GetTemp(0));
Roland Levillain0b671c02016-08-19 12:02:34 +01002890 Location temp1_loc = LocationFrom(temp1);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002891 Register temp2 = WRegisterFrom(locations->GetTemp(1));
Roland Levillain0b671c02016-08-19 12:02:34 +01002892 Location temp2_loc = LocationFrom(temp2);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002893
Vladimir Marko174b2e22017-10-12 13:34:49 +01002894 SlowPathCodeARM64* intrinsic_slow_path =
2895 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Roland Levillain0b671c02016-08-19 12:02:34 +01002896 codegen_->AddSlowPath(intrinsic_slow_path);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002897
Scott Wakeling97c72b72016-06-24 16:19:36 +01002898 vixl::aarch64::Label conditions_on_positions_validated;
donghui.baic2ec9ad2016-03-10 14:02:55 +08002899 SystemArrayCopyOptimizations optimizations(invoke);
2900
donghui.baic2ec9ad2016-03-10 14:02:55 +08002901 // If source and destination are the same, we go to slow path if we need to do
2902 // forward copying.
2903 if (src_pos.IsConstant()) {
2904 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2905 if (dest_pos.IsConstant()) {
Nicolas Geoffray9f65db82016-07-07 12:07:42 +01002906 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2907 if (optimizations.GetDestinationIsSource()) {
2908 // Checked when building locations.
2909 DCHECK_GE(src_pos_constant, dest_pos_constant);
2910 } else if (src_pos_constant < dest_pos_constant) {
2911 __ Cmp(src, dest);
Roland Levillain0b671c02016-08-19 12:02:34 +01002912 __ B(intrinsic_slow_path->GetEntryLabel(), eq);
Nicolas Geoffray9f65db82016-07-07 12:07:42 +01002913 }
donghui.baic2ec9ad2016-03-10 14:02:55 +08002914 // Checked when building locations.
2915 DCHECK(!optimizations.GetDestinationIsSource()
2916 || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
2917 } else {
2918 if (!optimizations.GetDestinationIsSource()) {
Nicolas Geoffray9f65db82016-07-07 12:07:42 +01002919 __ Cmp(src, dest);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002920 __ B(&conditions_on_positions_validated, ne);
2921 }
2922 __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
Roland Levillain0b671c02016-08-19 12:02:34 +01002923 __ B(intrinsic_slow_path->GetEntryLabel(), gt);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002924 }
2925 } else {
2926 if (!optimizations.GetDestinationIsSource()) {
Nicolas Geoffray9f65db82016-07-07 12:07:42 +01002927 __ Cmp(src, dest);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002928 __ B(&conditions_on_positions_validated, ne);
2929 }
2930 __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
2931 OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
Roland Levillain0b671c02016-08-19 12:02:34 +01002932 __ B(intrinsic_slow_path->GetEntryLabel(), lt);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002933 }
2934
2935 __ Bind(&conditions_on_positions_validated);
2936
2937 if (!optimizations.GetSourceIsNotNull()) {
2938 // Bail out if the source is null.
Roland Levillain0b671c02016-08-19 12:02:34 +01002939 __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
donghui.baic2ec9ad2016-03-10 14:02:55 +08002940 }
2941
2942 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2943 // Bail out if the destination is null.
Roland Levillain0b671c02016-08-19 12:02:34 +01002944 __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
donghui.baic2ec9ad2016-03-10 14:02:55 +08002945 }
2946
2947 // We have already checked in the LocationsBuilder for the constant case.
2948 if (!length.IsConstant() &&
2949 !optimizations.GetCountIsSourceLength() &&
2950 !optimizations.GetCountIsDestinationLength()) {
Vladimir Markoc5646202016-11-28 16:03:15 +00002951 // Merge the following two comparisons into one:
2952 // If the length is negative, bail out (delegate to libcore's native implementation).
2953 // If the length >= 128 then (currently) prefer libcore's native implementation.
donghui.baic2ec9ad2016-03-10 14:02:55 +08002954 __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
Vladimir Markoc5646202016-11-28 16:03:15 +00002955 __ B(intrinsic_slow_path->GetEntryLabel(), hs);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002956 }
2957 // Validity checks: source.
2958 CheckSystemArrayCopyPosition(masm,
2959 src_pos,
2960 src,
2961 length,
Roland Levillain0b671c02016-08-19 12:02:34 +01002962 intrinsic_slow_path,
donghui.baic2ec9ad2016-03-10 14:02:55 +08002963 temp1,
donghui.baic2ec9ad2016-03-10 14:02:55 +08002964 optimizations.GetCountIsSourceLength());
2965
2966 // Validity checks: dest.
2967 CheckSystemArrayCopyPosition(masm,
2968 dest_pos,
2969 dest,
2970 length,
Roland Levillain0b671c02016-08-19 12:02:34 +01002971 intrinsic_slow_path,
donghui.baic2ec9ad2016-03-10 14:02:55 +08002972 temp1,
donghui.baic2ec9ad2016-03-10 14:02:55 +08002973 optimizations.GetCountIsDestinationLength());
2974 {
2975 // We use a block to end the scratch scope before the write barrier, thus
2976 // freeing the temporary registers so they can be used in `MarkGCCard`.
2977 UseScratchRegisterScope temps(masm);
Vladimir Markof4f2daa2017-03-20 18:26:59 +00002978 Location temp3_loc; // Used only for Baker read barrier.
Roland Levillain54f869e2017-03-06 13:54:11 +00002979 Register temp3;
2980 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
Vladimir Markof4f2daa2017-03-20 18:26:59 +00002981 temp3_loc = locations->GetTemp(2);
2982 temp3 = WRegisterFrom(temp3_loc);
Roland Levillain54f869e2017-03-06 13:54:11 +00002983 } else {
2984 temp3 = temps.AcquireW();
2985 }
Roland Levillain0b671c02016-08-19 12:02:34 +01002986
donghui.baic2ec9ad2016-03-10 14:02:55 +08002987 if (!optimizations.GetDoesNotNeedTypeCheck()) {
2988 // Check whether all elements of the source array are assignable to the component
2989 // type of the destination array. We do two checks: the classes are the same,
2990 // or the destination is Object[]. If none of these checks succeed, we go to the
2991 // slow path.
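      // For example, a copy from String[] to String[] passes the first check (same class),
      // a copy from String[] to Object[] can pass the second one (the destination component
      // type is Object, whose super class is null), and a copy from String[] to Integer[]
      // fails both and is handled by the slow path.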
donghui.baic2ec9ad2016-03-10 14:02:55 +08002992
Roland Levillain0b671c02016-08-19 12:02:34 +01002993 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2994 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2995 // /* HeapReference<Class> */ temp1 = src->klass_
2996 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2997 temp1_loc,
2998 src.W(),
2999 class_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003000 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003001 /* needs_null_check= */ false,
3002 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003003 // Bail out if the source is not a non-primitive array.
3004 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3005 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3006 temp1_loc,
3007 temp1,
3008 component_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003009 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003010 /* needs_null_check= */ false,
3011 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003012 __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
3013 // If heap poisoning is enabled, `temp1` has been unpoisoned
3014 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3015 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
3016 __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
3017 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3018 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
donghui.baic2ec9ad2016-03-10 14:02:55 +08003019 }
Roland Levillain0b671c02016-08-19 12:02:34 +01003020
3021 // /* HeapReference<Class> */ temp1 = dest->klass_
3022 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3023 temp1_loc,
3024 dest.W(),
3025 class_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003026 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003027 /* needs_null_check= */ false,
3028 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003029
3030 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3031 // Bail out if the destination is not a non primitive array.
3032 //
3033 // Register `temp1` is not trashed by the read barrier emitted
3034 // by GenerateFieldLoadWithBakerReadBarrier below, as that
3035 // method produces a call to a ReadBarrierMarkRegX entry point,
3036 // which saves all potentially live registers, including
3037 // temporaries such as `temp1`.
3038 // /* HeapReference<Class> */ temp2 = temp1->component_type_
3039 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3040 temp2_loc,
3041 temp1,
3042 component_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003043 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003044 /* needs_null_check= */ false,
3045 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003046 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
3047 // If heap poisoning is enabled, `temp2` has been unpoisoned
3048 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3049 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
3050 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
3051 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3052 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
3053 }
3054
3055 // For the same reason given earlier, `temp1` is not trashed by the
3056 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
3057 // /* HeapReference<Class> */ temp2 = src->klass_
3058 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3059 temp2_loc,
3060 src.W(),
3061 class_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003062 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003063 /* needs_null_check= */ false,
3064 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003065 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
3066 __ Cmp(temp1, temp2);
3067
3068 if (optimizations.GetDestinationIsTypedObjectArray()) {
3069 vixl::aarch64::Label do_copy;
3070 __ B(&do_copy, eq);
3071 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3072 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3073 temp1_loc,
3074 temp1,
3075 component_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003076 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003077 /* needs_null_check= */ false,
3078 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003079 // /* HeapReference<Class> */ temp1 = temp1->super_class_
3080 // We do not need to emit a read barrier for the following
3081 // heap reference load, as `temp1` is only used in a
3082 // comparison with null below, and this reference is not
3083 // kept afterwards.
3084 __ Ldr(temp1, HeapOperand(temp1, super_offset));
3085 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
3086 __ Bind(&do_copy);
3087 } else {
3088 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
3089 }
donghui.baic2ec9ad2016-03-10 14:02:55 +08003090 } else {
Roland Levillain0b671c02016-08-19 12:02:34 +01003091 // Non read barrier code.
3092
3093 // /* HeapReference<Class> */ temp1 = dest->klass_
3094 __ Ldr(temp1, MemOperand(dest, class_offset));
3095 // /* HeapReference<Class> */ temp2 = src->klass_
3096 __ Ldr(temp2, MemOperand(src, class_offset));
3097 bool did_unpoison = false;
3098 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
3099 !optimizations.GetSourceIsNonPrimitiveArray()) {
3100 // One or two of the references need to be unpoisoned. Unpoison them
3101 // both to make the identity check valid.
3102 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3103 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3104 did_unpoison = true;
3105 }
3106
3107 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3108 // Bail out if the destination is not a non primitive array.
3109 // /* HeapReference<Class> */ temp3 = temp1->component_type_
3110 __ Ldr(temp3, HeapOperand(temp1, component_offset));
3111 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
3112 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
3113 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
3114 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
3115 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3116 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
3117 }
3118
3119 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3120 // Bail out if the source is not a non primitive array.
3121 // /* HeapReference<Class> */ temp3 = temp2->component_type_
3122 __ Ldr(temp3, HeapOperand(temp2, component_offset));
3123 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
3124 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
3125 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
3126 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
3127 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3128 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
3129 }
3130
3131 __ Cmp(temp1, temp2);
3132
3133 if (optimizations.GetDestinationIsTypedObjectArray()) {
3134 vixl::aarch64::Label do_copy;
3135 __ B(&do_copy, eq);
3136 if (!did_unpoison) {
3137 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3138 }
3139 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3140 __ Ldr(temp1, HeapOperand(temp1, component_offset));
3141 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3142 // /* HeapReference<Class> */ temp1 = temp1->super_class_
3143 __ Ldr(temp1, HeapOperand(temp1, super_offset));
3144 // No need to unpoison the result, we're comparing against null.
3145 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
3146 __ Bind(&do_copy);
3147 } else {
3148 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
3149 }
donghui.baic2ec9ad2016-03-10 14:02:55 +08003150 }
3151 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3152 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3153 // Bail out if the source is not a non primitive array.
Roland Levillain0b671c02016-08-19 12:02:34 +01003154 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3155 // /* HeapReference<Class> */ temp1 = src->klass_
3156 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3157 temp1_loc,
3158 src.W(),
3159 class_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003160 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003161 /* needs_null_check= */ false,
3162 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003163 // /* HeapReference<Class> */ temp2 = temp1->component_type_
3164 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3165 temp2_loc,
3166 temp1,
3167 component_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003168 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003169 /* needs_null_check= */ false,
3170 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003171 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
3172 // If heap poisoning is enabled, `temp2` has been unpoisoned
3173 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3174 } else {
3175 // /* HeapReference<Class> */ temp1 = src->klass_
3176 __ Ldr(temp1, HeapOperand(src.W(), class_offset));
3177 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3178 // /* HeapReference<Class> */ temp2 = temp1->component_type_
3179 __ Ldr(temp2, HeapOperand(temp1, component_offset));
3180 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
3181 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3182 }
3183 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
3184 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
donghui.baic2ec9ad2016-03-10 14:02:55 +08003185 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
Roland Levillain0b671c02016-08-19 12:02:34 +01003186 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
donghui.baic2ec9ad2016-03-10 14:02:55 +08003187 }
3188
Roland Levillain1663d162017-03-17 15:15:21 +00003189 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
3190 // Constant zero length: no need to emit the loop code at all.
Roland Levillain0b671c02016-08-19 12:02:34 +01003191 } else {
Roland Levillain1663d162017-03-17 15:15:21 +00003192 Register src_curr_addr = temp1.X();
3193 Register dst_curr_addr = temp2.X();
3194 Register src_stop_addr = temp3.X();
3195 vixl::aarch64::Label done;
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003196 const DataType::Type type = DataType::Type::kReference;
3197 const int32_t element_size = DataType::Size(type);
Roland Levillain1663d162017-03-17 15:15:21 +00003198
3199 if (length.IsRegister()) {
3200 // Don't enter the copy loop if the length is zero.
3201 __ Cbz(WRegisterFrom(length), &done);
3202 }
3203
3204 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3205 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
3206
3207 // SystemArrayCopy implementation for Baker read barriers (see
Roland Levillain9983e302017-07-14 14:34:22 +01003208 // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
Roland Levillain1663d162017-03-17 15:15:21 +00003209 //
3210 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
3211 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
3212 // bool is_gray = (rb_state == ReadBarrier::GrayState());
3213 // if (is_gray) {
3214 // // Slow-path copy.
3215 // do {
3216 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
3217 // } while (src_ptr != end_ptr)
3218 // } else {
3219 // // Fast-path copy.
3220 // do {
3221 // *dest_ptr++ = *src_ptr++;
3222 // } while (src_ptr != end_ptr)
3223 // }
3224
3225 // Make sure `tmp` is not IP0, as it is clobbered by
3226 // ReadBarrierMarkRegX entry points in
3227 // ReadBarrierSystemArrayCopySlowPathARM64.
Roland Levillain1ca955d2017-04-13 19:34:30 +01003228 DCHECK(temps.IsAvailable(ip0));
Roland Levillain1663d162017-03-17 15:15:21 +00003229 temps.Exclude(ip0);
Roland Levillain0b671c02016-08-19 12:02:34 +01003230 Register tmp = temps.AcquireW();
Roland Levillain1663d162017-03-17 15:15:21 +00003231 DCHECK_NE(LocationFrom(tmp).reg(), IP0);
Roland Levillain1ca955d2017-04-13 19:34:30 +01003232 // Put IP0 back in the pool so that VIXL has at least one
3233 // scratch register available to emit macro-instructions (note
3234 // that IP1 is already used for `tmp`). Indeed some
3235 // macro-instructions used in GenSystemArrayCopyAddresses
3236 // (invoked hereunder) may require a scratch register (for
3237 // instance to emit a load with a large constant offset).
3238 temps.Include(ip0);
Roland Levillain1663d162017-03-17 15:15:21 +00003239
3240 // /* int32_t */ monitor = src->monitor_
3241 __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
3242 // /* LockWord */ lock_word = LockWord(monitor)
3243 static_assert(sizeof(LockWord) == sizeof(int32_t),
3244 "art::LockWord and int32_t have different sizes.");
3245
3246 // Introduce a dependency on the lock_word including rb_state,
3247 // to prevent load-load reordering, and without using
3248 // a memory barrier (which would be more expensive).
3249 // `src` is unchanged by this operation, but its value now depends
3250 // on `tmp`.
3251 __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
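          // Since `tmp` was loaded with a 32-bit LDR, the upper half of `tmp.X()` is zero and
          // `tmp.X() LSR 32` is always 0: the ADD leaves the value of `src` unchanged while
          // still making it data-dependent on the monitor load above.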
3252
3253 // Compute base source address, base destination address, and end
3254 // source address for System.arraycopy* intrinsics in `src_base`,
3255 // `dst_base` and `src_end` respectively.
3256 // Note that `src_curr_addr` is computed from `src` (and
3257 // `src_pos`) here, and thus honors the artificial dependency
3258 // of `src` on `tmp`.
3259 GenSystemArrayCopyAddresses(masm,
3260 type,
3261 src,
3262 src_pos,
3263 dest,
3264 dest_pos,
3265 length,
3266 src_curr_addr,
3267 dst_curr_addr,
3268 src_stop_addr);
3269
3270 // Slow path used to copy array when `src` is gray.
3271 SlowPathCodeARM64* read_barrier_slow_path =
Vladimir Marko174b2e22017-10-12 13:34:49 +01003272 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
3273 invoke, LocationFrom(tmp));
Roland Levillain1663d162017-03-17 15:15:21 +00003274 codegen_->AddSlowPath(read_barrier_slow_path);
3275
3276 // Given the numeric representation, it's enough to check the low bit of the rb_state.
Roland Levillain14e5a292018-06-28 12:00:56 +01003277 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
Roland Levillain1663d162017-03-17 15:15:21 +00003278 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3279 __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
3280
3281 // Fast-path copy.
3282 // Iterate over the arrays and do a raw copy of the objects. We don't need to
3283 // poison/unpoison.
3284 vixl::aarch64::Label loop;
3285 __ Bind(&loop);
Roland Levillain0b671c02016-08-19 12:02:34 +01003286 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
3287 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
Roland Levillain1663d162017-03-17 15:15:21 +00003288 __ Cmp(src_curr_addr, src_stop_addr);
3289 __ B(&loop, ne);
3290
3291 __ Bind(read_barrier_slow_path->GetExitLabel());
3292 } else {
3293 // Non read barrier code.
3294 // Compute base source address, base destination address, and end
3295 // source address for System.arraycopy* intrinsics in `src_base`,
3296 // `dst_base` and `src_end` respectively.
3297 GenSystemArrayCopyAddresses(masm,
3298 type,
3299 src,
3300 src_pos,
3301 dest,
3302 dest_pos,
3303 length,
3304 src_curr_addr,
3305 dst_curr_addr,
3306 src_stop_addr);
3307 // Iterate over the arrays and do a raw copy of the objects. We don't need to
3308 // poison/unpoison.
3309 vixl::aarch64::Label loop;
3310 __ Bind(&loop);
3311 {
3312 Register tmp = temps.AcquireW();
3313 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
3314 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
3315 }
3316 __ Cmp(src_curr_addr, src_stop_addr);
3317 __ B(&loop, ne);
Roland Levillain0b671c02016-08-19 12:02:34 +01003318 }
Roland Levillain0b671c02016-08-19 12:02:34 +01003319 __ Bind(&done);
donghui.baic2ec9ad2016-03-10 14:02:55 +08003320 }
donghui.baic2ec9ad2016-03-10 14:02:55 +08003321 }
Roland Levillain9cc0ea82017-03-16 11:25:59 +00003322
donghui.baic2ec9ad2016-03-10 14:02:55 +08003323 // We only need one card marking on the destination array.
Andreas Gampe3db70682018-12-26 15:12:03 -08003324 codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false);
donghui.baic2ec9ad2016-03-10 14:02:55 +08003325
Roland Levillain0b671c02016-08-19 12:02:34 +01003326 __ Bind(intrinsic_slow_path->GetExitLabel());
donghui.baic2ec9ad2016-03-10 14:02:55 +08003327}
3328
Anton Kirilova3ffea22016-04-07 17:02:37 +01003329static void GenIsInfinite(LocationSummary* locations,
3330 bool is64bit,
Scott Wakeling97c72b72016-06-24 16:19:36 +01003331 MacroAssembler* masm) {
Artem Serova07de552020-11-01 22:42:43 +00003332 Operand infinity(0);
3333 Operand tst_mask(0);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003334 Register out;
3335
3336 if (is64bit) {
Artem Serova07de552020-11-01 22:42:43 +00003337 infinity = Operand(kPositiveInfinityDouble);
Artem Serov67e4a4d2020-02-06 15:11:36 +00003338 tst_mask = MaskLeastSignificant<uint64_t>(63);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003339 out = XRegisterFrom(locations->Out());
3340 } else {
Artem Serova07de552020-11-01 22:42:43 +00003341 infinity = Operand(kPositiveInfinityFloat);
Artem Serov67e4a4d2020-02-06 15:11:36 +00003342 tst_mask = MaskLeastSignificant<uint32_t>(31);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003343 out = WRegisterFrom(locations->Out());
3344 }
3345
Anton Kirilova3ffea22016-04-07 17:02:37 +01003346 MoveFPToInt(locations, is64bit, masm);
Artem Serov67e4a4d2020-02-06 15:11:36 +00003347 // Checks whether exponent bits are all 1 and fraction bits are all 0.
Anton Kirilova3ffea22016-04-07 17:02:37 +01003348 __ Eor(out, out, infinity);
Artem Serov67e4a4d2020-02-06 15:11:36 +00003349 // TST bitmask is used to mask out the sign bit: either 0x7fffffff or 0x7fffffffffffffff
3350 // depending on is64bit.
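  // Net effect: out is 1 exactly when ((bits ^ infinity) & ~sign_bit) == 0, i.e. when the
  // input's magnitude bit pattern equals that of +Infinity. For example, a float input of
  // 0xff800000 (-Inf) gives 0xff800000 ^ 0x7f800000 = 0x80000000, which the 0x7fffffff mask
  // reduces to 0, so the Tst below sets Z and Cset produces 1.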
3351 __ Tst(out, tst_mask);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003352 __ Cset(out, eq);
3353}
3354
3355void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01003356 CreateFPToIntLocations(allocator_, invoke);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003357}
3358
3359void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
Andreas Gampe3db70682018-12-26 15:12:03 -08003360 GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
Anton Kirilova3ffea22016-04-07 17:02:37 +01003361}
3362
3363void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01003364 CreateFPToIntLocations(allocator_, invoke);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003365}
3366
3367void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
Andreas Gampe3db70682018-12-26 15:12:03 -08003368 GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
Anton Kirilova3ffea22016-04-07 17:02:37 +01003369}
3370
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003371void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
3372 InvokeRuntimeCallingConvention calling_convention;
3373 IntrinsicVisitor::ComputeIntegerValueOfLocations(
3374 invoke,
3375 codegen_,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003376 calling_convention.GetReturnLocation(DataType::Type::kReference),
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003377 Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3378}
3379
3380void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
Vladimir Marko6fd16062018-06-26 11:02:04 +01003381 IntrinsicVisitor::IntegerValueOfInfo info =
3382 IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003383 LocationSummary* locations = invoke->GetLocations();
3384 MacroAssembler* masm = GetVIXLAssembler();
3385
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003386 Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003387 UseScratchRegisterScope temps(masm);
3388 Register temp = temps.AcquireW();
Vladimir Markode91ca92020-10-27 13:41:40 +00003389 auto allocate_instance = [&]() {
3390 DCHECK(out.X().Is(InvokeRuntimeCallingConvention().GetRegisterAt(0)));
3391 codegen_->LoadIntrinsicDeclaringClass(out, invoke);
3392 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3393 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3394 };
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003395 if (invoke->InputAt(0)->IsConstant()) {
3396 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
Vladimir Marko6fd16062018-06-26 11:02:04 +01003397 if (static_cast<uint32_t>(value - info.low) < info.length) {
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003398 // Just embed the j.l.Integer in the code.
Vladimir Marko6fd16062018-06-26 11:02:04 +01003399 DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
3400 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003401 } else {
Vladimir Markoeebb8212018-06-05 14:57:24 +01003402 DCHECK(locations->CanCall());
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003403 // Allocate and initialize a new j.l.Integer.
3404 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3405 // JIT object table.
Vladimir Markode91ca92020-10-27 13:41:40 +00003406 allocate_instance();
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003407 __ Mov(temp.W(), value);
3408 __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
Hans Boehmcc5629c2020-10-30 16:12:01 -07003409 // Class pointer and `value` final field stores require a barrier before publication.
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003410 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3411 }
3412 } else {
Vladimir Markoeebb8212018-06-05 14:57:24 +01003413 DCHECK(locations->CanCall());
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003414 Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003415 // Check bounds of our cache.
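    // A single unsigned comparison folds both bounds checks: (in - info.low) is an unsigned
    // value below info.length exactly when info.low <= in < info.low + info.length, so the
    // `hs` branch below takes the allocation path for every out-of-range input.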
3416 __ Add(out.W(), in.W(), -info.low);
Vladimir Markoeebb8212018-06-05 14:57:24 +01003417 __ Cmp(out.W(), info.length);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003418 vixl::aarch64::Label allocate, done;
3419 __ B(&allocate, hs);
3420 // If the value is within the bounds, load the j.l.Integer directly from the array.
Vladimir Marko6fd16062018-06-26 11:02:04 +01003421 codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003422 MemOperand source = HeapOperand(
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003423 temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
3424 codegen_->Load(DataType::Type::kReference, out, source);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003425 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
3426 __ B(&done);
3427 __ Bind(&allocate);
3428 // Otherwise allocate and initialize a new j.l.Integer.
Vladimir Markode91ca92020-10-27 13:41:40 +00003429 allocate_instance();
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003430 __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
Hans Boehmcc5629c2020-10-30 16:12:01 -07003431 // Class pointer and `value` final field stores require a barrier before publication.
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003432 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3433 __ Bind(&done);
3434 }
3435}
3436
Vladimir Marko01b65522020-10-28 15:43:54 +00003437void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3438 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3439
3440 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) {
3441 invoke->GetLocations()->AddTemp(Location::RequiresRegister());
3442 }
3443}
3444
3445void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3446 MacroAssembler* masm = GetVIXLAssembler();
3447 LocationSummary* locations = invoke->GetLocations();
3448
3449 Location obj = locations->InAt(0);
3450 Location out = locations->Out();
3451
3452 SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
3453 codegen_->AddSlowPath(slow_path);
3454
3455 if (kEmitCompilerReadBarrier) {
3456 // Check self->GetWeakRefAccessEnabled().
3457 UseScratchRegisterScope temps(masm);
3458 Register temp = temps.AcquireW();
3459 __ Ldr(temp,
3460 MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArm64PointerSize>().Uint32Value()));
3461 __ Cbz(temp, slow_path->GetEntryLabel());
3462 }
3463
3464 {
3465 // Load the java.lang.ref.Reference class.
3466 UseScratchRegisterScope temps(masm);
3467 Register temp = temps.AcquireW();
3468 codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
3469
3470 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
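    // The two byte-sized fields are adjacent (verified by the DCHECKs below), so a single
    // half-word load covers both; any non-zero value means at least one of the flags is set
    // and we must take the slow path.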
3471 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3472 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3473 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3474 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3475 __ Ldrh(temp, HeapOperand(temp, disable_intrinsic_offset.Uint32Value()));
3476 __ Cbnz(temp, slow_path->GetEntryLabel());
3477 }
3478
3479 // Load the value from the field.
3480 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3481 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3482 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3483 out,
3484 WRegisterFrom(obj),
3485 referent_offset,
3486 /*maybe_temp=*/ locations->GetTemp(0),
3487 /*needs_null_check=*/ true,
3488 /*use_load_acquire=*/ true);
3489 } else {
3490 MemOperand field = HeapOperand(WRegisterFrom(obj), referent_offset);
Vladimir Marko98873af2020-12-16 12:10:03 +00003491 codegen_->LoadAcquire(
3492 invoke, DataType::Type::kReference, WRegisterFrom(out), field, /*needs_null_check=*/ true);
Vladimir Marko01b65522020-10-28 15:43:54 +00003493 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3494 }
3495 __ Bind(slow_path->GetExitLabel());
3496}
3497
Vladimir Markoac27ac02021-02-01 09:31:02 +00003498void IntrinsicLocationsBuilderARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3499 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
3500}
3501
3502void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3503 LocationSummary* locations = invoke->GetLocations();
3504 MacroAssembler* masm = codegen_->GetVIXLAssembler();
3505 UseScratchRegisterScope temps(masm);
3506
3507 Register obj = WRegisterFrom(locations->InAt(0));
3508 Register other = WRegisterFrom(locations->InAt(1));
3509 Register out = WRegisterFrom(locations->Out());
3510 Register tmp = temps.AcquireW();
3511
3512 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3513 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3514
3515 MemOperand field = HeapOperand(obj, referent_offset);
3516 codegen_->LoadAcquire(invoke, DataType::Type::kReference, tmp, field, /*needs_null_check=*/ true);
Vladimir Markoa0a20cd2021-02-05 15:55:47 +00003517 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(tmp);
Vladimir Markoac27ac02021-02-01 09:31:02 +00003518
3519 __ Cmp(tmp, other);
3520
3521 if (kEmitCompilerReadBarrier) {
3522 DCHECK(kUseBakerReadBarrier);
3523
3524 vixl::aarch64::Label calculate_result;
3525
3526 // If the GC is not marking, the comparison result is final.
3527 __ Cbz(mr, &calculate_result);
3528
3529 __ B(&calculate_result, eq); // ZF set if taken.
3530
3531 // Check if the loaded reference is null.
3532 __ Cbz(tmp, &calculate_result); // ZF clear if taken.
3533
3534 // For correct memory visibility, we need a barrier before loading the lock word.
3535 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3536
3537 // Load the lockword and check if it is a forwarding address.
3538 static_assert(LockWord::kStateShift == 30u);
3539 static_assert(LockWord::kStateForwardingAddress == 3u);
3540 __ Ldr(tmp, HeapOperand(tmp, monitor_offset));
3541 __ Cmp(tmp, Operand(0xc0000000));
3542 __ B(&calculate_result, lo); // ZF clear if taken.
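    // With the state stored in bits [31:30] (kStateShift == 30) and the forwarding-address
    // state being 3, only lock words >= 0xc0000000 denote a forwarded object; `lo` therefore
    // means "no forwarding address" and the non-equal result (ZF clear) stands.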
3543
3544 // Extract the forwarding address and compare with `other`.
3545 __ Cmp(other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
3546
3547 __ Bind(&calculate_result);
3548 }
3549
3550 // Convert ZF into the Boolean result.
3551 __ Cset(out, eq);
3552}
3553
Nicolas Geoffray365719c2017-03-08 13:11:50 +00003554void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01003555 LocationSummary* locations =
3556 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
Nicolas Geoffray365719c2017-03-08 13:11:50 +00003557 locations->SetOut(Location::RequiresRegister());
3558}
3559
3560void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
3561 MacroAssembler* masm = GetVIXLAssembler();
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003562 Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
Nicolas Geoffray365719c2017-03-08 13:11:50 +00003563 UseScratchRegisterScope temps(masm);
3564 Register temp = temps.AcquireX();
3565
3566 __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
3567 __ Ldar(out.W(), MemOperand(temp));
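  // `out` now holds the interrupted flag, read with load-acquire semantics. If the flag is
  // clear there is nothing to do; otherwise clear it with a store-release so the update is
  // properly ordered with respect to the acquire load above.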
3568
3569 vixl::aarch64::Label done;
3570 __ Cbz(out.W(), &done);
3571 __ Stlr(wzr, MemOperand(temp));
3572 __ Bind(&done);
3573}
3574
Hans Boehmc7b28de2018-03-09 17:05:28 -08003575void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
3576 LocationSummary* locations =
3577 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3578 locations->SetInAt(0, Location::Any());
3579}
3580
3581void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3582
xueliang.zhongcb58b072017-10-13 12:06:56 +01003583void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
3584 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3585 return;
3586 }
3587
3588 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3589 LocationSummary::kNoCall,
3590 kIntrinsified);
3591
3592 locations->SetInAt(0, Location::RequiresRegister());
3593 locations->SetInAt(1, Location::RequiresRegister());
Evgeny Astigeevichc01dc292018-12-12 15:32:57 +00003594 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
xueliang.zhongcb58b072017-10-13 12:06:56 +01003595}
3596
3597// Lower the invoke of CRC32.update(int crc, int b).
3598void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
3599 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3600
3601 MacroAssembler* masm = GetVIXLAssembler();
3602
3603 Register crc = InputRegisterAt(invoke, 0);
3604 Register val = InputRegisterAt(invoke, 1);
3605 Register out = OutputRegister(invoke);
3606
3607 // The general algorithm of the CRC32 calculation is:
3608 // crc = ~crc
3609 // result = crc32_for_byte(crc, b)
3610 // crc = ~result
3611 // It is directly lowered to three instructions.
Evgeny Astigeevichc01dc292018-12-12 15:32:57 +00003612
3613 UseScratchRegisterScope temps(masm);
3614 Register tmp = temps.AcquireSameSizeAs(out);
3615
3616 __ Mvn(tmp, crc);
3617 __ Crc32b(tmp, tmp, val);
3618 __ Mvn(out, tmp);
xueliang.zhongcb58b072017-10-13 12:06:56 +01003619}
3620
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003621// Generate code using CRC32 instructions which calculates
3622// a CRC32 value of a byte.
Evgeny Astigeevich15c5b972018-11-20 13:41:40 +00003623//
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003624// Parameters:
3625// masm - VIXL macro assembler
3626// crc - a register holding an initial CRC value
3627// ptr - a register holding a memory address of bytes
3628// length - a register holding a number of bytes to process
3629// out - a register to put a result of calculation
3630static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
3631 const Register& crc,
3632 const Register& ptr,
3633 const Register& length,
3634 const Register& out) {
Evgeny Astigeevich15c5b972018-11-20 13:41:40 +00003635 // The algorithm of CRC32 of bytes is:
3636 // crc = ~crc
3637 // process the first few bytes to make the address 8-byte aligned
3638 // while array has 8 bytes do:
3639 // crc = crc32_of_8bytes(crc, 8_bytes(array))
3640 // if array has 4 bytes:
3641 // crc = crc32_of_4bytes(crc, 4_bytes(array))
3642 // if array has 2 bytes:
3643 // crc = crc32_of_2bytes(crc, 2_bytes(array))
3644 // if array has a byte:
3645 // crc = crc32_of_byte(crc, 1_byte(array))
3646 // crc = ~crc
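  // The leading and trailing inversions are the standard CRC-32 pre- and post-conditioning
  // (java.util.zip.CRC32 keeps the finalized value between calls). The CRC32B/H/W/X
  // instructions use the same reflected CRC-32 polynomial as the library implementation,
  // so both paths are expected to produce identical results.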
3647
3648 vixl::aarch64::Label loop, done;
3649 vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
3650 vixl::aarch64::Label aligned2, aligned4, aligned8;
3651
3652 // Use VIXL scratch registers as the VIXL macro assembler won't use them in
3653 // instructions below.
3654 UseScratchRegisterScope temps(masm);
3655 Register len = temps.AcquireW();
3656 Register array_elem = temps.AcquireW();
3657
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003658 __ Mvn(out, crc);
Evgeny Astigeevich15c5b972018-11-20 13:41:40 +00003659 __ Mov(len, length);
3660
3661 __ Tbz(ptr, 0, &aligned2);
3662 __ Subs(len, len, 1);
3663 __ B(&done, lo);
3664 __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
3665 __ Crc32b(out, out, array_elem);
3666
3667 __ Bind(&aligned2);
3668 __ Tbz(ptr, 1, &aligned4);
3669 __ Subs(len, len, 2);
3670 __ B(&process_1byte, lo);
3671 __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3672 __ Crc32h(out, out, array_elem);
3673
3674 __ Bind(&aligned4);
3675 __ Tbz(ptr, 2, &aligned8);
3676 __ Subs(len, len, 4);
3677 __ B(&process_2bytes, lo);
3678 __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3679 __ Crc32w(out, out, array_elem);
3680
3681 __ Bind(&aligned8);
3682 __ Subs(len, len, 8);
3683 // If len < 8 go to process data by 4 bytes, 2 bytes and a byte.
3684 __ B(&process_4bytes, lo);
3685
3686 // The main loop processing data by 8 bytes.
3687 __ Bind(&loop);
3688 __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
3689 __ Subs(len, len, 8);
3690 __ Crc32x(out, out, array_elem.X());
3691 // if len >= 8, process the next 8 bytes.
3692 __ B(&loop, hs);
3693
3694 // Process the data which is less than 8 bytes.
3695 // The code generated below works with values of len
3696 // which come in the range [-8, 0].
3697 // The first three bits are used to detect whether 4 bytes or 2 bytes or
3698 // a byte can be processed.
3699 // The checking order is from bit 2 to bit 0:
3700 // bit 2 is set: at least 4 bytes available
3701 // bit 1 is set: at least 2 bytes available
3702 // bit 0 is set: at least a byte available
3703 __ Bind(&process_4bytes);
3704 // Goto process_2bytes if less than four bytes available
3705 __ Tbz(len, 2, &process_2bytes);
3706 __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3707 __ Crc32w(out, out, array_elem);
3708
3709 __ Bind(&process_2bytes);
3710 // Goto process_1byte if less than two bytes available
3711 __ Tbz(len, 1, &process_1byte);
3712 __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3713 __ Crc32h(out, out, array_elem);
3714
3715 __ Bind(&process_1byte);
3716 // Goto done if no bytes available
3717 __ Tbz(len, 0, &done);
3718 __ Ldrb(array_elem, MemOperand(ptr));
3719 __ Crc32b(out, out, array_elem);
3720
3721 __ Bind(&done);
3722 __ Mvn(out, out);
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003723}
3724
3725// The threshold for sizes of arrays to use the library provided implementation
3726// of CRC32.updateBytes instead of the intrinsic.
3727static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
3728
3729void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3730 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3731 return;
3732 }
3733
3734 LocationSummary* locations =
3735 new (allocator_) LocationSummary(invoke,
3736 LocationSummary::kCallOnSlowPath,
3737 kIntrinsified);
3738
3739 locations->SetInAt(0, Location::RequiresRegister());
3740 locations->SetInAt(1, Location::RequiresRegister());
3741 locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
3742 locations->SetInAt(3, Location::RequiresRegister());
3743 locations->AddTemp(Location::RequiresRegister());
3744 locations->SetOut(Location::RequiresRegister());
3745}
3746
3747// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
3748//
3749// Note: The intrinsic is not used if len exceeds a threshold.
3750void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3751 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3752
Evgeny Astigeeviche36f5f62019-01-08 17:01:31 +00003753 MacroAssembler* masm = GetVIXLAssembler();
3754 LocationSummary* locations = invoke->GetLocations();
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003755
Evgeny Astigeeviche36f5f62019-01-08 17:01:31 +00003756 SlowPathCodeARM64* slow_path =
Vladimir Marko79db6462020-07-31 14:57:32 +01003757 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003758 codegen_->AddSlowPath(slow_path);
3759
3760 Register length = WRegisterFrom(locations->InAt(3));
3761 __ Cmp(length, kCRC32UpdateBytesThreshold);
3762 __ B(slow_path->GetEntryLabel(), hi);
3763
3764 const uint32_t array_data_offset =
3765 mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
3766 Register ptr = XRegisterFrom(locations->GetTemp(0));
3767 Register array = XRegisterFrom(locations->InAt(1));
Evgeny Astigeeviche36f5f62019-01-08 17:01:31 +00003768 Location offset = locations->InAt(2);
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003769 if (offset.IsConstant()) {
3770 int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
3771 __ Add(ptr, array, array_data_offset + offset_value);
3772 } else {
3773 __ Add(ptr, array, array_data_offset);
3774 __ Add(ptr, ptr, XRegisterFrom(offset));
3775 }
3776
3777 Register crc = WRegisterFrom(locations->InAt(0));
3778 Register out = WRegisterFrom(locations->Out());
3779
3780 GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
Evgeny Astigeevich15c5b972018-11-20 13:41:40 +00003781
3782 __ Bind(slow_path->GetExitLabel());
3783}
3784
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003785void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3786 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3787 return;
3788 }
3789
3790 LocationSummary* locations =
3791 new (allocator_) LocationSummary(invoke,
3792 LocationSummary::kNoCall,
3793 kIntrinsified);
3794
3795 locations->SetInAt(0, Location::RequiresRegister());
3796 locations->SetInAt(1, Location::RequiresRegister());
3797 locations->SetInAt(2, Location::RequiresRegister());
3798 locations->SetInAt(3, Location::RequiresRegister());
3799 locations->AddTemp(Location::RequiresRegister());
3800 locations->SetOut(Location::RequiresRegister());
3801}
3802
3803// Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len)
3804//
3805// There is no need to generate code checking if addr is 0.
3806// The method updateByteBuffer is a private method of java.util.zip.CRC32.
3807// This guarantees no calls outside of the CRC32 class.
3808// An address of DirectBuffer is always passed to the call of updateByteBuffer.
3809 // It might be an implementation of an empty DirectBuffer which can use a zero
3810 // address, but then its length must also be zero. The generated code handles a
3811 // zero length correctly.
3812void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3813 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3814
Evgeny Astigeeviche36f5f62019-01-08 17:01:31 +00003815 MacroAssembler* masm = GetVIXLAssembler();
3816 LocationSummary* locations = invoke->GetLocations();
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003817
3818 Register addr = XRegisterFrom(locations->InAt(1));
3819 Register ptr = XRegisterFrom(locations->GetTemp(0));
3820 __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));
3821
3822 Register crc = WRegisterFrom(locations->InAt(0));
3823 Register length = WRegisterFrom(locations->InAt(3));
3824 Register out = WRegisterFrom(locations->Out());
3825 GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3826}
3827
xueliang.zhong9ce340f2019-01-22 17:46:09 +00003828void IntrinsicLocationsBuilderARM64::VisitFP16ToFloat(HInvoke* invoke) {
3829 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3830 return;
3831 }
3832
3833 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3834 LocationSummary::kNoCall,
3835 kIntrinsified);
3836 locations->SetInAt(0, Location::RequiresRegister());
3837 locations->SetOut(Location::RequiresFpuRegister());
3838}
3839
3840void IntrinsicCodeGeneratorARM64::VisitFP16ToFloat(HInvoke* invoke) {
3841 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3842 MacroAssembler* masm = GetVIXLAssembler();
3843 UseScratchRegisterScope scratch_scope(masm);
3844 Register bits = InputRegisterAt(invoke, 0);
Evgeny Astigeevich7d48dcd2019-10-16 12:46:28 +01003845 VRegister out = SRegisterFrom(invoke->GetLocations()->Out());
3846 VRegister half = scratch_scope.AcquireH();
xueliang.zhong9ce340f2019-01-22 17:46:09 +00003847 __ Fmov(half, bits); // ARMv8.2
3848 __ Fcvt(out, half);
3849}
3850
Vladimir Marko7f958e32019-10-24 09:03:58 +00003851void IntrinsicLocationsBuilderARM64::VisitFP16ToHalf(HInvoke* invoke) {
3852 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3853 return;
3854 }
3855
3856 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3857 LocationSummary::kNoCall,
3858 kIntrinsified);
3859 locations->SetInAt(0, Location::RequiresFpuRegister());
3860 locations->SetOut(Location::RequiresRegister());
3861}
3862
3863void IntrinsicCodeGeneratorARM64::VisitFP16ToHalf(HInvoke* invoke) {
3864 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3865 MacroAssembler* masm = GetVIXLAssembler();
3866 UseScratchRegisterScope scratch_scope(masm);
Evgeny Astigeevich7d48dcd2019-10-16 12:46:28 +01003867 VRegister in = SRegisterFrom(invoke->GetLocations()->InAt(0));
3868 VRegister half = scratch_scope.AcquireH();
Vladimir Marko7f958e32019-10-24 09:03:58 +00003869 Register out = WRegisterFrom(invoke->GetLocations()->Out());
3870 __ Fcvt(half, in);
3871 __ Fmov(out, half);
3872 __ Sxth(out, out); // sign extend due to returning a short type.
3873}
3874
Usama Arifb9f02c22019-10-25 17:37:33 +01003875template<typename OP>
3876void GenerateFP16Round(HInvoke* invoke,
3877 CodeGeneratorARM64* const codegen_,
3878 MacroAssembler* masm,
3879 const OP roundOp) {
3880 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3881 LocationSummary* locations = invoke->GetLocations();
3882 UseScratchRegisterScope scratch_scope(masm);
3883 Register out = WRegisterFrom(locations->Out());
3884 VRegister half = scratch_scope.AcquireH();
3885 __ Fmov(half, WRegisterFrom(locations->InAt(0)));
3886 roundOp(half, half);
3887 __ Fmov(out, half);
3888 __ Sxth(out, out);
3889}
3890
3891void IntrinsicLocationsBuilderARM64::VisitFP16Floor(HInvoke* invoke) {
3892 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3893 return;
3894 }
3895
3896 CreateIntToIntLocations(allocator_, invoke);
3897}
3898
3899void IntrinsicCodeGeneratorARM64::VisitFP16Floor(HInvoke* invoke) {
3900 MacroAssembler* masm = GetVIXLAssembler();
3901 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3902 __ Frintm(out, in); // Round towards Minus infinity
3903 };
3904 GenerateFP16Round(invoke, codegen_, masm, roundOp);
3905}
3906
Usama Arif665aac42019-10-29 11:13:18 +00003907void IntrinsicLocationsBuilderARM64::VisitFP16Ceil(HInvoke* invoke) {
3908 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3909 return;
3910 }
3911
3912 CreateIntToIntLocations(allocator_, invoke);
3913}
3914
3915void IntrinsicCodeGeneratorARM64::VisitFP16Ceil(HInvoke* invoke) {
3916 MacroAssembler* masm = GetVIXLAssembler();
Roland Levillain52f8e5c2019-11-13 17:30:27 +00003917 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
Usama Arif665aac42019-10-29 11:13:18 +00003918 __ Frintp(out, in); // Round towards Plus infinity
3919 };
3920 GenerateFP16Round(invoke, codegen_, masm, roundOp);
3921}
3922
Usama Arif681692b2019-10-30 16:23:26 +00003923void IntrinsicLocationsBuilderARM64::VisitFP16Rint(HInvoke* invoke) {
3924 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3925 return;
3926 }
3927
3928 CreateIntToIntLocations(allocator_, invoke);
3929}
3930
3931void IntrinsicCodeGeneratorARM64::VisitFP16Rint(HInvoke* invoke) {
3932 MacroAssembler* masm = GetVIXLAssembler();
Roland Levillain52f8e5c2019-11-13 17:30:27 +00003933 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
Usama Arif681692b2019-10-30 16:23:26 +00003934 __ Frintn(out, in); // Round to nearest, with ties to even
3935 };
3936 GenerateFP16Round(invoke, codegen_, masm, roundOp);
3937}
3938
Usama Arif39e29792019-11-15 10:53:29 +00003939void FP16ComparisonLocations(HInvoke* invoke,
3940 ArenaAllocator* allocator_,
3941 CodeGeneratorARM64* codegen_,
3942 int requiredTemps) {
3943 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3944 return;
3945 }
3946
3947 CreateIntIntToIntLocations(allocator_, invoke);
3948 for (int i = 0; i < requiredTemps; i++) {
3949 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3950 }
3951}
3952
Usama Arif457e9fa2019-11-11 15:29:59 +00003953template<typename OP>
3954void GenerateFP16Compare(HInvoke* invoke,
3955 CodeGeneratorARM64* codegen,
3956 MacroAssembler* masm,
3957 const OP compareOp) {
3958 DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
3959 LocationSummary* locations = invoke->GetLocations();
3960 Register out = WRegisterFrom(locations->Out());
3961 VRegister half0 = HRegisterFrom(locations->GetTemp(0));
3962 VRegister half1 = HRegisterFrom(locations->GetTemp(1));
3963 __ Fmov(half0, WRegisterFrom(locations->InAt(0)));
3964 __ Fmov(half1, WRegisterFrom(locations->InAt(1)));
3965 compareOp(out, half0, half1);
3966}
3967
3968static inline void GenerateFP16Compare(HInvoke* invoke,
3969 CodeGeneratorARM64* codegen,
3970 MacroAssembler* masm,
3971 vixl::aarch64::Condition cond) {
3972 auto compareOp = [masm, cond](const Register out, const VRegister& in0, const VRegister& in1) {
3973 __ Fcmp(in0, in1);
3974 __ Cset(out, cond);
3975 };
3976 GenerateFP16Compare(invoke, codegen, masm, compareOp);
3977}
3978
3979void IntrinsicLocationsBuilderARM64::VisitFP16Greater(HInvoke* invoke) {
Usama Arif39e29792019-11-15 10:53:29 +00003980 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
Usama Arif457e9fa2019-11-11 15:29:59 +00003981}
3982
3983void IntrinsicCodeGeneratorARM64::VisitFP16Greater(HInvoke* invoke) {
3984 MacroAssembler* masm = GetVIXLAssembler();
3985 GenerateFP16Compare(invoke, codegen_, masm, gt);
3986}
3987
3988void IntrinsicLocationsBuilderARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
Usama Arif39e29792019-11-15 10:53:29 +00003989 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
Usama Arif457e9fa2019-11-11 15:29:59 +00003990}
3991
3992void IntrinsicCodeGeneratorARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
3993 MacroAssembler* masm = GetVIXLAssembler();
3994 GenerateFP16Compare(invoke, codegen_, masm, ge);
3995}
3996
3997void IntrinsicLocationsBuilderARM64::VisitFP16Less(HInvoke* invoke) {
Usama Arif39e29792019-11-15 10:53:29 +00003998 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
Usama Arif457e9fa2019-11-11 15:29:59 +00003999}
4000
4001void IntrinsicCodeGeneratorARM64::VisitFP16Less(HInvoke* invoke) {
4002 MacroAssembler* masm = GetVIXLAssembler();
4003 GenerateFP16Compare(invoke, codegen_, masm, mi);
4004}
4005
4006void IntrinsicLocationsBuilderARM64::VisitFP16LessEquals(HInvoke* invoke) {
Usama Arif39e29792019-11-15 10:53:29 +00004007 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
Usama Arif457e9fa2019-11-11 15:29:59 +00004008}
4009
4010void IntrinsicCodeGeneratorARM64::VisitFP16LessEquals(HInvoke* invoke) {
4011 MacroAssembler* masm = GetVIXLAssembler();
4012 GenerateFP16Compare(invoke, codegen_, masm, ls);
4013}
4014
Usama Arifecbdc072019-11-13 13:32:54 +00004015void IntrinsicLocationsBuilderARM64::VisitFP16Compare(HInvoke* invoke) {
Usama Arif39e29792019-11-15 10:53:29 +00004016 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
Usama Arifecbdc072019-11-13 13:32:54 +00004017}
4018
4019void IntrinsicCodeGeneratorARM64::VisitFP16Compare(HInvoke* invoke) {
4020 MacroAssembler* masm = GetVIXLAssembler();
4021 auto compareOp = [masm](const Register out,
4022 const VRegister& in0,
4023 const VRegister& in1) {
4024 vixl::aarch64::Label end;
4025 vixl::aarch64::Label equal;
4026 vixl::aarch64::Label normal;
4027
4028 // The normal cases for this method are:
4029 // - in0 > in1 => out = 1
4030 // - in0 < in1 => out = -1
4031 // - in0 == in1 => out = 0
4032 // +/-Infinity are ordered by default so are handled by the normal case.
4033 // There are two special cases that Fcmp is insufficient for distinguishing:
4034 // - in0 and in1 are +0 and -0 => +0 > -0 so compare encoding instead of value
4035 // - in0 or in1 is NaN => manually compare with in0 and in1 separately
4036 __ Fcmp(in0, in1);
4037 __ B(eq, &equal); // in0==in1 or +0 -0 case.
4038 __ B(vc, &normal); // in0 and in1 are ordered (not NaN).
4039
4040 // Either of the inputs is NaN.
4041 // NaN is equal to itself and greater than any other number so:
4042 // - if only in0 is NaN => return 1
4043 // - if only in1 is NaN => return -1
4044 // - if both in0 and in1 are NaN => return 0
4045 __ Fcmp(in0, 0.0);
4046 __ Mov(out, -1);
4047 __ B(vc, &end); // in0 != NaN => out = -1.
4048 __ Fcmp(in1, 0.0);
4049 __ Cset(out, vc); // if in1 != NaN => out = 1, otherwise both are NaNs => out = 0.
4050 __ B(&end);
4051
4052 // in0 == in1 or if one of the inputs is +0 and the other is -0.
4053 __ Bind(&equal);
4054 // Compare encoding of in0 and in1 as the denormal fraction of single precision float.
4055 // Reverse operand order because -0 > +0 when compared as S registers.
4056 // The instruction Fmov(Hregister, Wregister) zero extends the Hregister.
4057 // Therefore the value of bits[127:16] will not matter when doing the
4058 // below Fcmp as they are set to 0.
4059 __ Fcmp(in1.S(), in0.S());
4060
4061 __ Bind(&normal);
4062 __ Cset(out, gt); // if in0 > in1 => out = 1, otherwise out = 0.
4063 // Note: could be from equals path or original comparison
4064 __ Csinv(out, out, wzr, pl); // if in0 >= in1 out=out, otherwise out=-1.
4065
4066 __ Bind(&end);
4067 };
4068
4069 GenerateFP16Compare(invoke, codegen_, masm, compareOp);
4070}
4071
Usama Arif39e29792019-11-15 10:53:29 +00004072const int kFP16NaN = 0x7e00;
4073
4074static inline void GenerateFP16MinMax(HInvoke* invoke,
4075 CodeGeneratorARM64* codegen,
4076 MacroAssembler* masm,
4077 vixl::aarch64::Condition cond) {
4078 DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
4079 LocationSummary* locations = invoke->GetLocations();
4080
4081 vixl::aarch64::Label equal;
4082 vixl::aarch64::Label end;
4083
4084 UseScratchRegisterScope temps(masm);
4085
4086 Register out = WRegisterFrom(locations->Out());
4087 Register in0 = WRegisterFrom(locations->InAt(0));
4088 Register in1 = WRegisterFrom(locations->InAt(1));
4089 VRegister half0 = HRegisterFrom(locations->GetTemp(0));
4090 VRegister half1 = temps.AcquireH();
4091
4092 // The normal cases for this method are:
4093 // - in0.h == in1.h => out = in0 or in1
4094 // - in0.h <cond> in1.h => out = in0
4095 // - in0.h <!cond> in1.h => out = in1
4096 // +/-Infinity are ordered by default so are handled by the normal case.
4097 // There are two special cases that Fcmp is insufficient for distinguishing:
4098 // - in0 and in1 are +0 and -0 => +0 > -0 so compare encoding instead of value
4099 // - in0 or in1 is NaN => out = NaN
4100 __ Fmov(half0, in0);
4101 __ Fmov(half1, in1);
4102 __ Fcmp(half0, half1);
4103 __ B(eq, &equal); // half0 = half1 or +0/-0 case.
4104 __ Csel(out, in0, in1, cond); // if half0 <cond> half1 => out = in0, otherwise out = in1.
4105 __ B(vc, &end); // None of the inputs were NaN.
4106
4107 // At least one input was NaN.
4108 __ Mov(out, kFP16NaN); // out=NaN.
4109 __ B(&end);
4110
4111 // in0 == in1 or if one of the inputs is +0 and the other is -0.
4112 __ Bind(&equal);
4113 // Fcmp cannot normally distinguish +0 and -0 so compare encoding.
4114 // Encoding is compared as the denormal fraction of a Single.
4115 // Note: encoding of -0 > encoding of +0 despite +0 > -0 so in0 and in1 are swapped.
4116 // Note: The instruction Fmov(Hregister, Wregister) zero extends the Hregister.
4117 __ Fcmp(half1.S(), half0.S());
4118
4119 __ Csel(out, in0, in1, cond); // if half0 <cond> half1 => out = in0, otherwise out = in1.
4120
4121 __ Bind(&end);
4122}
4123
4124void IntrinsicLocationsBuilderARM64::VisitFP16Min(HInvoke* invoke) {
4125 FP16ComparisonLocations(invoke, allocator_, codegen_, 1);
4126}
4127
4128void IntrinsicCodeGeneratorARM64::VisitFP16Min(HInvoke* invoke) {
4129 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
4130 MacroAssembler* masm = GetVIXLAssembler();
4131 GenerateFP16MinMax(invoke, codegen_, masm, mi);
4132}
4133
4134void IntrinsicLocationsBuilderARM64::VisitFP16Max(HInvoke* invoke) {
4135 FP16ComparisonLocations(invoke, allocator_, codegen_, 1);
4136}
4137
4138void IntrinsicCodeGeneratorARM64::VisitFP16Max(HInvoke* invoke) {
4139 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
4140 MacroAssembler* masm = GetVIXLAssembler();
4141 GenerateFP16MinMax(invoke, codegen_, masm, gt);
4142}
4143
Artem Serova3bd4ec2020-08-27 16:26:17 +01004144static void GenerateDivideUnsigned(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4145 LocationSummary* locations = invoke->GetLocations();
4146 MacroAssembler* masm = codegen->GetVIXLAssembler();
4147 DataType::Type type = invoke->GetType();
4148 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4149
4150 Register dividend = RegisterFrom(locations->InAt(0), type);
4151 Register divisor = RegisterFrom(locations->InAt(1), type);
4152 Register out = RegisterFrom(locations->Out(), type);
4153
4154 // Check if divisor is zero, bail to managed implementation to handle.
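  // Note: AArch64 UDIV yields 0 for a zero divisor instead of trapping, so the
  // division-by-zero exception required by the Java method has to come from the managed
  // implementation reached through the slow path.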
4155 SlowPathCodeARM64* slow_path =
4156 new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
4157 codegen->AddSlowPath(slow_path);
4158 __ Cbz(divisor, slow_path->GetEntryLabel());
4159
4160 __ Udiv(out, dividend, divisor);
4161
4162 __ Bind(slow_path->GetExitLabel());
4163}
4164
4165void IntrinsicLocationsBuilderARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
4166 CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
4167}
4168
4169void IntrinsicCodeGeneratorARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
4170 GenerateDivideUnsigned(invoke, codegen_);
4171}
4172
4173void IntrinsicLocationsBuilderARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
4174 CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
4175}
4176
4177void IntrinsicCodeGeneratorARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
4178 GenerateDivideUnsigned(invoke, codegen_);
4179}
4180
Nikita Iashchenko745da802021-01-20 21:52:54 +00004181void IntrinsicLocationsBuilderARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
4182 CreateIntIntToIntLocations(allocator_, invoke);
4183}
4184
4185void IntrinsicCodeGeneratorARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
4186 LocationSummary* locations = invoke->GetLocations();
4187 MacroAssembler* masm = codegen_->GetVIXLAssembler();
4188 DataType::Type type = invoke->GetType();
4189 DCHECK(type == DataType::Type::kInt64);
4190
4191 Register x = RegisterFrom(locations->InAt(0), type);
4192 Register y = RegisterFrom(locations->InAt(1), type);
4193 Register out = RegisterFrom(locations->Out(), type);
4194
4195 __ Smulh(out, x, y);
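  // SMULH returns the upper 64 bits of the 128-bit signed product, which is exactly what
  // Math.multiplyHigh(long, long) is specified to return.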
4196}
4197
Nikita Iashchenko3fa6e462021-09-10 17:30:04 +01004198static void GenerateMathFma(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4199 MacroAssembler* masm = codegen->GetVIXLAssembler();
4200
4201 VRegister n = helpers::InputFPRegisterAt(invoke, 0);
4202 VRegister m = helpers::InputFPRegisterAt(invoke, 1);
4203 VRegister a = helpers::InputFPRegisterAt(invoke, 2);
4204 VRegister out = helpers::OutputFPRegister(invoke);
4205
4206 __ Fmadd(out, n, m, a);
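  // FMADD computes n * m + a as a single fused operation with one rounding step, matching
  // the semantics required by Math.fma.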
4207}
4208
4209void IntrinsicLocationsBuilderARM64::VisitMathFmaDouble(HInvoke* invoke) {
4210 CreateFPFPFPToFPCallLocations(allocator_, invoke);
4211}
4212
4213void IntrinsicCodeGeneratorARM64::VisitMathFmaDouble(HInvoke* invoke) {
4214 GenerateMathFma(invoke, codegen_);
4215}
4216
4217void IntrinsicLocationsBuilderARM64::VisitMathFmaFloat(HInvoke* invoke) {
4218 CreateFPFPFPToFPCallLocations(allocator_, invoke);
4219}
4220
4221void IntrinsicCodeGeneratorARM64::VisitMathFmaFloat(HInvoke* invoke) {
4222 GenerateMathFma(invoke, codegen_);
4223}
4224
Vladimir Marko98873af2020-12-16 12:10:03 +00004225class VarHandleSlowPathARM64 : public IntrinsicSlowPathARM64 {
4226 public:
4227 VarHandleSlowPathARM64(HInvoke* invoke, std::memory_order order)
4228 : IntrinsicSlowPathARM64(invoke),
4229 order_(order),
4230 return_success_(false),
4231 strong_(false),
4232 get_and_update_op_(GetAndUpdateOp::kAdd) {
4233 }
4234
4235 vixl::aarch64::Label* GetByteArrayViewCheckLabel() {
4236 return &byte_array_view_check_label_;
4237 }
4238
4239 vixl::aarch64::Label* GetNativeByteOrderLabel() {
4240 return &native_byte_order_label_;
4241 }
4242
4243 void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
4244 if (return_success) {
4245 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
4246 } else {
4247 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
4248 }
4249 return_success_ = return_success;
4250 strong_ = strong;
4251 }
4252
4253 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
4254 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
4255 get_and_update_op_ = get_and_update_op;
4256 }
4257
4258 void EmitNativeCode(CodeGenerator* codegen_in) override {
4259 if (GetByteArrayViewCheckLabel()->IsLinked()) {
4260 EmitByteArrayViewCode(codegen_in);
4261 }
4262 IntrinsicSlowPathARM64::EmitNativeCode(codegen_in);
4263 }
4264
4265 private:
4266 HInvoke* GetInvoke() const {
4267 return GetInstruction()->AsInvoke();
4268 }
4269
4270 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
4271 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
4272 }
4273
4274 void EmitByteArrayViewCode(CodeGenerator* codegen_in);
4275
4276 vixl::aarch64::Label byte_array_view_check_label_;
4277 vixl::aarch64::Label native_byte_order_label_;
4278 // Shared parameter for all VarHandle intrinsics.
4279 std::memory_order order_;
4280 // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
4281 bool return_success_;
4282 bool strong_;
4283 // Extra argument for GenerateVarHandleGetAndUpdate().
4284 GetAndUpdateOp get_and_update_op_;
4285};
4286
4287// Generate subtype check without read barriers.
4288static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARM64* codegen,
4289 SlowPathCodeARM64* slow_path,
4290 Register object,
4291 Register type,
4292 bool object_can_be_null = true) {
4293 MacroAssembler* masm = codegen->GetVIXLAssembler();
4294
4295 const MemberOffset class_offset = mirror::Object::ClassOffset();
4296 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
4297
4298 vixl::aarch64::Label success;
4299 if (object_can_be_null) {
4300 __ Cbz(object, &success);
4301 }
4302
4303 UseScratchRegisterScope temps(masm);
4304 Register temp = temps.AcquireW();
4305
4306 __ Ldr(temp, HeapOperand(object, class_offset.Int32Value()));
4307 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
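  // The loop below walks the superclass chain without read barriers, roughly:
  //   for (klass = object->klass_; klass != type; klass = klass->super_class_) {
  //     if (klass == null) goto slow_path;  // May be a false negative; resolved there.
  //   }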
4308 vixl::aarch64::Label loop;
4309 __ Bind(&loop);
4310  __ Cmp(type, temp);
4311  __ B(&success, eq);
4312 __ Ldr(temp, HeapOperand(temp, super_class_offset.Int32Value()));
4313 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4314 __ Cbz(temp, slow_path->GetEntryLabel());
4315 __ B(&loop);
4316 __ Bind(&success);
4317}
4318
4319// Check access mode and the primitive type from VarHandle.varType.
4320// Check reference arguments against the VarHandle.varType; for references this is a subclass
4321// check without read barrier, so it can have false negatives which we handle in the slow path.
4322static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
4323 CodeGeneratorARM64* codegen,
4324 SlowPathCodeARM64* slow_path,
4325 DataType::Type type) {
4326 mirror::VarHandle::AccessMode access_mode =
4327 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
4328 Primitive::Type primitive_type = DataTypeToPrimitive(type);
4329
4330 MacroAssembler* masm = codegen->GetVIXLAssembler();
4331 Register varhandle = InputRegisterAt(invoke, 0);
4332
4333 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
4334 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
4335 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4336
4337 UseScratchRegisterScope temps(masm);
4338 Register var_type_no_rb = temps.AcquireW();
4339 Register temp2 = temps.AcquireW();
4340
4341 // Check that the operation is permitted and the primitive type of varhandle.varType.
4342  // We do not need a read barrier here: the reference is loaded only so that we can read a
4343  // constant primitive field through it. Use LDP to load the two fields together.
4344 DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
4345 __ Ldp(var_type_no_rb, temp2, HeapOperand(varhandle, var_type_offset.Int32Value()));
4346 codegen->GetAssembler()->MaybeUnpoisonHeapReference(var_type_no_rb);
4347 __ Tbz(temp2, static_cast<uint32_t>(access_mode), slow_path->GetEntryLabel());
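  // The TBZ above mirrors VarHandle.isAccessModeSupported(accessMode): each bit of the
  // accessModesBitMask field enables one access mode, so an unsupported mode (e.g. a set
  // on a VarHandle for a final field) falls through to the slow path.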
4348 __ Ldrh(temp2, HeapOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
4349  if (primitive_type == Primitive::kPrimNot) {
4350 static_assert(Primitive::kPrimNot == 0);
4351 __ Cbnz(temp2, slow_path->GetEntryLabel());
4352 } else {
4353 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4354 __ B(slow_path->GetEntryLabel(), ne);
4355 }
Vladimir Markoe17530a2020-11-11 17:02:26 +00004356
4357 temps.Release(temp2);
4358
4359 if (type == DataType::Type::kReference) {
4360 // Check reference arguments against the varType.
4361 // False negatives due to varType being an interface or array type
4362 // or due to the missing read barrier are handled by the slow path.
4363 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4364 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4365 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4366 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4367 HInstruction* arg = invoke->InputAt(arg_index);
4368 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
4369 if (!arg->IsNullConstant()) {
4370 Register arg_reg = WRegisterFrom(invoke->GetLocations()->InAt(arg_index));
4371 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
4372 }
4373 }
4374 }
4375}
4376
4377static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
4378 CodeGeneratorARM64* codegen,
4379 SlowPathCodeARM64* slow_path) {
4380 MacroAssembler* masm = codegen->GetVIXLAssembler();
4381 Register varhandle = InputRegisterAt(invoke, 0);
4382
4383 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4384
4385 UseScratchRegisterScope temps(masm);
4386 Register temp = temps.AcquireW();
4387
4388 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
4389 // Do not emit read barrier (or unpoison the reference) for comparing to null.
4390 __ Ldr(temp, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4391 __ Cbnz(temp, slow_path->GetEntryLabel());
4392}
4393
4394static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
4395 CodeGeneratorARM64* codegen,
4396 SlowPathCodeARM64* slow_path) {
4397  VarHandleOptimizations optimizations(invoke);
4398  MacroAssembler* masm = codegen->GetVIXLAssembler();
4399 Register varhandle = InputRegisterAt(invoke, 0);
4400 Register object = InputRegisterAt(invoke, 1);
4401
4402 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4403 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4404
4405  // Null-check the object.
4406  if (!optimizations.GetSkipObjectNullCheck()) {
4407 __ Cbz(object, slow_path->GetEntryLabel());
4408 }
Vladimir Marko479cbad2020-12-10 16:10:09 +00004409
4410  UseScratchRegisterScope temps(masm);
4411 Register temp = temps.AcquireW();
4412 Register temp2 = temps.AcquireW();
4413
4414  // Check that the VarHandle references an instance field by checking that
4415 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
4416 // type compatibility check with the source object's type, which will fail for null.
4417 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4418 __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4419  codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4420 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4421  __ Cbnz(temp2, slow_path->GetEntryLabel());
4422
4423 // Check that the object has the correct type.
4424 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
4425 temps.Release(temp2); // Needed by GenerateSubTypeObjectCheckNoReadBarrier().
4426 GenerateSubTypeObjectCheckNoReadBarrier(
4427 codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
4428}
4429
4430static void GenerateVarHandleArrayChecks(HInvoke* invoke,
4431 CodeGeneratorARM64* codegen,
4432                                         VarHandleSlowPathARM64* slow_path) {
4433  VarHandleOptimizations optimizations(invoke);
4434  MacroAssembler* masm = codegen->GetVIXLAssembler();
4435 Register varhandle = InputRegisterAt(invoke, 0);
4436 Register object = InputRegisterAt(invoke, 1);
4437 Register index = InputRegisterAt(invoke, 2);
4438 DataType::Type value_type =
4439 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4440 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
4441
4442 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4443 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4444 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
4445 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4446 const MemberOffset class_offset = mirror::Object::ClassOffset();
4447 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
4448
4449 // Null-check the object.
4450  if (!optimizations.GetSkipObjectNullCheck()) {
4451 __ Cbz(object, slow_path->GetEntryLabel());
4452 }
Vladimir Marko479cbad2020-12-10 16:10:09 +00004453
4454 UseScratchRegisterScope temps(masm);
4455 Register temp = temps.AcquireW();
4456 Register temp2 = temps.AcquireW();
4457
4458 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
4459 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
4460 // coordinateType0 shall not be null but we do not explicitly verify that.
4461 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4462 __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4463 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4464 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4465 __ Cbz(temp2, slow_path->GetEntryLabel());
4466
4467  // Check object class against componentType0.
4468 //
4469 // This is an exact check and we defer other cases to the runtime. This includes
4470 // conversion to array of superclass references, which is valid but subsequently
4471 // requires all update operations to check that the value can indeed be stored.
4472 // We do not want to perform such extra checks in the intrinsified code.
4473 //
4474 // We do this check without read barrier, so there can be false negatives which we
4475 // defer to the slow path. There shall be no false negatives for array classes in the
4476 // boot image (including Object[] and primitive arrays) because they are non-movable.
4477 __ Ldr(temp2, HeapOperand(object, class_offset.Int32Value()));
4478 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4479 __ Cmp(temp, temp2);
4480 __ B(slow_path->GetEntryLabel(), ne);
4481
4482  // Check that the coordinateType0 is an array type. We do not need a read barrier
4483  // for loading constant reference fields (or chains of them) for comparison with null,
4484  // nor for finally loading a constant primitive field (primitive type) below.
4485  __ Ldr(temp2, HeapOperand(temp, component_type_offset.Int32Value()));
4486 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4487 __ Cbz(temp2, slow_path->GetEntryLabel());
4488
4489 // Check that the array component type matches the primitive type.
4490 __ Ldrh(temp2, HeapOperand(temp2, primitive_type_offset.Int32Value()));
4491 if (primitive_type == Primitive::kPrimNot) {
4492 static_assert(Primitive::kPrimNot == 0);
4493 __ Cbnz(temp2, slow_path->GetEntryLabel());
4494 } else {
4495 // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
4496 // we shall check for a byte array view in the slow path.
4497 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4498 // so we cannot emit that if we're JITting without boot image.
4499 bool boot_image_available =
4500 codegen->GetCompilerOptions().IsBootImage() ||
4501 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4502 DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
4503    bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
4504    vixl::aarch64::Label* slow_path_label =
4505 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4506 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4507 __ B(slow_path_label, ne);
4508 }
4509
4510  // Check for array index out of bounds.
4511 __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
4512 __ Cmp(index, temp);
4513 __ B(slow_path->GetEntryLabel(), hs);
4514}
4515
4516static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4517 CodeGeneratorARM64* codegen,
4518                                              VarHandleSlowPathARM64* slow_path) {
4519  size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4520  if (expected_coordinates_count == 0u) {
4521    GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4522  } else if (expected_coordinates_count == 1u) {
4523    GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4524  } else {
4525    DCHECK_EQ(expected_coordinates_count, 2u);
4526    GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4527  }
4528}
4529
4530static VarHandleSlowPathARM64* GenerateVarHandleChecks(HInvoke* invoke,
4531 CodeGeneratorARM64* codegen,
4532 std::memory_order order,
4533 DataType::Type type) {
4534 VarHandleSlowPathARM64* slow_path =
4535 new (codegen->GetScopedAllocator()) VarHandleSlowPathARM64(invoke, order);
4536 codegen->AddSlowPath(slow_path);
4537
4538 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4539 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4540
4541 return slow_path;
4542}
4543
4544struct VarHandleTarget {
4545 Register object; // The object holding the value to operate on.
4546 Register offset; // The offset of the value to operate on.
4547};
4548
4549static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4550  size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4551  LocationSummary* locations = invoke->GetLocations();
4552
4553  VarHandleTarget target;
4554  // The temporary allocated for loading the offset.
4555  target.offset = WRegisterFrom(locations->GetTemp(0u));
4556  // The reference to the object that holds the value to operate on.
4557  target.object = (expected_coordinates_count == 0u)
4558      ? WRegisterFrom(locations->GetTemp(1u))
4559      : InputRegisterAt(invoke, 1);
4560  return target;
4561}
4562
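// Emits code that materializes the target as an (object, offset) pair usable with
// object + offset addressing: for fields the offset is read from the ArtField (and for static
// fields the declaring class is loaded as the object), while for arrays and views it is
// computed from the index and the array data offset.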
4563static void GenerateVarHandleTarget(HInvoke* invoke,
4564 const VarHandleTarget& target,
4565 CodeGeneratorARM64* codegen) {
4566 MacroAssembler* masm = codegen->GetVIXLAssembler();
4567 Register varhandle = InputRegisterAt(invoke, 0);
4568 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4569
4570  if (expected_coordinates_count <= 1u) {
4571 // For static fields, we need to fill the `target.object` with the declaring class,
4572 // so we can use `target.object` as temporary for the `ArtMethod*`. For instance fields,
4573 // we do not need the declaring class, so we can forget the `ArtMethod*` when
4574 // we load the `target.offset`, so use the `target.offset` to hold the `ArtMethod*`.
4575 Register method = (expected_coordinates_count == 0) ? target.object : target.offset;
4576
4577    const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4578    const MemberOffset offset_offset = ArtField::OffsetOffset();
4579
4580    // Load the ArtField, the offset and, if needed, declaring class.
4581 __ Ldr(method.X(), HeapOperand(varhandle, art_field_offset.Int32Value()));
4582 __ Ldr(target.offset, MemOperand(method.X(), offset_offset.Int32Value()));
4583 if (expected_coordinates_count == 0u) {
4584 codegen->GenerateGcRootFieldLoad(invoke,
4585 LocationFrom(target.object),
4586 method.X(),
4587 ArtField::DeclaringClassOffset().Int32Value(),
4588 /*fixup_label=*/ nullptr,
4589 kCompilerReadBarrierOption);
4590 }
4591 } else {
4592 DCHECK_EQ(expected_coordinates_count, 2u);
4593 DataType::Type value_type =
4594 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4595 size_t size_shift = DataType::SizeShift(value_type);
4596 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4597
4598 Register index = InputRegisterAt(invoke, 2);
4599 Register shifted_index = index;
4600 if (size_shift != 0u) {
4601 shifted_index = target.offset;
4602 __ Lsl(shifted_index, index, size_shift);
4603 }
4604 __ Add(target.offset, shifted_index, data_offset.Int32Value());
Vladimir Markoa41ea272020-09-07 15:24:36 +00004605 }
4606}
4607
4608static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
4609  size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4610 DataType::Type return_type = invoke->GetType();
4611
4612  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4613 LocationSummary* locations =
4614 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4615 locations->SetInAt(0, Location::RequiresRegister());
4616  // Require coordinates in registers. These are the object holding the value
4617 // to operate on (except for static fields) and index (for arrays and views).
4618 for (size_t i = 0; i != expected_coordinates_count; ++i) {
4619 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4620 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004621 if (return_type != DataType::Type::kVoid) {
4622 if (DataType::IsFloatingPointType(return_type)) {
4623 locations->SetOut(Location::RequiresFpuRegister());
4624 } else {
4625 locations->SetOut(Location::RequiresRegister());
4626 }
4627  }
4628  uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4629 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4630 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4631 HInstruction* arg = invoke->InputAt(arg_index);
4632 if (IsConstantZeroBitPattern(arg)) {
4633 locations->SetInAt(arg_index, Location::ConstantLocation(arg->AsConstant()));
4634 } else if (DataType::IsFloatingPointType(arg->GetType())) {
4635 locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4636 } else {
4637 locations->SetInAt(arg_index, Location::RequiresRegister());
4638 }
4639 }
Vladimir Marko436977d2020-11-12 12:41:06 +00004640
4641 // Add a temporary for offset.
4642 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4643 GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4644 // To preserve the offset value across the non-Baker read barrier slow path
4645 // for loading the declaring class, use a fixed callee-save register.
4646    constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
4647    locations->AddTemp(Location::RegisterLocation(first_callee_save));
4648  } else {
4649 locations->AddTemp(Location::RequiresRegister());
4650 }
4651  if (expected_coordinates_count == 0u) {
4652 // Add a temporary to hold the declaring class.
4653 locations->AddTemp(Location::RequiresRegister());
4654 }
4655
4656  return locations;
4657}
4658
4659static void CreateVarHandleGetLocations(HInvoke* invoke) {
4660  VarHandleOptimizations optimizations(invoke);
4661  if (optimizations.GetDoNotIntrinsify()) {
4662    return;
4663 }
4664
4665  if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4666 invoke->GetType() == DataType::Type::kReference &&
4667 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4668 invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4669 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4670 // the passed reference and reloads it from the field. This gets the memory visibility
4671    // wrong for Acquire/Volatile operations. b/173104084
4672    return;
4673 }
4674
4675  CreateVarHandleCommonLocations(invoke);
4676}
4677
4678static void GenerateVarHandleGet(HInvoke* invoke,
4679 CodeGeneratorARM64* codegen,
Vladimir Marko98873af2020-12-16 12:10:03 +00004680 std::memory_order order,
4681 bool byte_swap = false) {
Vladimir Marko79db6462020-07-31 14:57:32 +01004682 DataType::Type type = invoke->GetType();
4683 DCHECK_NE(type, DataType::Type::kVoid);
Vladimir Marko79db6462020-07-31 14:57:32 +01004684
Vladimir Markoa41ea272020-09-07 15:24:36 +00004685 LocationSummary* locations = invoke->GetLocations();
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004686 MacroAssembler* masm = codegen->GetVIXLAssembler();
Vladimir Marko79db6462020-07-31 14:57:32 +01004687 CPURegister out = helpers::OutputCPURegister(invoke);
Vladimir Marko79db6462020-07-31 14:57:32 +01004688
Vladimir Marko98873af2020-12-16 12:10:03 +00004689 VarHandleTarget target = GetVarHandleTarget(invoke);
4690 VarHandleSlowPathARM64* slow_path = nullptr;
4691 if (!byte_swap) {
4692 slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4693 GenerateVarHandleTarget(invoke, target, codegen);
4694 __ Bind(slow_path->GetNativeByteOrderLabel());
4695 }
4696
4697  // ARM64 load-acquire instructions are implicitly sequentially consistent.
4698 bool use_load_acquire =
4699 (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
4700 DCHECK(use_load_acquire || order == std::memory_order_relaxed);
4701
4702  // Load the value from the target location.
4703  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4704    // Piggy-back on the field load path using introspection for the Baker read barrier.
4705    // The `target.offset` is a temporary, use it for field address.
4706 Register tmp_ptr = target.offset.X();
4707 __ Add(tmp_ptr, target.object.X(), target.offset.X());
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004708 codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
4709 locations->Out(),
Vladimir Marko436977d2020-11-12 12:41:06 +00004710 target.object,
4711 MemOperand(tmp_ptr),
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004712 /*needs_null_check=*/ false,
4713 use_load_acquire);
Vladimir Marko98873af2020-12-16 12:10:03 +00004714 DCHECK(!byte_swap);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004715 } else {
Vladimir Marko436977d2020-11-12 12:41:06 +00004716 MemOperand address(target.object.X(), target.offset.X());
Vladimir Marko98873af2020-12-16 12:10:03 +00004717 CPURegister load_reg = out;
4718 DataType::Type load_type = type;
4719 UseScratchRegisterScope temps(masm);
4720 if (byte_swap) {
4721 if (type == DataType::Type::kInt16) {
4722 // Avoid unnecessary sign extension before REV16.
4723 load_type = DataType::Type::kUint16;
4724 } else if (type == DataType::Type::kFloat32) {
4725 load_type = DataType::Type::kInt32;
4726 load_reg = target.offset.W();
4727 } else if (type == DataType::Type::kFloat64) {
4728 load_type = DataType::Type::kInt64;
4729 load_reg = target.offset.X();
4730 }
4731 }
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004732 if (use_load_acquire) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004733 codegen->LoadAcquire(invoke, load_type, load_reg, address, /*needs_null_check=*/ false);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004734 } else {
Vladimir Marko98873af2020-12-16 12:10:03 +00004735 codegen->Load(load_type, load_reg, address);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004736 }
Vladimir Markoa41ea272020-09-07 15:24:36 +00004737 if (type == DataType::Type::kReference) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004738 DCHECK(!byte_swap);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004739 DCHECK(out.IsW());
4740 Location out_loc = locations->Out();
Vladimir Marko436977d2020-11-12 12:41:06 +00004741 Location object_loc = LocationFrom(target.object);
4742 Location offset_loc = LocationFrom(target.offset);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004743 codegen->MaybeGenerateReadBarrierSlow(invoke, out_loc, out_loc, object_loc, 0u, offset_loc);
Vladimir Marko98873af2020-12-16 12:10:03 +00004744 } else if (byte_swap) {
4745 GenerateReverseBytes(masm, type, load_reg, out);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004746 }
4747 }
4748
Vladimir Marko98873af2020-12-16 12:10:03 +00004749 if (!byte_swap) {
4750 __ Bind(slow_path->GetExitLabel());
4751 }
4752}
4753
4754void IntrinsicLocationsBuilderARM64::VisitVarHandleGet(HInvoke* invoke) {
4755 CreateVarHandleGetLocations(invoke);
4756}
4757
4758void IntrinsicCodeGeneratorARM64::VisitVarHandleGet(HInvoke* invoke) {
4759  GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
4760}
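// Illustrative Java call sites for the VarHandle.get* intrinsics in this group (sketch only;
// the names are examples, not taken from this file):
//   static final VarHandle X =
//       MethodHandles.lookup().findVarHandle(Point.class, "x", int.class);
//   int plain = (int) X.get(p);          // VisitVarHandleGet, relaxed load
//   int acq   = (int) X.getAcquire(p);   // VisitVarHandleGetAcquire, load-acquire
//   int vol   = (int) X.getVolatile(p);  // VisitVarHandleGetVolatile, load-acquire (seq_cst)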
4761
4762void IntrinsicLocationsBuilderARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4763 CreateVarHandleGetLocations(invoke);
4764}
4765
4766void IntrinsicCodeGeneratorARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4767  GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
4768}
4769
4770void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4771 CreateVarHandleGetLocations(invoke);
4772}
4773
4774void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4775  GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire);
4776}
4777
4778void IntrinsicLocationsBuilderARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4779 CreateVarHandleGetLocations(invoke);
4780}
4781
4782void IntrinsicCodeGeneratorARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4783  GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst);
4784}
4785
4786static void CreateVarHandleSetLocations(HInvoke* invoke) {
4787  VarHandleOptimizations optimizations(invoke);
4788  if (optimizations.GetDoNotIntrinsify()) {
4789    return;
4790 }
4791
4792  CreateVarHandleCommonLocations(invoke);
4793}
4794
4795static void GenerateVarHandleSet(HInvoke* invoke,
4796 CodeGeneratorARM64* codegen,
4797                                 std::memory_order order,
4798                                 bool byte_swap = false) {
4799  uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4800 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4801
4802  MacroAssembler* masm = codegen->GetVIXLAssembler();
4803  CPURegister value = InputCPURegisterOrZeroRegAt(invoke, value_index);
4804
4805  VarHandleTarget target = GetVarHandleTarget(invoke);
4806 VarHandleSlowPathARM64* slow_path = nullptr;
4807 if (!byte_swap) {
4808 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4809 GenerateVarHandleTarget(invoke, target, codegen);
4810 __ Bind(slow_path->GetNativeByteOrderLabel());
4811 }
4812
4813  // ARM64 store-release instructions are implicitly sequentially consistent.
4814 bool use_store_release =
4815 (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
4816 DCHECK(use_store_release || order == std::memory_order_relaxed);
4817
4818  // Store the value to the target location.
4819  {
4820 CPURegister source = value;
4821 UseScratchRegisterScope temps(masm);
4822 if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
4823 DCHECK(value.IsW());
4824 Register temp = temps.AcquireW();
4825 __ Mov(temp, value.W());
4826      codegen->GetAssembler()->PoisonHeapReference(temp);
4827      source = temp;
4828 }
Vladimir Marko98873af2020-12-16 12:10:03 +00004829 if (byte_swap) {
4830 DCHECK(!source.IsZero()); // We use the main path for zero as it does not need a byte swap.
4831 Register temp = source.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
4832 if (value_type == DataType::Type::kInt16) {
4833 // Avoid unnecessary sign extension before storing.
4834 value_type = DataType::Type::kUint16;
4835 } else if (DataType::IsFloatingPointType(value_type)) {
4836 __ Fmov(temp, source.Is64Bits() ? source.D() : source.S());
4837 value_type = source.Is64Bits() ? DataType::Type::kInt64 : DataType::Type::kInt32;
4838 source = temp; // Source for the `GenerateReverseBytes()` below.
4839 }
4840 GenerateReverseBytes(masm, value_type, source, temp);
4841 source = temp;
4842 }
4843    MemOperand address(target.object.X(), target.offset.X());
4844    if (use_store_release) {
4845 codegen->StoreRelease(invoke, value_type, source, address, /*needs_null_check=*/ false);
4846 } else {
4847 codegen->Store(value_type, source, address);
4848 }
Vladimir Markoa41ea272020-09-07 15:24:36 +00004849 }
4850
4851 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4852    codegen->MarkGCCard(target.object, Register(value), /*value_can_be_null=*/ true);
4853  }
4854
4855  if (!byte_swap) {
4856 __ Bind(slow_path->GetExitLabel());
4857 }
4858}
4859
4860void IntrinsicLocationsBuilderARM64::VisitVarHandleSet(HInvoke* invoke) {
4861 CreateVarHandleSetLocations(invoke);
4862}
4863
4864void IntrinsicCodeGeneratorARM64::VisitVarHandleSet(HInvoke* invoke) {
4865  GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4866}
4867
4868void IntrinsicLocationsBuilderARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4869 CreateVarHandleSetLocations(invoke);
4870}
4871
4872void IntrinsicCodeGeneratorARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4873  GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
4874}
4875
4876void IntrinsicLocationsBuilderARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
4877 CreateVarHandleSetLocations(invoke);
4878}
4879
4880void IntrinsicCodeGeneratorARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
4881  GenerateVarHandleSet(invoke, codegen_, std::memory_order_release);
4882}
4883
4884void IntrinsicLocationsBuilderARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4885 CreateVarHandleSetLocations(invoke);
4886}
4887
4888void IntrinsicCodeGeneratorARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4889  GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst);
4890}
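// Rough mapping for the VarHandle.set* intrinsics above (sketch only):
//   X.set(p, v)         -> plain store (memory_order_relaxed)
//   X.setOpaque(p, v)   -> handled like the plain store here
//   X.setRelease(p, v)  -> store-release
//   X.setVolatile(p, v) -> store-release as well, which is sequentially consistent on ARM64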
4891
4892static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) {
4893  VarHandleOptimizations optimizations(invoke);
4894  if (optimizations.GetDoNotIntrinsify()) {
4895    return;
4896 }
4897
4898  uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4899  DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4900  if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4901 value_type == DataType::Type::kReference) {
4902 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4903 // the passed reference and reloads it from the field. This breaks the read barriers
4904 // in slow path in different ways. The marked old value may not actually be a to-space
4905 // reference to the same object as `old_value`, breaking slow path assumptions. And
4906 // for CompareAndExchange, marking the old value after comparison failure may actually
4907 // return the reference to `expected`, erroneously indicating success even though we
4908    // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
4909    return;
4910 }
4911
4912  LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4913
4914  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
4915 // We need callee-save registers for both the class object and offset instead of
Vladimir Marko479cbad2020-12-10 16:10:09 +00004916 // the temporaries reserved in CreateVarHandleCommonLocations().
Vladimir Markoc8178f52020-11-24 10:38:16 +00004917 static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u);
4918 uint32_t first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
4919 uint32_t second_callee_save = CTZ(kArm64CalleeSaveRefSpills ^ (1u << first_callee_save));
4920 if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4921 DCHECK_EQ(locations->GetTempCount(), 2u);
4922 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4923 DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
4924 locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
4925 } else {
4926 DCHECK_EQ(locations->GetTempCount(), 1u);
4927 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4928 locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
4929 }
4930  }
4931  size_t old_temp_count = locations->GetTempCount();
4932 DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4933 if (!return_success) {
4934 if (DataType::IsFloatingPointType(value_type)) {
4935 // Add a temporary for old value and exclusive store result if floating point
4936 // `expected` and/or `new_value` take scratch registers.
4937 size_t available_scratch_registers =
4938 (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) +
4939 (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u);
4940 size_t temps_needed = /* pointer, old value, store result */ 3u - available_scratch_registers;
4941 // We can reuse the declaring class (if present) and offset temporary.
4942 if (temps_needed > old_temp_count) {
4943 locations->AddRegisterTemps(temps_needed - old_temp_count);
4944 }
4945 } else if ((value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) &&
4946 !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) &&
4947 !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) &&
4948 GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
4949 // Allocate a normal temporary for store result in the non-native byte order path
4950 // because scratch registers are used by the byte-swapped `expected` and `new_value`.
4951 DCHECK_EQ(old_temp_count, 1u);
4952 locations->AddTemp(Location::RequiresRegister());
4953 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004954 }
4955 if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
4956 // Add a temporary for the `old_value_temp` in slow path.
4957 locations->AddTemp(Location::RequiresRegister());
4958 }
4959}
4960
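// Helper for the compare-and-set/exchange paths below: the exclusive load/store loop works on
// core registers and the float/double comparison is bitwise, so FP inputs are first moved to
// scratch core registers.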
4961static Register MoveToTempIfFpRegister(const CPURegister& cpu_reg,
4962 DataType::Type type,
4963 MacroAssembler* masm,
4964 UseScratchRegisterScope* temps) {
4965 if (cpu_reg.IsS()) {
4966 DCHECK_EQ(type, DataType::Type::kFloat32);
4967 Register reg = temps->AcquireW();
4968 __ Fmov(reg, cpu_reg.S());
4969 return reg;
4970 } else if (cpu_reg.IsD()) {
4971 DCHECK_EQ(type, DataType::Type::kFloat64);
4972 Register reg = temps->AcquireX();
4973 __ Fmov(reg, cpu_reg.D());
4974 return reg;
4975  } else {
4976    return DataType::Is64BitType(type) ? cpu_reg.X() : cpu_reg.W();
4977  }
4978}
4979
4980static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4981 CodeGeneratorARM64* codegen,
4982 std::memory_order order,
4983 bool return_success,
Vladimir Marko98873af2020-12-16 12:10:03 +00004984 bool strong,
4985 bool byte_swap = false) {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004986 DCHECK(return_success || strong);
4987
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004988 uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4989 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4990 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4991 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4992
4993 MacroAssembler* masm = codegen->GetVIXLAssembler();
4994 LocationSummary* locations = invoke->GetLocations();
4995 CPURegister expected = InputCPURegisterOrZeroRegAt(invoke, expected_index);
4996 CPURegister new_value = InputCPURegisterOrZeroRegAt(invoke, new_value_index);
4997 CPURegister out = helpers::OutputCPURegister(invoke);
4998
4999  VarHandleTarget target = GetVarHandleTarget(invoke);
5000 VarHandleSlowPathARM64* slow_path = nullptr;
5001 if (!byte_swap) {
5002 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5003 slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
5004 GenerateVarHandleTarget(invoke, target, codegen);
5005 __ Bind(slow_path->GetNativeByteOrderLabel());
5006 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005007
5008 // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
5009 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
5010 // Mark card for object assuming new value is stored.
5011 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
5012    codegen->MarkGCCard(target.object, new_value.W(), new_value_can_be_null);
5013  }
5014
5015  // Reuse the `offset` temporary for the pointer to the target location,
5016  // except for references that need the offset for the read barrier.
5017  UseScratchRegisterScope temps(masm);
5018  Register tmp_ptr = target.offset.X();
5019  if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
5020 tmp_ptr = temps.AcquireX();
5021 }
Vladimir Marko436977d2020-11-12 12:41:06 +00005022 __ Add(tmp_ptr, target.object.X(), target.offset.X());
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005023
Vladimir Marko98873af2020-12-16 12:10:03 +00005024 // Move floating point values to scratch registers.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005025 // Note that float/double CAS uses bitwise comparison, rather than the operator==.
5026 Register expected_reg = MoveToTempIfFpRegister(expected, value_type, masm, &temps);
5027 Register new_value_reg = MoveToTempIfFpRegister(new_value, value_type, masm, &temps);
Vladimir Marko98873af2020-12-16 12:10:03 +00005028 bool is_fp = DataType::IsFloatingPointType(value_type);
5029 DataType::Type cas_type = is_fp
5030 ? ((value_type == DataType::Type::kFloat64) ? DataType::Type::kInt64 : DataType::Type::kInt32)
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005031 : value_type;
Vladimir Marko98873af2020-12-16 12:10:03 +00005032 // Avoid sign extension in the CAS loop by zero-extending `expected` before the loop. This adds
5033 // one instruction for CompareAndExchange as we shall need to sign-extend the returned value.
5034 if (value_type == DataType::Type::kInt16 && !expected.IsZero()) {
5035 Register temp = temps.AcquireW();
5036 __ Uxth(temp, expected_reg);
5037 expected_reg = temp;
5038 cas_type = DataType::Type::kUint16;
5039 } else if (value_type == DataType::Type::kInt8 && !expected.IsZero()) {
5040 Register temp = temps.AcquireW();
5041 __ Uxtb(temp, expected_reg);
5042 expected_reg = temp;
5043 cas_type = DataType::Type::kUint8;
5044 }
5045
5046 if (byte_swap) {
5047 // Do the byte swap and move values to scratch registers if needed.
5048 // Non-zero FP values and non-zero `expected` for `kInt16` are already in scratch registers.
5049 DCHECK_NE(value_type, DataType::Type::kInt8);
5050 if (!expected.IsZero()) {
5051 bool is_scratch = is_fp || (value_type == DataType::Type::kInt16);
5052 Register temp = is_scratch ? expected_reg : temps.AcquireSameSizeAs(expected_reg);
5053 GenerateReverseBytes(masm, cas_type, expected_reg, temp);
5054 expected_reg = temp;
5055 }
5056 if (!new_value.IsZero()) {
5057 Register temp = is_fp ? new_value_reg : temps.AcquireSameSizeAs(new_value_reg);
5058 GenerateReverseBytes(masm, cas_type, new_value_reg, temp);
5059 new_value_reg = temp;
5060 }
5061 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005062
5063 // Prepare registers for old value and the result of the exclusive store.
5064 Register old_value;
5065 Register store_result;
5066 if (return_success) {
5067 // Use the output register for both old value and exclusive store result.
5068 old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
5069 store_result = out.W();
5070 } else if (DataType::IsFloatingPointType(value_type)) {
5071 // We need two temporary registers but we have already used scratch registers for
5072 // holding the expected and new value unless they are zero bit pattern (+0.0f or
5073 // +0.0). We have allocated sufficient normal temporaries to handle that.
Vladimir Marko98873af2020-12-16 12:10:03 +00005074 size_t next_temp = 1u;
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005075 if (expected.IsZero()) {
5076 old_value = (cas_type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
5077 } else {
5078 Location temp = locations->GetTemp(next_temp);
5079 ++next_temp;
5080 old_value = (cas_type == DataType::Type::kInt64) ? XRegisterFrom(temp) : WRegisterFrom(temp);
5081 }
5082 store_result =
5083 new_value.IsZero() ? temps.AcquireW() : WRegisterFrom(locations->GetTemp(next_temp));
Vladimir Marko98873af2020-12-16 12:10:03 +00005084 DCHECK(!old_value.Is(tmp_ptr));
5085 DCHECK(!store_result.Is(tmp_ptr));
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005086 } else {
Vladimir Marko98873af2020-12-16 12:10:03 +00005087 // Use the output register for the old value.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005088 old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
Vladimir Marko98873af2020-12-16 12:10:03 +00005089 // Use scratch register for the store result, except when we have used up
5090 // scratch registers for byte-swapped `expected` and `new_value`.
5091 // In that case, we have allocated a normal temporary.
5092 store_result = (byte_swap && !expected.IsZero() && !new_value.IsZero())
5093 ? WRegisterFrom(locations->GetTemp(1))
5094 : temps.AcquireW();
5095 DCHECK(!store_result.Is(tmp_ptr));
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005096 }
5097
5098 vixl::aarch64::Label exit_loop_label;
5099 vixl::aarch64::Label* exit_loop = &exit_loop_label;
5100 vixl::aarch64::Label* cmp_failure = &exit_loop_label;
5101
5102 if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
5103 // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
5104 // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
Vladimir Marko98873af2020-12-16 12:10:03 +00005105 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005106 Register old_value_temp =
5107 WRegisterFrom(locations->GetTemp((expected_coordinates_count == 0u) ? 2u : 1u));
5108 // For strong CAS, use a scratch register for the store result in slow path.
5109 // For weak CAS, we need to check the store result, so store it in `store_result`.
5110 Register slow_path_store_result = strong ? Register() : store_result;
5111 ReadBarrierCasSlowPathARM64* rb_slow_path =
5112 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
5113 invoke,
5114 order,
5115 strong,
Vladimir Marko436977d2020-11-12 12:41:06 +00005116 target.object,
5117 target.offset.X(),
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005118 expected_reg,
5119 new_value_reg,
5120 old_value,
5121 old_value_temp,
5122 slow_path_store_result,
5123 /*update_old_value=*/ !return_success,
5124 codegen);
5125 codegen->AddSlowPath(rb_slow_path);
5126 exit_loop = rb_slow_path->GetExitLabel();
5127 cmp_failure = rb_slow_path->GetEntryLabel();
5128 }
5129
5130  GenerateCompareAndSet(codegen,
5131 cas_type,
5132 order,
5133 strong,
5134 cmp_failure,
5135 tmp_ptr,
5136 new_value_reg,
5137 old_value,
5138 store_result,
5139 expected_reg);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005140 __ Bind(exit_loop);
5141
5142 if (return_success) {
5143 if (strong) {
5144 __ Cset(out.W(), eq);
5145 } else {
Vladimir Markoe17530a2020-11-11 17:02:26 +00005146 // On success, the Z flag is set and the store result is 1, see GenerateCompareAndSet().
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005147 // On failure, either the Z flag is clear or the store result is 0.
5148 // Determine the final success value with a CSEL.
5149 __ Csel(out.W(), store_result, wzr, eq);
5150 }
Vladimir Marko98873af2020-12-16 12:10:03 +00005151 } else if (byte_swap) {
5152 // Also handles moving to FP registers.
5153 GenerateReverseBytes(masm, value_type, old_value, out);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005154 } else if (DataType::IsFloatingPointType(value_type)) {
5155 __ Fmov((value_type == DataType::Type::kFloat64) ? out.D() : out.S(), old_value);
Vladimir Marko98873af2020-12-16 12:10:03 +00005156 } else if (value_type == DataType::Type::kInt8) {
5157 __ Sxtb(out.W(), old_value);
5158 } else if (value_type == DataType::Type::kInt16) {
5159 __ Sxth(out.W(), old_value);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005160 }
Vladimir Marko98873af2020-12-16 12:10:03 +00005161
5162 if (!byte_swap) {
5163 __ Bind(slow_path->GetExitLabel());
5164 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005165}
5166
5167void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5168 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
5169}
5170
5171void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5172 GenerateVarHandleCompareAndSetOrExchange(
5173 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
5174}
5175
5176void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5177 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
5178}
5179
5180void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5181 GenerateVarHandleCompareAndSetOrExchange(
5182 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
5183}
5184
5185void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5186 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
5187}
5188
5189void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5190 GenerateVarHandleCompareAndSetOrExchange(
5191 invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
5192}
5193
5194void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5195 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5196}
5197
5198void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5199 GenerateVarHandleCompareAndSetOrExchange(
5200 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
5201}
5202
5203void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5204 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5205}
5206
5207void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5208 GenerateVarHandleCompareAndSetOrExchange(
5209 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
5210}
5211
5212void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5213 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5214}
5215
5216void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5217 GenerateVarHandleCompareAndSetOrExchange(
5218 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
5219}
5220
5221void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5222 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5223}
5224
5225void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5226 GenerateVarHandleCompareAndSetOrExchange(
5227 invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
5228}
5229
5230void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5231 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5232}
5233
5234void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5235 GenerateVarHandleCompareAndSetOrExchange(
5236 invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
5237}
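// Illustrative call sites for the compare-and-set/exchange intrinsics above (sketch only):
//   boolean ok      = X.compareAndSet(p, expected, value);         // strong, seq_cst, returns success
//   int     witness = (int) X.compareAndExchangeAcquire(p, e, v);  // strong, acquire, returns old value
//   boolean weak    = X.weakCompareAndSetPlain(p, e, v);           // weak, relaxed, may fail spuriously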
5238
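// Locations for the getAndSet / getAndAdd / getAndBitwise* family; the GetAndUpdateOp value
// selects which read-modify-write sequence GenerateVarHandleGetAndUpdate() emits.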
5239static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
5240                                                 GetAndUpdateOp get_and_update_op) {
5241  VarHandleOptimizations optimizations(invoke);
5242  if (optimizations.GetDoNotIntrinsify()) {
5243    return;
5244 }
5245
5246 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
5247 invoke->GetType() == DataType::Type::kReference) {
5248 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
5249 // the passed reference and reloads it from the field, thus seeing the new value
Vladimir Marko436977d2020-11-12 12:41:06 +00005250 // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005251 return;
5252 }
5253
Vladimir Marko479cbad2020-12-10 16:10:09 +00005254 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005255
Vladimir Marko98873af2020-12-16 12:10:03 +00005256 size_t old_temp_count = locations->GetTempCount();
5257 DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
Vladimir Markoe1510d42020-11-13 11:07:13 +00005258 if (DataType::IsFloatingPointType(invoke->GetType())) {
5259 if (get_and_update_op == GetAndUpdateOp::kAdd) {
5260 // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
5261 locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister());
5262 } else {
5263 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
Vladimir Marko98873af2020-12-16 12:10:03 +00005264 // We can reuse the declaring class temporary if present.
5265 if (old_temp_count == 1u &&
5266 !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
Vladimir Markoe1510d42020-11-13 11:07:13 +00005267 // Add a temporary for `old_value` if floating point `new_value` takes a scratch register.
5268 locations->AddTemp(Location::RequiresRegister());
5269 }
5270 }
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005271 }
Vladimir Marko98873af2020-12-16 12:10:03 +00005272 // We need a temporary for the byte-swap path for bitwise operations unless the argument is a
5273 // zero which does not need a byte-swap. We can reuse the declaring class temporary if present.
5274 if (old_temp_count == 1u &&
5275 (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) &&
5276 GetExpectedVarHandleCoordinatesCount(invoke) == 2u &&
5277 !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5278 DataType::Type value_type =
5279 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5280 if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) {
5281 locations->AddTemp(Location::RequiresRegister());
5282 }
5283 }
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005284}
5285
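// Generate code for the VarHandle getAndSet/getAndAdd/getAndBitwise* intrinsics. The sequence is
// roughly:
//   1. run the common VarHandle checks and compute the target object and offset (skipped when
//      re-entered with `byte_swap` set from the byte array view slow path),
//   2. mark the GC card if a reference is being stored,
//   3. run the atomic update loop via GenerateGetAndUpdate() using core registers,
//   4. convert the old value to the expected result in `out` (FP move, sign extension,
//      byte swap or read barrier, as needed).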
static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
                                          CodeGeneratorARM64* codegen,
                                          GetAndUpdateOp get_and_update_op,
                                          std::memory_order order,
                                          bool byte_swap = false) {
  uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
  DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);

  MacroAssembler* masm = codegen->GetVIXLAssembler();
  LocationSummary* locations = invoke->GetLocations();
  CPURegister arg = InputCPURegisterOrZeroRegAt(invoke, arg_index);
  CPURegister out = helpers::OutputCPURegister(invoke);

  VarHandleTarget target = GetVarHandleTarget(invoke);
  VarHandleSlowPathARM64* slow_path = nullptr;
  if (!byte_swap) {
    slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
    slow_path->SetGetAndUpdateOp(get_and_update_op);
    GenerateVarHandleTarget(invoke, target, codegen);
    __ Bind(slow_path->GetNativeByteOrderLabel());
  }

  // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
  if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
    DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
    // Mark card for object, the new value shall be stored.
    bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(target.object, arg.W(), new_value_can_be_null);
  }

  // Reuse the `target.offset` temporary for the pointer to the target location,
  // except for references that need the offset for the non-Baker read barrier.
  UseScratchRegisterScope temps(masm);
  Register tmp_ptr = target.offset.X();
  if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
      value_type == DataType::Type::kReference) {
    tmp_ptr = temps.AcquireX();
  }
  __ Add(tmp_ptr, target.object.X(), target.offset.X());

  // The load/store type is never floating point.
  bool is_fp = DataType::IsFloatingPointType(value_type);
  DataType::Type load_store_type = is_fp
      ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
      : value_type;
  // Avoid sign extension in the CAS loop. Sign-extend after the loop.
  // Note: Using unsigned values yields the same value to store (we do not store higher bits).
  if (value_type == DataType::Type::kInt8) {
    load_store_type = DataType::Type::kUint8;
  } else if (value_type == DataType::Type::kInt16) {
    load_store_type = DataType::Type::kUint16;
  }
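  // For example, a kInt16 old value 0x8000 is loaded as 0x00008000 by the unsigned kUint16
  // access inside the loop and only sign-extended to 0xffff8000 by the `Sxth` after the loop.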

  // Prepare register for old value.
  CPURegister old_value = out;
  if (get_and_update_op == GetAndUpdateOp::kSet) {
    // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
    // rather than moving between core and FP registers in the loop.
    arg = MoveToTempIfFpRegister(arg, value_type, masm, &temps);
    if (DataType::IsFloatingPointType(value_type) && !arg.IsZero()) {
      // We need a temporary register for the old value: the FP new value (not a zero bit
      // pattern, +0.0f or +0.0, here) has already taken one scratch register and
      // GenerateGetAndUpdate() needs the other, so use the normal temporary allocated
      // by CreateVarHandleGetAndUpdateLocations() for this case.
      old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type);
    } else if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) &&
               value_type == DataType::Type::kReference) {
      // Load the old value initially to a scratch register.
      // We shall move it to `out` later with a read barrier.
      old_value = temps.AcquireW();
    }
  }

  if (byte_swap) {
    DCHECK_NE(value_type, DataType::Type::kReference);
    DCHECK_NE(DataType::Size(value_type), 1u);
    if (get_and_update_op == GetAndUpdateOp::kAdd) {
      // We need to do the byte swapping in the CAS loop for GetAndAdd.
      get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
    } else if (!arg.IsZero()) {
      // For other operations, avoid byte swap inside the CAS loop by providing an adjusted `arg`.
      // For GetAndSet use a scratch register; FP argument is already in a scratch register.
      // For bitwise operations GenerateGetAndUpdate() needs both scratch registers;
      // we have allocated a normal temporary to handle that.
      CPURegister temp = (get_and_update_op == GetAndUpdateOp::kSet)
          ? (is_fp ? arg : (arg.Is64Bits() ? temps.AcquireX() : temps.AcquireW()))
          : CPURegisterFrom(locations->GetTemp(1u), load_store_type);
      GenerateReverseBytes(masm, load_store_type, arg, temp);
      arg = temp;
    }
  }

  GenerateGetAndUpdate(codegen, get_and_update_op, load_store_type, order, tmp_ptr, arg, old_value);

  if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
    // The only adjustment needed is sign-extension for `kInt16`.
    // Everything else has been done by the `GenerateGetAndUpdate()`.
    DCHECK(byte_swap);
    if (value_type == DataType::Type::kInt16) {
      DCHECK_EQ(load_store_type, DataType::Type::kUint16);
      __ Sxth(out.W(), old_value.W());
    }
  } else if (byte_swap) {
    // Also handles moving to FP registers.
    GenerateReverseBytes(masm, value_type, old_value, out);
  } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat64) {
    __ Fmov(out.D(), old_value.X());
  } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat32) {
    __ Fmov(out.S(), old_value.W());
  } else if (value_type == DataType::Type::kInt8) {
    __ Sxtb(out.W(), old_value.W());
  } else if (value_type == DataType::Type::kInt16) {
    __ Sxth(out.W(), old_value.W());
  } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
    if (kUseBakerReadBarrier) {
      codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(out.W(), old_value.W());
    } else {
      codegen->GenerateReadBarrierSlow(
          invoke,
          Location::RegisterLocation(out.GetCode()),
          Location::RegisterLocation(old_value.GetCode()),
          Location::RegisterLocation(target.object.GetCode()),
          /*offset=*/ 0u,
          /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
    }
  }

  if (!byte_swap) {
    __ Bind(slow_path->GetExitLabel());
  }
}

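// As an illustration only (not taken from this file), the Java-level calls that the visitors
// below intrinsify look roughly like this, with a hypothetical `Counter` class and `value` field:
//
//   VarHandle vh = MethodHandles.lookup().findVarHandle(Counter.class, "value", int.class);
//   int previous = (int) vh.getAndAdd(counter, 1);              // VisitVarHandleGetAndAdd
//   int old = (int) vh.getAndSetAcquire(counter, 42);           // VisitVarHandleGetAndSetAcquire
//   int bits = (int) vh.getAndBitwiseOrRelease(counter, 0x80);  // VisitVarHandleGetAndBitwiseOrRelease
//
// The *Acquire/*Release variants map to std::memory_order_acquire/release as coded below.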
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
}

void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
}

void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
}

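// Out-of-line code for invokes whose VarHandle turns out to be a ByteArrayViewVarHandle
// (created with MethodHandles.byteArrayViewVarHandle()). The main path only handles the native
// byte order; this code re-checks the VarHandle class, the array bounds and the alignment, and
// then re-enters the Get/Set/CompareAndSetOrExchange/GetAndUpdate generators with
// `byte_swap=true` when the view uses the non-native byte order. Illustrative Java only
// (not taken from this file):
//
//   VarHandle vh = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.BIG_ENDIAN);
//   int value = (int) vh.get(bytes, byteIndex);  // Needs a byte swap on little-endian ARM64.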
void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
  DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
  CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
  MacroAssembler* masm = codegen->GetVIXLAssembler();
  HInvoke* invoke = GetInvoke();
  mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
  DataType::Type value_type =
      GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
  DCHECK_NE(value_type, DataType::Type::kReference);
  size_t size = DataType::Size(value_type);
  DCHECK_GT(size, 1u);
  Register varhandle = InputRegisterAt(invoke, 0);
  Register object = InputRegisterAt(invoke, 1);
  Register index = InputRegisterAt(invoke, 2);

  MemberOffset class_offset = mirror::Object::ClassOffset();
  MemberOffset array_length_offset = mirror::Array::LengthOffset();
  MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
  MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();

  __ Bind(GetByteArrayViewCheckLabel());

  VarHandleTarget target = GetVarHandleTarget(invoke);
  {
    UseScratchRegisterScope temps(masm);
    Register temp = temps.AcquireW();
    Register temp2 = temps.AcquireW();

    // The main path checked that the coordinateType0 is an array class that matches
    // the class of the actual coordinate argument but it does not match the value type.
    // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
    __ Ldr(temp, HeapOperand(varhandle, class_offset.Int32Value()));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
    codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
    __ Cmp(temp, temp2);
    __ B(GetEntryLabel(), ne);

    // Check for array index out of bounds.
    __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
    __ Subs(temp, temp, index);
    __ Ccmp(temp, size, NoFlag, hs);  // If SUBS yields LO (C=false), keep the C flag clear.
    __ B(GetEntryLabel(), lo);

    // Construct the target.
    __ Add(target.offset, index, data_offset.Int32Value());

    // Alignment check. For unaligned access, go to the runtime.
    DCHECK(IsPowerOfTwo(size));
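    // For example, with size == 4u the two low bits of `target.offset` must be zero;
    // otherwise the access is unaligned and is left to the runtime call instead.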
    if (size == 2u) {
      __ Tbnz(target.offset, 0, GetEntryLabel());
    } else {
      __ Tst(target.offset, size - 1u);
      __ B(GetEntryLabel(), ne);
    }

    // Byte order check. For native byte order return to the main path.
    if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
        IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
      // There is no reason to differentiate between native byte order and byte-swap
      // for setting a zero bit pattern. Just return to the main path.
      __ B(GetNativeByteOrderLabel());
      return;
    }
    __ Ldr(temp, HeapOperand(varhandle, native_byte_order_offset.Int32Value()));
    __ Cbnz(temp, GetNativeByteOrderLabel());
  }

  switch (access_mode_template) {
    case mirror::VarHandle::AccessModeTemplate::kGet:
      GenerateVarHandleGet(invoke, codegen, order_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kSet:
      GenerateVarHandleSet(invoke, codegen, order_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
    case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
      GenerateVarHandleCompareAndSetOrExchange(
          invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
      break;
    case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
      GenerateVarHandleGetAndUpdate(
          invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
      break;
  }
  __ B(GetExitLabel());
}

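// The intrinsics listed below are recognized but have no ARM64-specific code generation here;
// the UNIMPLEMENTED_INTRINSIC macro (defined in the shared intrinsics headers, not shown in this
// file) leaves their visitors empty so the calls fall back to the regular invoke path, while
// UNREACHABLE_INTRINSICS covers intrinsics that are expected to have been handled earlier in
// the compilation (e.g. replaced by dedicated HIR).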
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)

UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvokeExact)
UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvoke)

// OpenJDK 11
UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetObject)
UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeCompareAndSetObject)

UNREACHABLE_INTRINSICS(ARM64)

#undef __

}  // namespace arm64
}  // namespace art