/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm64.h"

#include "arch/arm64/callee_save_frame_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "data_type-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "mirror/var_handle.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art {

namespace arm64 {

using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::IsConstantZeroBitPattern;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::HRegisterFrom;
using helpers::InputRegisterAt;
using helpers::OutputRegister;

namespace {

ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
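  // Forms a MemOperand for the raw (absolute) address held in `location`; used by the
  // Memory.peek/poke intrinsics below, which operate on native addresses rather than
  // managed heap references.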
  return MemOperand(XRegisterFrom(location), offset);
}

}  // namespace

MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
  return codegen_->GetVIXLAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

using IntrinsicSlowPathARM64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM64,
                                                 SlowPathCodeARM64,
                                                 Arm64Assembler>;

#define __ codegen->GetVIXLAssembler()->

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
      : SlowPathCodeARM64(instruction), tmp_(tmp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen_in) override {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = DataType::Size(DataType::Type::kReference);

    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    Register tmp_reg = WRegisterFrom(tmp_);

    __ Bind(GetEntryLabel());
    vixl::aarch64::Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp_.reg(), LR);
    DCHECK_NE(tmp_.reg(), WSP);
    DCHECK_NE(tmp_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    DCHECK_NE(tmp_.reg(), IP0);
    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(&slow_copy_loop, ne);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
};
#undef __

bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ masm->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
}

void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenerateReverseBytes(MacroAssembler* masm,
                                 DataType::Type type,
                                 CPURegister in,
                                 CPURegister out) {
  switch (type) {
    case DataType::Type::kUint16:
      __ Rev16(out.W(), in.W());
      break;
    case DataType::Type::kInt16:
      __ Rev16(out.W(), in.W());
      __ Sxth(out.W(), out.W());
      break;
    case DataType::Type::kInt32:
      __ Rev(out.W(), in.W());
      break;
    case DataType::Type::kInt64:
      __ Rev(out.X(), in.X());
      break;
    case DataType::Type::kFloat32:
      __ Rev(in.W(), in.W());  // Note: Clobbers `in`.
      __ Fmov(out.S(), in.W());
      break;
    case DataType::Type::kFloat64:
      __ Rev(in.X(), in.X());  // Note: Clobbers `in`.
      __ Fmov(out.D(), in.X());
      break;
    default:
      LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
      UNREACHABLE();
  }
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type type,
                            MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();
  GenerateReverseBytes(masm, type, CPURegisterFrom(in, type), CPURegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    DataType::Type type,
                                    MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     DataType::Type type,
                                     MacroAssembler* masm) {
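  // AArch64 has no count-trailing-zeros instruction, so the code below bit-reverses the
  // input (RBIT) and then counts leading zeros (CLZ) of the result.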
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenReverse(LocationSummary* locations,
                       DataType::Type type,
                       MacroAssembler* masm) {
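  // Integer.reverse and Long.reverse map directly to RBIT, which reverses the bit order of
  // the register.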
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
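  // General-purpose AArch64 registers have no population-count instruction, so the value is
  // moved to a NEON register, CNT counts the set bits in each byte lane, ADDV sums the byte
  // counts, and the result is moved back to a core register.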
  DCHECK(DataType::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
  DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(instr, 0);
  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
  VRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();

  __ Fmov(fpr, src);
  __ Cnt(fpr.V8B(), fpr.V8B());
  __ Addv(fpr.B(), fpr.V8B());
  __ Fmov(dst, fpr);
}

void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
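  // highestOneBit(x) is 0 for x == 0 and otherwise 1 << (bit_width - 1 - CLZ(x)). This is
  // computed without a branch: start from the top-bit mask, clear it when CLZ(x) equals the
  // bit width (i.e. x == 0), then shift the mask right by CLZ(x).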
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
  size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
  size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;

  __ Clz(temp, src);
  __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
  __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
  __ Lsr(dst, dst, temp);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
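  // lowestOneBit(x) is x & -x: AND-ing the value with its negation isolates the least
  // significant set bit (and yields 0 for x == 0).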
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();

  __ Neg(temp, src);
  __ And(dst, temp, src);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
  // Java 8 API definition for Math.round():
  // Return the closest long or int to the argument, with ties rounding to positive infinity.
  //
  // There is no single instruction in ARMv8 that can support the above definition.
  // We choose to use FCVTAS here, because it has the closest semantics.
  // FCVTAS performs rounding to nearest integer, ties away from zero.
  // For most inputs (positive values, zero or NaN), this instruction is enough.
  // We only need a little extra handling after FCVTAS if the input is a negative half value.
  //
  // The reason why we didn't choose the FCVTPS instruction here is that although it rounds
  // toward positive infinity, it doesn't round to nearest.
  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
  // If we were using this instruction, for most inputs, more handling code would be needed.
  LocationSummary* l = invoke->GetLocations();
  VRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  VRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK((type == DataType::Type::kInt32) ||
         (type == DataType::Type::kInt64) ||
         (type == DataType::Type::kReference));
  Location base_loc = locations->InAt(1);
  Register base = WRegisterFrom(base_loc);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);  // Long offset.
  Location trg_loc = locations->Out();
  Register trg = RegisterFrom(trg_loc, type);

  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    Register temp = WRegisterFrom(locations->GetTemp(0));
    MacroAssembler* masm = codegen->GetVIXLAssembler();
    // Piggy-back on the field load path using introspection for the Baker read barrier.
    __ Add(temp, base, offset.W());  // Offset should not exceed 32 bits.
    codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                   trg_loc,
                                                   base,
                                                   MemOperand(temp.X()),
                                                   /* needs_null_check= */ false,
                                                   is_volatile);
  } else {
    // Other cases.
    MemOperand mem_op(base.X(), offset);
    if (is_volatile) {
      codegen->LoadAcquire(invoke, type, trg, mem_op, /* needs_null_check= */ true);
    } else {
      codegen->Load(type, trg, mem_op);
    }

    if (type == DataType::Type::kReference) {
      DCHECK(trg.IsW());
      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    }
  }
}

static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
  switch (intrinsic) {
    case Intrinsics::kUnsafeGetObject:
    case Intrinsics::kUnsafeGetObjectVolatile:
    case Intrinsics::kJdkUnsafeGetObject:
    case Intrinsics::kJdkUnsafeGetObjectVolatile:
    case Intrinsics::kJdkUnsafeGetObjectAcquire:
      return true;
    default:
      break;
  }
  return false;
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier load in order to use
    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
    locations->AddTemp(FixedTempLocation());
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
  VisitJdkUnsafeGet(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  VisitJdkUnsafeGetLong(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  VisitJdkUnsafeGetObject(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetObjectVolatile(invoke);
}

void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
  VisitJdkUnsafeGet(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  VisitJdkUnsafeGetLong(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  VisitJdkUnsafeGetObject(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  VisitJdkUnsafeGetObjectVolatile(invoke);
}

void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
  VisitJdkUnsafePut(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutOrdered(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
  VisitJdkUnsafePutObject(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutObjectOrdered(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutObjectVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
  VisitJdkUnsafePutLong(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutLongOrdered(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutLongVolatile(invoke);
}

void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}

static void GenUnsafePut(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARM64* codegen) {
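  // Stores the value at `base + offset`. Volatile and ordered stores are emitted as a
  // store-release; reference stores poison the value first when heap poisoning is enabled
  // and mark the GC card of the holder object afterwards.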
Artem Serov914d7a82017-02-07 14:33:49 +0000950 LocationSummary* locations = invoke->GetLocations();
Alexandre Rames087930f2016-08-02 13:45:28 +0100951 MacroAssembler* masm = codegen->GetVIXLAssembler();
Andreas Gampe878d58c2015-01-15 23:24:00 -0800952
953 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
954 Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
955 Register value = RegisterFrom(locations->InAt(3), type);
Roland Levillain4d027112015-07-01 15:41:14 +0100956 Register source = value;
Andreas Gampe878d58c2015-01-15 23:24:00 -0800957 MemOperand mem_op(base.X(), offset);
958
Roland Levillain4d027112015-07-01 15:41:14 +0100959 {
960 // We use a block to end the scratch scope before the write barrier, thus
961 // freeing the temporary registers so they can be used in `MarkGCCard`.
962 UseScratchRegisterScope temps(masm);
963
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100964 if (kPoisonHeapReferences && type == DataType::Type::kReference) {
Roland Levillain4d027112015-07-01 15:41:14 +0100965 DCHECK(value.IsW());
966 Register temp = temps.AcquireW();
967 __ Mov(temp.W(), value.W());
968 codegen->GetAssembler()->PoisonHeapReference(temp.W());
969 source = temp;
Andreas Gampe878d58c2015-01-15 23:24:00 -0800970 }
Roland Levillain4d027112015-07-01 15:41:14 +0100971
972 if (is_volatile || is_ordered) {
Andreas Gampe3db70682018-12-26 15:12:03 -0800973 codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false);
Roland Levillain4d027112015-07-01 15:41:14 +0100974 } else {
975 codegen->Store(type, source, mem_op);
976 }
Andreas Gampe878d58c2015-01-15 23:24:00 -0800977 }
978
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100979 if (type == DataType::Type::kReference) {
Nicolas Geoffray07276db2015-05-18 14:22:09 +0100980 bool value_can_be_null = true; // TODO: Worth finding out this information?
981 codegen->MarkGCCard(base, value, value_can_be_null);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800982 }
983}
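// Illustrative sketch (for exposition only; register names are placeholders): for a volatile
// reference put such as VisitJdkUnsafePutObjectVolatile with heap poisoning enabled, the helper
// above emits approximately:
//
//     mov  w16, w_value      // Copy, so the unpoisoned value stays available for the card mark.
//     neg  w16, w16          // PoisonHeapReference(), assuming poisoning is reference negation.
//     stlr w16, [x_addr]     // StoreRelease(); STLR takes only a base register, so the
//                            // base + offset address is presumably materialized into a temp first.
//     ...                    // MarkGCCard() for the holder object, since a reference was stored.
//
// A plain (non-volatile, non-ordered) put uses an ordinary STR with the [base, offset] addressing mode.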
984
985void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +0000986 VisitJdkUnsafePut(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800987}
988void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +0000989 VisitJdkUnsafePutOrdered(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800990}
991void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +0000992 VisitJdkUnsafePutVolatile(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800993}
994void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +0000995 VisitJdkUnsafePutObject(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800996}
997void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +0000998 VisitJdkUnsafePutObjectOrdered(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -0800999}
1000void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001001 VisitJdkUnsafePutObjectVolatile(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001002}
1003void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001004 VisitJdkUnsafePutLong(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001005}
1006void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001007 VisitJdkUnsafePutLongOrdered(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001008}
1009void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001010 VisitJdkUnsafePutLongVolatile(invoke);
1011}
1012
1013void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePut(HInvoke* invoke) {
1014 GenUnsafePut(invoke,
1015 DataType::Type::kInt32,
1016 /*is_volatile=*/ false,
1017 /*is_ordered=*/ false,
1018 codegen_);
1019}
1020void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
1021 GenUnsafePut(invoke,
1022 DataType::Type::kInt32,
1023 /*is_volatile=*/ false,
1024 /*is_ordered=*/ true,
1025 codegen_);
1026}
1027void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
1028 GenUnsafePut(invoke,
1029 DataType::Type::kInt32,
1030 /*is_volatile=*/ true,
1031 /*is_ordered=*/ false,
1032 codegen_);
1033}
Sorin Basca0069ad72021-09-17 17:33:09 +00001034void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
1035 GenUnsafePut(invoke,
1036 DataType::Type::kInt32,
1037 /*is_volatile=*/ true,
1038 /*is_ordered=*/ false,
1039 codegen_);
1040}
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001041void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObject(HInvoke* invoke) {
1042 GenUnsafePut(invoke,
1043 DataType::Type::kReference,
1044 /*is_volatile=*/ false,
1045 /*is_ordered=*/ false,
1046 codegen_);
1047}
1048void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
1049 GenUnsafePut(invoke,
1050 DataType::Type::kReference,
1051 /*is_volatile=*/ false,
1052 /*is_ordered=*/ true,
1053 codegen_);
1054}
1055void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
1056 GenUnsafePut(invoke,
1057 DataType::Type::kReference,
1058 /*is_volatile=*/ true,
1059 /*is_ordered=*/ false,
1060 codegen_);
1061}
Sorin Basca507cf902021-10-06 12:04:56 +00001062void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
1063 GenUnsafePut(invoke,
1064 DataType::Type::kReference,
1065 /*is_volatile=*/ true,
1066 /*is_ordered=*/ false,
1067 codegen_);
1068}
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001069void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLong(HInvoke* invoke) {
Artem Serov914d7a82017-02-07 14:33:49 +00001070 GenUnsafePut(invoke,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001071 DataType::Type::kInt64,
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001072 /*is_volatile=*/ false,
1073 /*is_ordered=*/ false,
1074 codegen_);
1075}
1076void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
1077 GenUnsafePut(invoke,
1078 DataType::Type::kInt64,
1079 /*is_volatile=*/ false,
1080 /*is_ordered=*/ true,
1081 codegen_);
1082}
1083void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
1084 GenUnsafePut(invoke,
1085 DataType::Type::kInt64,
1086 /*is_volatile=*/ true,
1087 /*is_ordered=*/ false,
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001088 codegen_);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001089}
Sorin Basca507cf902021-10-06 12:04:56 +00001090void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
1091 GenUnsafePut(invoke,
1092 DataType::Type::kInt64,
1093 /*is_volatile=*/ true,
1094 /*is_ordered=*/ false,
1095 codegen_);
1096}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001097
Vladimir Markoe17530a2020-11-11 17:02:26 +00001098static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
Ulya Trafimovichec696e52022-01-26 10:21:32 +00001099 const bool can_call = kEmitCompilerReadBarrier && IsUnsafeCASObject(invoke);
Vladimir Markoca6fff82017-10-03 14:49:14 +01001100 LocationSummary* locations =
1101 new (allocator) LocationSummary(invoke,
1102 can_call
1103 ? LocationSummary::kCallOnSlowPath
1104 : LocationSummary::kNoCall,
1105 kIntrinsified);
Vladimir Markoc8178f52020-11-24 10:38:16 +00001106 if (can_call && kUseBakerReadBarrier) {
Vladimir Marko94796f82018-08-08 15:15:33 +01001107 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1108 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001109 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1110 locations->SetInAt(1, Location::RequiresRegister());
1111 locations->SetInAt(2, Location::RequiresRegister());
1112 locations->SetInAt(3, Location::RequiresRegister());
1113 locations->SetInAt(4, Location::RequiresRegister());
1114
Vladimir Marko94796f82018-08-08 15:15:33 +01001115 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001116}
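// For orientation: the locations above mirror the Java-level signature
// `Unsafe.compareAndSwapInt(Object o, long offset, int expected, int newValue)` (and its
// long/object variants): input 0 is the unused Unsafe receiver, inputs 1-4 are the object,
// field offset, expected value and new value, and the boolean result is produced in a
// non-overlapping output register.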
1117
1118static void EmitLoadExclusive(CodeGeneratorARM64* codegen,
1119 DataType::Type type,
1120 Register ptr,
1121 Register old_value,
1122 bool use_load_acquire) {
1123 Arm64Assembler* assembler = codegen->GetAssembler();
1124 MacroAssembler* masm = assembler->GetVIXLAssembler();
1125 switch (type) {
1126 case DataType::Type::kBool:
Vladimir Marko98873af2020-12-16 12:10:03 +00001127 case DataType::Type::kUint8:
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001128 case DataType::Type::kInt8:
1129 if (use_load_acquire) {
1130 __ Ldaxrb(old_value, MemOperand(ptr));
1131 } else {
1132 __ Ldxrb(old_value, MemOperand(ptr));
1133 }
1134 break;
1135 case DataType::Type::kUint16:
1136 case DataType::Type::kInt16:
1137 if (use_load_acquire) {
1138 __ Ldaxrh(old_value, MemOperand(ptr));
1139 } else {
1140 __ Ldxrh(old_value, MemOperand(ptr));
1141 }
1142 break;
1143 case DataType::Type::kInt32:
1144 case DataType::Type::kInt64:
1145 case DataType::Type::kReference:
1146 if (use_load_acquire) {
1147 __ Ldaxr(old_value, MemOperand(ptr));
1148 } else {
1149 __ Ldxr(old_value, MemOperand(ptr));
1150 }
1151 break;
1152 default:
1153 LOG(FATAL) << "Unexpected type: " << type;
1154 UNREACHABLE();
1155 }
1156 switch (type) {
1157 case DataType::Type::kInt8:
1158 __ Sxtb(old_value, old_value);
1159 break;
1160 case DataType::Type::kInt16:
1161 __ Sxth(old_value, old_value);
1162 break;
1163 case DataType::Type::kReference:
1164 assembler->MaybeUnpoisonHeapReference(old_value);
1165 break;
1166 default:
1167 break;
Roland Levillaina1aa3b12016-10-26 13:03:38 +01001168 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001169}
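// Note: LDXRB/LDXRH (and their acquire forms) zero-extend into the W register, so the Sxtb/Sxth
// above restore the sign extension that the rest of the code expects for kInt8/kInt16 values;
// similarly, references are unpoisoned here so that later comparisons see usable object pointers.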
1170
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001171static void EmitStoreExclusive(CodeGeneratorARM64* codegen,
1172 DataType::Type type,
1173 Register ptr,
1174 Register store_result,
1175 Register new_value,
1176 bool use_store_release) {
1177 Arm64Assembler* assembler = codegen->GetAssembler();
1178 MacroAssembler* masm = assembler->GetVIXLAssembler();
1179 if (type == DataType::Type::kReference) {
1180 assembler->MaybePoisonHeapReference(new_value);
1181 }
1182 switch (type) {
1183 case DataType::Type::kBool:
Vladimir Marko98873af2020-12-16 12:10:03 +00001184 case DataType::Type::kUint8:
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001185 case DataType::Type::kInt8:
1186 if (use_store_release) {
1187 __ Stlxrb(store_result, new_value, MemOperand(ptr));
1188 } else {
1189 __ Stxrb(store_result, new_value, MemOperand(ptr));
1190 }
1191 break;
1192 case DataType::Type::kUint16:
1193 case DataType::Type::kInt16:
1194 if (use_store_release) {
1195 __ Stlxrh(store_result, new_value, MemOperand(ptr));
1196 } else {
1197 __ Stxrh(store_result, new_value, MemOperand(ptr));
1198 }
1199 break;
1200 case DataType::Type::kInt32:
1201 case DataType::Type::kInt64:
1202 case DataType::Type::kReference:
1203 if (use_store_release) {
1204 __ Stlxr(store_result, new_value, MemOperand(ptr));
1205 } else {
1206 __ Stxr(store_result, new_value, MemOperand(ptr));
1207 }
1208 break;
1209 default:
1210 LOG(FATAL) << "Unexpected type: " << type;
1211 UNREACHABLE();
1212 }
1213 if (type == DataType::Type::kReference) {
1214 assembler->MaybeUnpoisonHeapReference(new_value);
1215 }
1216}
Vladimir Marko94796f82018-08-08 15:15:33 +01001217
Vladimir Markoe17530a2020-11-11 17:02:26 +00001218static void GenerateCompareAndSet(CodeGeneratorARM64* codegen,
1219 DataType::Type type,
1220 std::memory_order order,
1221 bool strong,
1222 vixl::aarch64::Label* cmp_failure,
1223 Register ptr,
1224 Register new_value,
1225 Register old_value,
1226 Register store_result,
1227 Register expected,
1228 Register expected2 = Register()) {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001229 // The `expected2` is valid only for the reference slow path and represents the unmarked old value
1230 // from the main path's attempt to emit the CAS when the marked old value matched `expected`.
Santiago Aboy Solanes872ec722022-02-18 14:10:25 +00001231 DCHECK_IMPLIES(expected2.IsValid(), type == DataType::Type::kReference);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001232
1233 DCHECK(ptr.IsX());
1234 DCHECK_EQ(new_value.IsX(), type == DataType::Type::kInt64);
1235 DCHECK_EQ(old_value.IsX(), type == DataType::Type::kInt64);
1236 DCHECK(store_result.IsW());
1237 DCHECK_EQ(expected.IsX(), type == DataType::Type::kInt64);
Santiago Aboy Solanes872ec722022-02-18 14:10:25 +00001238 DCHECK_IMPLIES(expected2.IsValid(), expected2.IsW());
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001239
1240 Arm64Assembler* assembler = codegen->GetAssembler();
1241 MacroAssembler* masm = assembler->GetVIXLAssembler();
1242
1243 bool use_load_acquire =
1244 (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
1245 bool use_store_release =
1246 (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
1247 DCHECK(use_load_acquire || use_store_release || order == std::memory_order_relaxed);
1248
1249 // repeat: {
1250 // old_value = [ptr]; // Load exclusive.
1251 // if (old_value != expected && old_value != expected2) goto cmp_failure;
1252 // store_result = failed([ptr] <- new_value); // Store exclusive.
1253 // }
1254 // if (strong) {
1255 // if (store_result) goto repeat; // Repeat until compare fails or store exclusive succeeds.
1256 // } else {
1257 // store_result = store_result ^ 1; // Report success as 1, failure as 0.
1258 // }
1259 //
1260 // Flag Z indicates whether `old_value == expected || old_value == expected2`.
1261 // (If `expected2` is not valid, the `old_value == expected2` part is not emitted.)
1262
1263 vixl::aarch64::Label loop_head;
1264 if (strong) {
1265 __ Bind(&loop_head);
1266 }
1267 EmitLoadExclusive(codegen, type, ptr, old_value, use_load_acquire);
1268 __ Cmp(old_value, expected);
1269 if (expected2.IsValid()) {
1270 __ Ccmp(old_value, expected2, ZFlag, ne);
1271 }
Vladimir Markoc8178f52020-11-24 10:38:16 +00001272 // If the comparison failed, the Z flag is cleared as we branch to the `cmp_failure` label.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001273 // If the comparison succeeded, the Z flag is set and remains set after the end of the
1274 // code emitted here, unless we retry the whole operation.
1275 __ B(cmp_failure, ne);
1276 EmitStoreExclusive(codegen, type, ptr, store_result, new_value, use_store_release);
1277 if (strong) {
1278 __ Cbnz(store_result, &loop_head);
1279 } else {
1280 // Flip the `store_result` register to indicate success by 1 and failure by 0.
1281 __ Eor(store_result, store_result, 1);
1282 }
1283}
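// Illustrative sketch (for exposition only): a strong, sequentially consistent kInt32 CAS without
// an `expected2` reduces to the classic load/store-exclusive loop:
//
//   retry:
//     ldaxr w_old, [x_ptr]          // Load-acquire exclusive.
//     cmp   w_old, w_expected
//     b.ne  cmp_failure             // Z == 0 on this exit.
//     stlxr w_res, w_new, [x_ptr]   // Store-release exclusive; w_res is 0 on success.
//     cbnz  w_res, retry            // Strong CAS retries until the exclusive store succeeds.
//
// A weak CAS omits the retry branch and instead flips w_res (EOR with 1) so that 1 means success.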
1284
1285class ReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
1286 public:
1287 ReadBarrierCasSlowPathARM64(HInvoke* invoke,
1288 std::memory_order order,
1289 bool strong,
1290 Register base,
1291 Register offset,
1292 Register expected,
1293 Register new_value,
1294 Register old_value,
1295 Register old_value_temp,
1296 Register store_result,
1297 bool update_old_value,
1298 CodeGeneratorARM64* arm64_codegen)
1299 : SlowPathCodeARM64(invoke),
1300 order_(order),
1301 strong_(strong),
1302 base_(base),
1303 offset_(offset),
1304 expected_(expected),
1305 new_value_(new_value),
1306 old_value_(old_value),
1307 old_value_temp_(old_value_temp),
1308 store_result_(store_result),
1309 update_old_value_(update_old_value),
1310 mark_old_value_slow_path_(nullptr),
1311 update_old_value_slow_path_(nullptr) {
1312 if (!kUseBakerReadBarrier) {
1313 // We need to add the slow path now, it is too late when emitting slow path code.
1314 mark_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1315 invoke,
1316 Location::RegisterLocation(old_value_temp.GetCode()),
1317 Location::RegisterLocation(old_value.GetCode()),
1318 Location::RegisterLocation(base.GetCode()),
1319 /*offset=*/ 0u,
1320 /*index=*/ Location::RegisterLocation(offset.GetCode()));
1321 if (update_old_value_) {
1322 update_old_value_slow_path_ = arm64_codegen->AddReadBarrierSlowPath(
1323 invoke,
1324 Location::RegisterLocation(old_value.GetCode()),
1325 Location::RegisterLocation(old_value_temp.GetCode()),
1326 Location::RegisterLocation(base.GetCode()),
1327 /*offset=*/ 0u,
1328 /*index=*/ Location::RegisterLocation(offset.GetCode()));
1329 }
1330 }
1331 }
1332
1333 const char* GetDescription() const override { return "ReadBarrierCasSlowPathARM64"; }
Vladimir Marko94796f82018-08-08 15:15:33 +01001334
Roland Levillainbbc6e7e2018-08-24 16:58:47 +01001335 void EmitNativeCode(CodeGenerator* codegen) override {
Vladimir Marko94796f82018-08-08 15:15:33 +01001336 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1337 Arm64Assembler* assembler = arm64_codegen->GetAssembler();
1338 MacroAssembler* masm = assembler->GetVIXLAssembler();
1339 __ Bind(GetEntryLabel());
1340
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001341 // Mark the `old_value_` from the main path and compare with `expected_`.
1342 if (kUseBakerReadBarrier) {
1343 DCHECK(mark_old_value_slow_path_ == nullptr);
Vladimir Markoc8178f52020-11-24 10:38:16 +00001344 arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001345 } else {
1346 DCHECK(mark_old_value_slow_path_ != nullptr);
1347 __ B(mark_old_value_slow_path_->GetEntryLabel());
1348 __ Bind(mark_old_value_slow_path_->GetExitLabel());
1349 }
1350 __ Cmp(old_value_temp_, expected_);
1351 if (update_old_value_) {
1352 // Update the old value if we're going to return from the slow path.
1353 __ Csel(old_value_, old_value_temp_, old_value_, ne);
1354 }
Vladimir Marko94796f82018-08-08 15:15:33 +01001355 __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure.
1356
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001357 // The `old_value` we have read did not match `expected` (which is always a to-space
1358 // reference) but after the read barrier the marked to-space value matched, so the
1359 // `old_value` must be a from-space reference to the same object. Do the same CAS loop
1360 // as the main path but check for both `expected` and the unmarked old value
1361 // representing the to-space and from-space references for the same object.
Vladimir Marko94796f82018-08-08 15:15:33 +01001362
1363 UseScratchRegisterScope temps(masm);
Santiago Aboy Solanes872ec722022-02-18 14:10:25 +00001364 DCHECK_IMPLIES(store_result_.IsValid(), !temps.IsAvailable(store_result_));
Vladimir Marko94796f82018-08-08 15:15:33 +01001365 Register tmp_ptr = temps.AcquireX();
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001366 Register store_result = store_result_.IsValid() ? store_result_ : temps.AcquireW();
Vladimir Marko94796f82018-08-08 15:15:33 +01001367
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001368 // Recalculate the `tmp_ptr` from main path clobbered by the read barrier above.
1369 __ Add(tmp_ptr, base_.X(), Operand(offset_));
Vladimir Marko94796f82018-08-08 15:15:33 +01001370
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001371 vixl::aarch64::Label mark_old_value;
Vladimir Markoe17530a2020-11-11 17:02:26 +00001372 GenerateCompareAndSet(arm64_codegen,
1373 DataType::Type::kReference,
1374 order_,
1375 strong_,
1376 /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
1377 tmp_ptr,
1378 new_value_,
1379 /*old_value=*/ old_value_temp_,
1380 store_result,
1381 expected_,
1382 /*expected2=*/ old_value_);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001383 if (update_old_value_) {
1384 // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
1385 // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
1386 __ Mov(old_value_, expected_);
1387 }
Vladimir Marko94796f82018-08-08 15:15:33 +01001388
Vladimir Markoe17530a2020-11-11 17:02:26 +00001389 // Z=true from the CMP+CCMP in GenerateCompareAndSet() above indicates comparison success.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001390 // For strong CAS, that's the overall success. For weak CAS, the code also needs
1391 // to check the `store_result` after returning from the slow path.
Vladimir Marko94796f82018-08-08 15:15:33 +01001392 __ B(GetExitLabel());
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001393
1394 if (update_old_value_) {
1395 __ Bind(&mark_old_value);
1396 if (kUseBakerReadBarrier) {
1397 DCHECK(update_old_value_slow_path_ == nullptr);
Vladimir Markoc8178f52020-11-24 10:38:16 +00001398 arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001399 } else {
1400 // Note: We could redirect the `failure` above directly to the entry label and bind
1401 // the exit label in the main path, but the main path would need to access the
1402 // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
1403 DCHECK(update_old_value_slow_path_ != nullptr);
1404 __ B(update_old_value_slow_path_->GetEntryLabel());
1405 __ Bind(update_old_value_slow_path_->GetExitLabel());
1406 }
1407 __ B(GetExitLabel());
1408 }
Vladimir Marko94796f82018-08-08 15:15:33 +01001409 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001410
1411 private:
1412 std::memory_order order_;
1413 bool strong_;
1414 Register base_;
1415 Register offset_;
1416 Register expected_;
1417 Register new_value_;
1418 Register old_value_;
1419 Register old_value_temp_;
1420 Register store_result_;
1421 bool update_old_value_;
1422 SlowPathCodeARM64* mark_old_value_slow_path_;
1423 SlowPathCodeARM64* update_old_value_slow_path_;
Vladimir Marko94796f82018-08-08 15:15:33 +01001424};
1425
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001426static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
1427 MacroAssembler* masm = codegen->GetVIXLAssembler();
Roland Levillaina1aa3b12016-10-26 13:03:38 +01001428 LocationSummary* locations = invoke->GetLocations();
Andreas Gampe878d58c2015-01-15 23:24:00 -08001429
Vladimir Marko94796f82018-08-08 15:15:33 +01001430 Register out = WRegisterFrom(locations->Out()); // Boolean result.
1431 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
1432 Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
1433 Register expected = RegisterFrom(locations->InAt(3), type); // Expected.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001434 Register new_value = RegisterFrom(locations->InAt(4), type); // New value.
Andreas Gampe878d58c2015-01-15 23:24:00 -08001435
1436 // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001437 if (type == DataType::Type::kReference) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001438 // Mark card for object assuming new value is stored.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001439 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
1440 codegen->MarkGCCard(base, new_value, new_value_can_be_null);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001441 }
1442
1443 UseScratchRegisterScope temps(masm);
1444 Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
Vladimir Marko94796f82018-08-08 15:15:33 +01001445 Register old_value; // Value in memory.
Andreas Gampe878d58c2015-01-15 23:24:00 -08001446
Vladimir Marko94796f82018-08-08 15:15:33 +01001447 vixl::aarch64::Label exit_loop_label;
1448 vixl::aarch64::Label* exit_loop = &exit_loop_label;
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001449 vixl::aarch64::Label* cmp_failure = &exit_loop_label;
Vladimir Marko94796f82018-08-08 15:15:33 +01001450
1451 if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001452 // We need to store the `old_value` in a non-scratch register to make sure
1453 // the read barrier in the slow path does not clobber it.
1454 old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path.
1455 // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
1456 // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
1457 Register old_value_temp = WRegisterFrom(locations->GetTemp(1));
1458 ReadBarrierCasSlowPathARM64* slow_path =
1459 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
1460 invoke,
1461 std::memory_order_seq_cst,
1462 /*strong=*/ true,
1463 base,
1464 offset,
1465 expected,
1466 new_value,
1467 old_value,
1468 old_value_temp,
1469 /*store_result=*/ Register(), // Use a scratch register.
1470 /*update_old_value=*/ false,
1471 codegen);
Vladimir Marko94796f82018-08-08 15:15:33 +01001472 codegen->AddSlowPath(slow_path);
1473 exit_loop = slow_path->GetExitLabel();
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001474 cmp_failure = slow_path->GetEntryLabel();
Vladimir Marko94796f82018-08-08 15:15:33 +01001475 } else {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001476 old_value = temps.AcquireSameSizeAs(new_value);
Vladimir Marko94796f82018-08-08 15:15:33 +01001477 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001478
1479 __ Add(tmp_ptr, base.X(), Operand(offset));
1480
Vladimir Markoe17530a2020-11-11 17:02:26 +00001481 GenerateCompareAndSet(codegen,
1482 type,
1483 std::memory_order_seq_cst,
1484 /*strong=*/ true,
1485 cmp_failure,
1486 tmp_ptr,
1487 new_value,
1488 old_value,
1489 /*store_result=*/ old_value.W(), // Reuse `old_value` for ST*XR* result.
1490 expected);
Vladimir Marko94796f82018-08-08 15:15:33 +01001491 __ Bind(exit_loop);
1492 __ Cset(out, eq);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001493}
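// Note on the read barrier wiring above: with read barriers enabled for a reference CAS, a failed
// comparison does not immediately report failure. `cmp_failure` is redirected to
// ReadBarrierCasSlowPathARM64, which marks the loaded reference and, if the marked value equals
// `expected`, retries the CAS accepting both the to-space and from-space references before the
// operation is finally declared unsuccessful.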
1494
1495void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001496 VisitJdkUnsafeCASInt(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001497}
1498void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001499 VisitJdkUnsafeCASLong(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001500}
1501void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001502 VisitJdkUnsafeCASObject(invoke);
1503}
1504
1505void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
Ulya Trafimovich70102e62022-01-14 15:20:38 +00001506 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
1507 VisitJdkUnsafeCompareAndSetInt(invoke);
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001508}
1509void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
Ulya Trafimovich70102e62022-01-14 15:20:38 +00001510 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
1511 VisitJdkUnsafeCompareAndSetLong(invoke);
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001512}
1513void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
Ulya Trafimovich70102e62022-01-14 15:20:38 +00001514 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
1515 VisitJdkUnsafeCompareAndSetObject(invoke);
1516}
1517
1518void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
1519 CreateUnsafeCASLocations(allocator_, invoke);
1520}
1521void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
1522 CreateUnsafeCASLocations(allocator_, invoke);
1523}
1524void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
1525 // The only supported read barrier implementation is the Baker-style read barriers.
Roland Levillaina1aa3b12016-10-26 13:03:38 +01001526 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
Roland Levillain985ff702015-10-23 13:25:35 +01001527 return;
1528 }
1529
Vladimir Markoe17530a2020-11-11 17:02:26 +00001530 CreateUnsafeCASLocations(allocator_, invoke);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001531 if (kEmitCompilerReadBarrier) {
1532 // We need two non-scratch temporary registers for the read barrier.
1533 LocationSummary* locations = invoke->GetLocations();
1534 if (kUseBakerReadBarrier) {
1535 locations->AddTemp(Location::RequiresRegister());
1536 locations->AddTemp(Location::RequiresRegister());
1537 } else {
1538 // To preserve the old value across the non-Baker read barrier
1539 // slow path, use a fixed callee-save register.
Vladimir Markoc8178f52020-11-24 10:38:16 +00001540 constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
1541 locations->AddTemp(Location::RegisterLocation(first_callee_save));
Vladimir Marko1bff99f2020-11-02 15:07:33 +00001542 // To reduce the number of moves, request x0 as the second temporary.
1543 DCHECK(InvokeRuntimeCallingConvention().GetReturnLocation(DataType::Type::kReference).Equals(
1544 Location::RegisterLocation(x0.GetCode())));
1545 locations->AddTemp(Location::RegisterLocation(x0.GetCode()));
1546 }
1547 }
Andreas Gampe878d58c2015-01-15 23:24:00 -08001548}
1549
1550void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001551 VisitJdkUnsafeCASInt(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001552}
1553void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001554 VisitJdkUnsafeCASLong(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001555}
1556void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001557 VisitJdkUnsafeCASObject(invoke);
1558}
1559
1560void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
Ulya Trafimovich70102e62022-01-14 15:20:38 +00001561 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
1562 VisitJdkUnsafeCompareAndSetInt(invoke);
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001563}
1564void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
Ulya Trafimovich70102e62022-01-14 15:20:38 +00001565 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
1566 VisitJdkUnsafeCompareAndSetLong(invoke);
Sorin Basca2f01e8e2021-06-18 06:44:07 +00001567}
1568void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
Ulya Trafimovich70102e62022-01-14 15:20:38 +00001569 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
1570 VisitJdkUnsafeCompareAndSetObject(invoke);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001571}
Ulya Trafimovich70102e62022-01-14 15:20:38 +00001572
Sorin Basca0069ad72021-09-17 17:33:09 +00001573void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
1574 GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
1575}
Sorin Basca507cf902021-10-06 12:04:56 +00001576void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
1577 GenUnsafeCas(invoke, DataType::Type::kInt64, codegen_);
1578}
Ulya Trafimovich70102e62022-01-14 15:20:38 +00001579void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
1580 // The only supported read barrier implementation is the Baker-style read barriers.
Santiago Aboy Solanes872ec722022-02-18 14:10:25 +00001581 DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
Ulya Trafimovich70102e62022-01-14 15:20:38 +00001582
1583 GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
1584}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001585
Vladimir Markoe1510d42020-11-13 11:07:13 +00001586enum class GetAndUpdateOp {
1587 kSet,
Vladimir Markoc2d5c702020-11-13 15:28:33 +00001588 kAdd,
Vladimir Marko98873af2020-12-16 12:10:03 +00001589 kAddWithByteSwap,
Vladimir Markoc2d5c702020-11-13 15:28:33 +00001590 kAnd,
1591 kOr,
1592 kXor
Vladimir Markoe1510d42020-11-13 11:07:13 +00001593};
1594
1595static void GenerateGetAndUpdate(CodeGeneratorARM64* codegen,
1596 GetAndUpdateOp get_and_update_op,
1597 DataType::Type load_store_type,
1598 std::memory_order order,
1599 Register ptr,
1600 CPURegister arg,
1601 CPURegister old_value) {
Vladimir Marko32c2eb82020-11-10 16:58:47 +00001602 MacroAssembler* masm = codegen->GetVIXLAssembler();
1603 UseScratchRegisterScope temps(masm);
1604 Register store_result = temps.AcquireW();
1605
Vladimir Markoe1510d42020-11-13 11:07:13 +00001606 DCHECK_EQ(old_value.GetSizeInBits(), arg.GetSizeInBits());
1607 Register old_value_reg;
1608 Register new_value;
1609 switch (get_and_update_op) {
1610 case GetAndUpdateOp::kSet:
1611 old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1612 new_value = arg.IsX() ? arg.X() : arg.W();
1613 break;
Vladimir Marko98873af2020-12-16 12:10:03 +00001614 case GetAndUpdateOp::kAddWithByteSwap:
Vladimir Markoe1510d42020-11-13 11:07:13 +00001615 case GetAndUpdateOp::kAdd:
1616 if (arg.IsVRegister()) {
1617 old_value_reg = arg.IsD() ? temps.AcquireX() : temps.AcquireW();
1618 new_value = old_value_reg; // Use the same temporary.
Vladimir Markoc2d5c702020-11-13 15:28:33 +00001619 break;
Vladimir Markoe1510d42020-11-13 11:07:13 +00001620 }
Vladimir Markoc2d5c702020-11-13 15:28:33 +00001621 FALLTHROUGH_INTENDED;
1622 case GetAndUpdateOp::kAnd:
1623 case GetAndUpdateOp::kOr:
1624 case GetAndUpdateOp::kXor:
1625 old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
1626 new_value = old_value.IsX() ? temps.AcquireX() : temps.AcquireW();
Vladimir Markoe1510d42020-11-13 11:07:13 +00001627 break;
1628 }
1629
Vladimir Marko32c2eb82020-11-10 16:58:47 +00001630 bool use_load_acquire =
1631 (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
1632 bool use_store_release =
1633 (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
1634 DCHECK(use_load_acquire || use_store_release);
1635
1636 vixl::aarch64::Label loop_label;
1637 __ Bind(&loop_label);
Vladimir Markoe1510d42020-11-13 11:07:13 +00001638 EmitLoadExclusive(codegen, load_store_type, ptr, old_value_reg, use_load_acquire);
1639 switch (get_and_update_op) {
1640 case GetAndUpdateOp::kSet:
1641 break;
Vladimir Marko98873af2020-12-16 12:10:03 +00001642 case GetAndUpdateOp::kAddWithByteSwap:
1643 // To avoid unnecessary sign extension before REV16, the caller must specify `kUint16`
1644 // instead of `kInt16` and do the sign-extension explicitly afterwards.
1645 DCHECK_NE(load_store_type, DataType::Type::kInt16);
1646 GenerateReverseBytes(masm, load_store_type, old_value_reg, old_value_reg);
1647 FALLTHROUGH_INTENDED;
Vladimir Markoe1510d42020-11-13 11:07:13 +00001648 case GetAndUpdateOp::kAdd:
1649 if (arg.IsVRegister()) {
1650 VRegister old_value_vreg = old_value.IsD() ? old_value.D() : old_value.S();
1651 VRegister sum = temps.AcquireSameSizeAs(old_value_vreg);
1652 __ Fmov(old_value_vreg, old_value_reg);
1653 __ Fadd(sum, old_value_vreg, arg.IsD() ? arg.D() : arg.S());
1654 __ Fmov(new_value, sum);
1655 } else {
1656 __ Add(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1657 }
Vladimir Marko98873af2020-12-16 12:10:03 +00001658 if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
1659 GenerateReverseBytes(masm, load_store_type, new_value, new_value);
1660 }
Vladimir Markoe1510d42020-11-13 11:07:13 +00001661 break;
Vladimir Markoc2d5c702020-11-13 15:28:33 +00001662 case GetAndUpdateOp::kAnd:
1663 __ And(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1664 break;
1665 case GetAndUpdateOp::kOr:
1666 __ Orr(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1667 break;
1668 case GetAndUpdateOp::kXor:
1669 __ Eor(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
1670 break;
Vladimir Markoe1510d42020-11-13 11:07:13 +00001671 }
1672 EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value, use_store_release);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00001673 __ Cbnz(store_result, &loop_label);
1674}
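// Illustrative sketch (for exposition only): a sequentially consistent integer getAndAdd going
// through GenerateGetAndUpdate(GetAndUpdateOp::kAdd, kInt32) is essentially:
//
//   retry:
//     ldaxr w_old, [x_ptr]
//     add   w_new, w_old, w_arg
//     stlxr w_res, w_new, [x_ptr]
//     cbnz  w_res, retry
//
// The floating-point flavors round-trip through FMOV/FADD/FMOV because the exclusive load/store
// pair only operates on general-purpose registers.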
1675
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001676void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01001677 LocationSummary* locations =
1678 new (allocator_) LocationSummary(invoke,
1679 invoke->InputAt(1)->CanBeNull()
1680 ? LocationSummary::kCallOnSlowPath
1681 : LocationSummary::kNoCall,
1682 kIntrinsified);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001683 locations->SetInAt(0, Location::RequiresRegister());
1684 locations->SetInAt(1, Location::RequiresRegister());
1685 locations->AddTemp(Location::RequiresRegister());
1686 locations->AddTemp(Location::RequiresRegister());
1687 locations->AddTemp(Location::RequiresRegister());
jessicahandojo05765752016-09-09 19:01:32 -07001688 // Need temporary registers for String compression's feature.
1689 if (mirror::kUseStringCompression) {
1690 locations->AddTemp(Location::RequiresRegister());
jessicahandojo05765752016-09-09 19:01:32 -07001691 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001692 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001693}
1694
1695void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01001696 MacroAssembler* masm = GetVIXLAssembler();
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001697 LocationSummary* locations = invoke->GetLocations();
1698
Alexandre Rames2ea91532016-08-11 17:04:14 +01001699 Register str = InputRegisterAt(invoke, 0);
1700 Register arg = InputRegisterAt(invoke, 1);
1701 DCHECK(str.IsW());
1702 DCHECK(arg.IsW());
Scott Wakeling1f36f412016-04-21 11:13:45 +01001703 Register out = OutputRegister(invoke);
1704
1705 Register temp0 = WRegisterFrom(locations->GetTemp(0));
1706 Register temp1 = WRegisterFrom(locations->GetTemp(1));
1707 Register temp2 = WRegisterFrom(locations->GetTemp(2));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001708 Register temp3;
jessicahandojo05765752016-09-09 19:01:32 -07001709 if (mirror::kUseStringCompression) {
1710 temp3 = WRegisterFrom(locations->GetTemp(3));
jessicahandojo05765752016-09-09 19:01:32 -07001711 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001712
Scott Wakeling97c72b72016-06-24 16:19:36 +01001713 vixl::aarch64::Label loop;
1714 vixl::aarch64::Label find_char_diff;
1715 vixl::aarch64::Label end;
jessicahandojo05765752016-09-09 19:01:32 -07001716 vixl::aarch64::Label different_compression;
Scott Wakeling1f36f412016-04-21 11:13:45 +01001717
1718 // Get offsets of count and value fields within a string object.
1719 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1720 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1721
Nicolas Geoffray512e04d2015-03-27 17:21:24 +00001722 // Note that the null check must have been done earlier.
Calin Juravle641547a2015-04-21 22:08:51 +01001723 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001724
Scott Wakeling1f36f412016-04-21 11:13:45 +01001725 // Take slow path and throw if input can be and is null.
1726 SlowPathCodeARM64* slow_path = nullptr;
1727 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1728 if (can_slow_path) {
Vladimir Marko174b2e22017-10-12 13:34:49 +01001729 slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001730 codegen_->AddSlowPath(slow_path);
1731 __ Cbz(arg, slow_path->GetEntryLabel());
1732 }
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001733
Scott Wakeling1f36f412016-04-21 11:13:45 +01001734 // Reference equality check, return 0 if same reference.
1735 __ Subs(out, str, arg);
1736 __ B(&end, eq);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001737
jessicahandojo05765752016-09-09 19:01:32 -07001738 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001739 // Load `count` fields of this and argument strings.
jessicahandojo05765752016-09-09 19:01:32 -07001740 __ Ldr(temp3, HeapOperand(str, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001741 __ Ldr(temp2, HeapOperand(arg, count_offset));
jessicahandojo05765752016-09-09 19:01:32 -07001742 // Clean out compression flag from lengths.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001743 __ Lsr(temp0, temp3, 1u);
1744 __ Lsr(temp1, temp2, 1u);
jessicahandojo05765752016-09-09 19:01:32 -07001745 } else {
1746 // Load lengths of this and argument strings.
1747 __ Ldr(temp0, HeapOperand(str, count_offset));
1748 __ Ldr(temp1, HeapOperand(arg, count_offset));
1749 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001750 // out = length diff.
1751 __ Subs(out, temp0, temp1);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001752 // temp0 = min(len(str), len(arg)).
1753 __ Csel(temp0, temp1, temp0, ge);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001754 // Shorter string is empty?
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001755 __ Cbz(temp0, &end);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001756
jessicahandojo05765752016-09-09 19:01:32 -07001757 if (mirror::kUseStringCompression) {
1758 // Check if both strings using same compression style to use this comparison loop.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001759 __ Eor(temp2, temp2, Operand(temp3));
1760 // Interleave with the compression flag extraction, which is needed for both paths,
1761 // and also set flags, which are needed only for the different-compression path.
1762 __ Ands(temp3.W(), temp3.W(), Operand(1));
1763 __ Tbnz(temp2, 0, &different_compression); // Does not use flags.
jessicahandojo05765752016-09-09 19:01:32 -07001764 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001765 // Store offset of string value in preparation for comparison loop.
1766 __ Mov(temp1, value_offset);
jessicahandojo05765752016-09-09 19:01:32 -07001767 if (mirror::kUseStringCompression) {
1768 // For string compression, calculate the number of bytes to compare (not chars).
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001769 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1770 __ Lsl(temp0, temp0, temp3);
jessicahandojo05765752016-09-09 19:01:32 -07001771 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001772
1773 UseScratchRegisterScope scratch_scope(masm);
1774 Register temp4 = scratch_scope.AcquireX();
1775
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001776 // Assertions that must hold in order to compare strings 8 bytes at a time.
Scott Wakeling1f36f412016-04-21 11:13:45 +01001777 DCHECK_ALIGNED(value_offset, 8);
1778 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1779
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001780 const size_t char_size = DataType::Size(DataType::Type::kUint16);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001781 DCHECK_EQ(char_size, 2u);
1782
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001783 // Promote temp2 to an X reg, ready for LDR.
1784 temp2 = temp2.X();
Scott Wakeling1f36f412016-04-21 11:13:45 +01001785
1786 // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
1787 __ Bind(&loop);
Alexandre Rames2ea91532016-08-11 17:04:14 +01001788 __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001789 __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
1790 __ Cmp(temp4, temp2);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001791 __ B(ne, &find_char_diff);
1792 __ Add(temp1, temp1, char_size * 4);
jessicahandojo05765752016-09-09 19:01:32 -07001793 // With string compression, we have compared 8 bytes, otherwise 4 chars.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001794 __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
1795 __ B(&loop, hi);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001796 __ B(&end);
1797
1798 // Promote temp1 to an X reg, ready for EOR.
1799 temp1 = temp1.X();
1800
jessicahandojo05765752016-09-09 19:01:32 -07001801 // Find the single character difference.
Scott Wakeling1f36f412016-04-21 11:13:45 +01001802 __ Bind(&find_char_diff);
1803 // Get the bit position of the first character that differs.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001804 __ Eor(temp1, temp2, temp4);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001805 __ Rbit(temp1, temp1);
1806 __ Clz(temp1, temp1);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001807
jessicahandojo05765752016-09-09 19:01:32 -07001808 // If the number of chars remaining <= the index where the difference occurs (0-3), then
Scott Wakeling1f36f412016-04-21 11:13:45 +01001809 // the difference occurs outside the remaining string data, so just return length diff (out).
jessicahandojo05765752016-09-09 19:01:32 -07001810 // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
1811 // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
1812 // unsigned when string compression is disabled.
1813 // When it's enabled, the comparison must be unsigned.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001814 __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
jessicahandojo05765752016-09-09 19:01:32 -07001815 __ B(ls, &end);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001816
Scott Wakeling1f36f412016-04-21 11:13:45 +01001817 // Extract the characters and calculate the difference.
jessicahandojo05765752016-09-09 19:01:32 -07001818 if (mirror::kUseStringCompression) {
jessicahandojo05765752016-09-09 19:01:32 -07001819 __ Bic(temp1, temp1, 0x7);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001820 __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
1821 } else {
1822 __ Bic(temp1, temp1, 0xf);
jessicahandojo05765752016-09-09 19:01:32 -07001823 }
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001824 __ Lsr(temp2, temp2, temp1);
Scott Wakeling1f36f412016-04-21 11:13:45 +01001825 __ Lsr(temp4, temp4, temp1);
jessicahandojo05765752016-09-09 19:01:32 -07001826 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001827 // Prioritize the case of compressed strings and calculate such result first.
1828 __ Uxtb(temp1, temp4);
1829 __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
1830 __ Tbz(temp3, 0u, &end); // If actually compressed, we're done.
jessicahandojo05765752016-09-09 19:01:32 -07001831 }
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001832 __ Uxth(temp4, temp4);
1833 __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
jessicahandojo05765752016-09-09 19:01:32 -07001834
1835 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001836 __ B(&end);
1837 __ Bind(&different_compression);
1838
1839 // Comparison for different compression style.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001840 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
jessicahandojo05765752016-09-09 19:01:32 -07001841 DCHECK_EQ(c_char_size, 1u);
jessicahandojo05765752016-09-09 19:01:32 -07001842 temp1 = temp1.W();
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001843 temp2 = temp2.W();
1844 temp4 = temp4.W();
jessicahandojo05765752016-09-09 19:01:32 -07001845
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001846 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1847 // Note that flags have been set by the `str` compression flag extraction to `temp3`
1848 // before branching to the `different_compression` label.
1849 __ Csel(temp1, str, arg, eq); // Pointer to the compressed string.
1850 __ Csel(temp2, str, arg, ne); // Pointer to the uncompressed string.
jessicahandojo05765752016-09-09 19:01:32 -07001851
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001852 // We want to free up temp3, currently holding the `str` compression flag, for the comparison.
1853 // So we move it to the bottom bit of the iteration count `temp0`, which we then need to treat
1854 // as unsigned. Start by freeing the bit with an LSL here and finish further down with a SUB,
1855 // which allows `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1856 __ Lsl(temp0, temp0, 1u);
1857
1858 // Adjust temp1 and temp2 from string pointers to data pointers.
1859 __ Add(temp1, temp1, Operand(value_offset));
1860 __ Add(temp2, temp2, Operand(value_offset));
1861
1862 // Complete the move of the compression flag.
1863 __ Sub(temp0, temp0, Operand(temp3));
1864
1865 vixl::aarch64::Label different_compression_loop;
1866 vixl::aarch64::Label different_compression_diff;
1867
1868 __ Bind(&different_compression_loop);
1869 __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
1870 __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
1871 __ Subs(temp4, temp4, Operand(temp3));
1872 __ B(&different_compression_diff, ne);
1873 __ Subs(temp0, temp0, 2);
1874 __ B(&different_compression_loop, hi);
jessicahandojo05765752016-09-09 19:01:32 -07001875 __ B(&end);
1876
1877 // Calculate the difference.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001878 __ Bind(&different_compression_diff);
1879 __ Tst(temp0, Operand(1));
1880 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1881 "Expecting 0=compressed, 1=uncompressed");
1882 __ Cneg(out, temp4, ne);
jessicahandojo05765752016-09-09 19:01:32 -07001883 }
Scott Wakeling1f36f412016-04-21 11:13:45 +01001884
1885 __ Bind(&end);
1886
1887 if (can_slow_path) {
1888 __ Bind(slow_path->GetExitLabel());
1889 }
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001890}
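// Worked example (for exposition, assuming the usual count-field encoding with string compression
// enabled): `count` holds (length << 1) | flag, where flag 0 means compressed (8-bit chars) and
// 1 means uncompressed (16-bit chars). A 5-character ASCII string therefore stores count == 10 and
// a 5-character non-ASCII string stores count == 11, which is why the code above extracts the
// length with LSR #1 and the compression flag with AND #1.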
1891
Vladimir Markoe39f14f2017-02-10 15:44:25 +00001892// The cutoff for unrolling the loop in the String.equals() intrinsic for const strings.
1893// The normal loop plus the pre-header is 9 instructions without string compression and 12
1894// instructions with string compression. We can compare up to 8 bytes in 4 instructions
1895// (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
1896// to 10 instructions for the unrolled loop.
1897constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
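// Worked example: with this 32-byte cutoff, a compressed const string of up to 32 characters (or an
// uncompressed one of up to 16 characters) is compared with at most two LDP+LDP+CMP+CCMP+BNE groups,
// i.e. 10 instructions, matching the budget described above; longer const strings fall back to the
// generic comparison loop.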
1898
1899static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1900 if (candidate->IsLoadString()) {
1901 HLoadString* load_string = candidate->AsLoadString();
1902 const DexFile& dex_file = load_string->GetDexFile();
1903 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1904 }
1905 return nullptr;
1906}
1907
Agi Csakiea34b402015-08-13 17:51:19 -07001908void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01001909 LocationSummary* locations =
1910 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
Agi Csakiea34b402015-08-13 17:51:19 -07001911 locations->SetInAt(0, Location::RequiresRegister());
1912 locations->SetInAt(1, Location::RequiresRegister());
Agi Csakiea34b402015-08-13 17:51:19 -07001913
Vladimir Markoe39f14f2017-02-10 15:44:25 +00001914 // For the generic implementation and for long const strings we need a temporary.
1915 // We do not need it for short const strings, up to 8 bytes, see code generation below.
1916 uint32_t const_string_length = 0u;
1917 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1918 if (const_string == nullptr) {
1919 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1920 }
1921 bool is_compressed =
1922 mirror::kUseStringCompression &&
1923 const_string != nullptr &&
1924 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1925 if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
1926 locations->AddTemp(Location::RequiresRegister());
1927 }
1928
1929 // TODO: If the String.equals() is used only for an immediately following HIf, we can
1930 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1931 // Then we shall need an extra temporary register instead of the output register.
Agi Csakiea34b402015-08-13 17:51:19 -07001932 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1933}
1934
1935void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01001936 MacroAssembler* masm = GetVIXLAssembler();
Agi Csakiea34b402015-08-13 17:51:19 -07001937 LocationSummary* locations = invoke->GetLocations();
1938
1939 Register str = WRegisterFrom(locations->InAt(0));
1940 Register arg = WRegisterFrom(locations->InAt(1));
1941 Register out = XRegisterFrom(locations->Out());
1942
1943 UseScratchRegisterScope scratch_scope(masm);
1944 Register temp = scratch_scope.AcquireW();
Vladimir Markoe39f14f2017-02-10 15:44:25 +00001945 Register temp1 = scratch_scope.AcquireW();
Agi Csakiea34b402015-08-13 17:51:19 -07001946
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001947 vixl::aarch64::Label loop;
Scott Wakeling97c72b72016-06-24 16:19:36 +01001948 vixl::aarch64::Label end;
1949 vixl::aarch64::Label return_true;
1950 vixl::aarch64::Label return_false;
Agi Csakiea34b402015-08-13 17:51:19 -07001951
1952 // Get offsets of count, value, and class fields within a string object.
1953 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1954 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1955 const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1956
1957 // Note that the null check must have been done earlier.
1958 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1959
Vladimir Marko53b52002016-05-24 19:30:45 +01001960 StringEqualsOptimizations optimizations(invoke);
1961 if (!optimizations.GetArgumentNotNull()) {
1962 // Check if input is null, return false if it is.
1963 __ Cbz(arg, &return_false);
1964 }
Agi Csakiea34b402015-08-13 17:51:19 -07001965
1966 // Reference equality check, return true if same reference.
1967 __ Cmp(str, arg);
1968 __ B(&return_true, eq);
1969
Vladimir Marko53b52002016-05-24 19:30:45 +01001970 if (!optimizations.GetArgumentIsString()) {
1971 // Instanceof check for the argument by comparing class fields.
1972 // All string objects must have the same type since String cannot be subclassed.
1973 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1974 // If the argument is a string object, its class field must be equal to receiver's class field.
Roland Levillain1d775d22018-09-07 13:56:57 +01001975 //
1976 // As the String class is expected to be non-movable, we can read the class
1977 // field from String.equals' arguments without read barriers.
1978 AssertNonMovableStringClass();
1979 // /* HeapReference<Class> */ temp = str->klass_
Vladimir Marko53b52002016-05-24 19:30:45 +01001980 __ Ldr(temp, MemOperand(str.X(), class_offset));
Roland Levillain1d775d22018-09-07 13:56:57 +01001981 // /* HeapReference<Class> */ temp1 = arg->klass_
Vladimir Marko53b52002016-05-24 19:30:45 +01001982 __ Ldr(temp1, MemOperand(arg.X(), class_offset));
Roland Levillain1d775d22018-09-07 13:56:57 +01001983 // Also, because we use the previously loaded class references only in the
1984 // following comparison, we don't need to unpoison them.
Vladimir Marko53b52002016-05-24 19:30:45 +01001985 __ Cmp(temp, temp1);
1986 __ B(&return_false, ne);
1987 }
Agi Csakiea34b402015-08-13 17:51:19 -07001988
Vladimir Markoe39f14f2017-02-10 15:44:25 +00001989 // Check if one of the inputs is a const string. Do not special-case both strings
1990 // being const, such cases should be handled by constant folding if needed.
1991 uint32_t const_string_length = 0u;
1992 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1993 if (const_string == nullptr) {
1994 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1995 if (const_string != nullptr) {
1996 std::swap(str, arg); // Make sure the const string is in `str`.
1997 }
1998 }
1999 bool is_compressed =
2000 mirror::kUseStringCompression &&
2001 const_string != nullptr &&
2002 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
2003
2004 if (const_string != nullptr) {
2005 // Load `count` field of the argument string and check if it matches the const string.
2006 // Also compares the compression style, if differs return false.
2007 __ Ldr(temp, MemOperand(arg.X(), count_offset));
Vladimir Marko26ec3ca2017-03-14 13:37:14 +00002008 // Temporarily release temp1 as we may not be able to embed the flagged count in the CMP immediate.
2009 scratch_scope.Release(temp1);
Vladimir Markoe39f14f2017-02-10 15:44:25 +00002010 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
Vladimir Marko26ec3ca2017-03-14 13:37:14 +00002011 temp1 = scratch_scope.AcquireW();
Vladimir Markoe39f14f2017-02-10 15:44:25 +00002012 __ B(&return_false, ne);
2013 } else {
2014 // Load `count` fields of this and argument strings.
2015 __ Ldr(temp, MemOperand(str.X(), count_offset));
2016 __ Ldr(temp1, MemOperand(arg.X(), count_offset));
2017 // Check if `count` fields are equal, return false if they're not.
2018 // Also compares the compression style, if differs return false.
2019 __ Cmp(temp, temp1);
2020 __ B(&return_false, ne);
2021 }
Agi Csakiea34b402015-08-13 17:51:19 -07002022
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002023 // Assertions that must hold in order to compare strings 8 bytes at a time.
Vladimir Marko984519c2017-08-23 10:45:29 +01002024 // Ok to do this because strings are zero-padded to kObjectAlignment.
Agi Csakiea34b402015-08-13 17:51:19 -07002025 DCHECK_ALIGNED(value_offset, 8);
2026 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
2027
Vladimir Markoe39f14f2017-02-10 15:44:25 +00002028 if (const_string != nullptr &&
Vladimir Marko984519c2017-08-23 10:45:29 +01002029 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
2030 : kShortConstStringEqualsCutoffInBytes / 2u)) {
Vladimir Markoe39f14f2017-02-10 15:44:25 +00002031 // Load and compare the contents. Though we know the contents of the short const string
2032 // at compile time, materializing constants may be more code than loading from memory.
2033 int32_t offset = value_offset;
2034 size_t remaining_bytes =
2035 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
2036 temp = temp.X();
2037 temp1 = temp1.X();
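    // Each iteration below compares 16 bytes: the Ccmp compares the second register pair only
    // when the first pair was equal (eq); otherwise it sets the flags to NoFlag so that the
    // `ne` branch is taken.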
Vladimir Marko984519c2017-08-23 10:45:29 +01002038 while (remaining_bytes > sizeof(uint64_t)) {
Vladimir Markoe39f14f2017-02-10 15:44:25 +00002039 Register temp2 = XRegisterFrom(locations->GetTemp(0));
2040 __ Ldp(temp, temp1, MemOperand(str.X(), offset));
2041 __ Ldp(temp2, out, MemOperand(arg.X(), offset));
2042 __ Cmp(temp, temp2);
2043 __ Ccmp(temp1, out, NoFlag, eq);
2044 __ B(&return_false, ne);
2045 offset += 2u * sizeof(uint64_t);
2046 remaining_bytes -= 2u * sizeof(uint64_t);
2047 }
2048 if (remaining_bytes != 0u) {
2049 __ Ldr(temp, MemOperand(str.X(), offset));
2050 __ Ldr(temp1, MemOperand(arg.X(), offset));
2051 __ Cmp(temp, temp1);
2052 __ B(&return_false, ne);
2053 }
2054 } else {
2055 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
2056 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2057 "Expecting 0=compressed, 1=uncompressed");
2058 __ Cbz(temp, &return_true);
2059
2060 if (mirror::kUseStringCompression) {
2061 // For string compression, calculate the number of bytes to compare (not chars).
2062 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
2063 __ And(temp1, temp, Operand(1)); // Extract compression flag.
2064 __ Lsr(temp, temp, 1u); // Extract length.
2065 __ Lsl(temp, temp, temp1); // Calculate number of bytes to compare.
2066 }
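    // Example: count = 11 (0b1011) decodes to flag = 1 (uncompressed) and length = 5, giving
    // 5 << 1 = 10 bytes to compare; count = 10 (0b1010) is a compressed string of length 5,
    // giving 5 << 0 = 5 bytes.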
2067
2068    // Store the offset of the string value in preparation for the comparison loop.
2069 __ Mov(temp1, value_offset);
2070
2071 temp1 = temp1.X();
2072 Register temp2 = XRegisterFrom(locations->GetTemp(0));
2073 // Loop to compare strings 8 bytes at a time starting at the front of the string.
Vladimir Markoe39f14f2017-02-10 15:44:25 +00002074 __ Bind(&loop);
2075 __ Ldr(out, MemOperand(str.X(), temp1));
2076 __ Ldr(temp2, MemOperand(arg.X(), temp1));
2077 __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
2078 __ Cmp(out, temp2);
2079 __ B(&return_false, ne);
2080 // With string compression, we have compared 8 bytes, otherwise 4 chars.
2081 __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
2082 __ B(&loop, hi);
jessicahandojo05765752016-09-09 19:01:32 -07002083 }
2084
Agi Csakiea34b402015-08-13 17:51:19 -07002085 // Return true and exit the function.
2086 // If loop does not result in returning false, we return true.
2087 __ Bind(&return_true);
2088 __ Mov(out, 1);
2089 __ B(&end);
2090
2091 // Return false and exit the function.
2092 __ Bind(&return_false);
2093 __ Mov(out, 0);
2094 __ Bind(&end);
2095}
2096
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002097static void GenerateVisitStringIndexOf(HInvoke* invoke,
Scott Wakeling97c72b72016-06-24 16:19:36 +01002098 MacroAssembler* masm,
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002099 CodeGeneratorARM64* codegen,
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002100 bool start_at_zero) {
2101 LocationSummary* locations = invoke->GetLocations();
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002102
2103 // Note that the null check must have been done earlier.
2104 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
2105
2106  // Check for code points > 0xFFFF. Either emit a slow-path check when we don't know statically,
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002107  // dispatch directly to the slow path for a large constant, or omit the slow path entirely for a small constant or a char.
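  // (Code points above 0xFFFF are supplementary characters stored as surrogate pairs, which the
  // char-based fast path cannot match, hence the slow path in that case.)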
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002108 SlowPathCodeARM64* slow_path = nullptr;
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002109 HInstruction* code_point = invoke->InputAt(1);
2110 if (code_point->IsIntConstant()) {
Vladimir Markoda051082016-05-17 16:10:20 +01002111 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002112 // Always needs the slow-path. We could directly dispatch to it, but this case should be
2113 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
Vladimir Marko174b2e22017-10-12 13:34:49 +01002114 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002115 codegen->AddSlowPath(slow_path);
2116 __ B(slow_path->GetEntryLabel());
2117 __ Bind(slow_path->GetExitLabel());
2118 return;
2119 }
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002120 } else if (code_point->GetType() != DataType::Type::kUint16) {
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002121 Register char_reg = WRegisterFrom(locations->InAt(1));
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002122 __ Tst(char_reg, 0xFFFF0000);
Vladimir Marko174b2e22017-10-12 13:34:49 +01002123 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002124 codegen->AddSlowPath(slow_path);
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002125 __ B(ne, slow_path->GetEntryLabel());
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002126 }
2127
2128 if (start_at_zero) {
2129 // Start-index = 0.
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002130 Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002131 __ Mov(tmp_reg, 0);
2132 }
2133
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002134 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
Roland Levillain42ad2882016-02-29 18:26:54 +00002135 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002136
2137 if (slow_path != nullptr) {
2138 __ Bind(slow_path->GetExitLabel());
2139 }
2140}
2141
2142void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002143 LocationSummary* locations = new (allocator_) LocationSummary(
2144 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002145 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2146 // best to align the inputs accordingly.
2147 InvokeRuntimeCallingConvention calling_convention;
2148 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2149 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002150 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002151
Vladimir Markofb6c90a2016-05-06 15:52:12 +01002152 // Need to send start_index=0.
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002153 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2154}
2155
2156void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
Andreas Gampe3db70682018-12-26 15:12:03 -08002157 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002158}
2159
2160void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002161 LocationSummary* locations = new (allocator_) LocationSummary(
2162 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002163 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2164 // best to align the inputs accordingly.
2165 InvokeRuntimeCallingConvention calling_convention;
2166 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2167 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2168 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002169 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002170}
2171
2172void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
Andreas Gampe3db70682018-12-26 15:12:03 -08002173 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false);
Andreas Gampeba6fdbc2015-05-07 22:31:55 -07002174}
2175
Jeff Hao848f70a2014-01-15 13:49:50 -08002176void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002177 LocationSummary* locations = new (allocator_) LocationSummary(
2178 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
Jeff Hao848f70a2014-01-15 13:49:50 -08002179 InvokeRuntimeCallingConvention calling_convention;
2180 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2181 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2182 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2183 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002184 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
Jeff Hao848f70a2014-01-15 13:49:50 -08002185}
2186
2187void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01002188 MacroAssembler* masm = GetVIXLAssembler();
Jeff Hao848f70a2014-01-15 13:49:50 -08002189 LocationSummary* locations = invoke->GetLocations();
2190
2191 Register byte_array = WRegisterFrom(locations->InAt(0));
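  // A null byte array is not handled inline; branch to the slow path below.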
2192 __ Cmp(byte_array, 0);
Vladimir Marko174b2e22017-10-12 13:34:49 +01002193 SlowPathCodeARM64* slow_path =
2194 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08002195 codegen_->AddSlowPath(slow_path);
2196 __ B(eq, slow_path->GetEntryLabel());
2197
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002198 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
Roland Levillainf969a202016-03-09 16:14:00 +00002199 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
Jeff Hao848f70a2014-01-15 13:49:50 -08002200 __ Bind(slow_path->GetExitLabel());
2201}
2202
2203void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002204 LocationSummary* locations =
2205 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
Jeff Hao848f70a2014-01-15 13:49:50 -08002206 InvokeRuntimeCallingConvention calling_convention;
2207 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2208 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2209 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002210 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
Jeff Hao848f70a2014-01-15 13:49:50 -08002211}
2212
2213void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
Roland Levillaincc3839c2016-02-29 16:23:48 +00002214 // No need to emit code checking whether `locations->InAt(2)` is a null
2215 // pointer, as callers of the native method
2216 //
2217 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
2218 //
2219 // all include a null check on `data` before calling that method.
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002220 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
Roland Levillainf969a202016-03-09 16:14:00 +00002221 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
Jeff Hao848f70a2014-01-15 13:49:50 -08002222}
2223
2224void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002225 LocationSummary* locations = new (allocator_) LocationSummary(
2226 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
Jeff Hao848f70a2014-01-15 13:49:50 -08002227 InvokeRuntimeCallingConvention calling_convention;
2228 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002229 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
Jeff Hao848f70a2014-01-15 13:49:50 -08002230}
2231
2232void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01002233 MacroAssembler* masm = GetVIXLAssembler();
Jeff Hao848f70a2014-01-15 13:49:50 -08002234 LocationSummary* locations = invoke->GetLocations();
2235
2236 Register string_to_copy = WRegisterFrom(locations->InAt(0));
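  // A null source string is not handled inline; branch to the slow path below.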
2237 __ Cmp(string_to_copy, 0);
Vladimir Marko174b2e22017-10-12 13:34:49 +01002238 SlowPathCodeARM64* slow_path =
2239 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08002240 codegen_->AddSlowPath(slow_path);
2241 __ B(eq, slow_path->GetEntryLabel());
2242
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002243 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
Roland Levillainf969a202016-03-09 16:14:00 +00002244 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
Jeff Hao848f70a2014-01-15 13:49:50 -08002245 __ Bind(slow_path->GetExitLabel());
2246}
2247
Vladimir Markoca6fff82017-10-03 14:49:14 +01002248static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002249 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002250 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2251 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002252
Vladimir Markoca6fff82017-10-03 14:49:14 +01002253 LocationSummary* const locations =
2254 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002255 InvokeRuntimeCallingConvention calling_convention;
2256
2257 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2258 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2259}
2260
Vladimir Markoca6fff82017-10-03 14:49:14 +01002261static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002262 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002263 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2264 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
2265 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002266
Vladimir Markoca6fff82017-10-03 14:49:14 +01002267 LocationSummary* const locations =
2268 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002269 InvokeRuntimeCallingConvention calling_convention;
2270
2271 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
2272 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
2273 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
2274}
2275
Nikita Iashchenko3fa6e462021-09-10 17:30:04 +01002276static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2277 DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
2278 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
2279 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
2280 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(2)->GetType()));
2281 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
2282
2283 LocationSummary* const locations =
2284 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2285
2286 locations->SetInAt(0, Location::RequiresFpuRegister());
2287 locations->SetInAt(1, Location::RequiresFpuRegister());
2288 locations->SetInAt(2, Location::RequiresFpuRegister());
2289 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2290}
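// Note: unlike CreateFPToFPCallLocations and CreateFPFPToFPCallLocations above, this builder
// requests plain FPU registers and no runtime call; the three-operand intrinsics it serves are
// presumably lowered to FP instructions rather than a libcore call.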
2291
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002292static void GenFPToFPCall(HInvoke* invoke,
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002293 CodeGeneratorARM64* codegen,
2294 QuickEntrypointEnum entry) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002295 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002296}
2297
2298void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002299 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002300}
2301
2302void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002303 GenFPToFPCall(invoke, codegen_, kQuickCos);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002304}
2305
2306void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002307 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002308}
2309
2310void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002311 GenFPToFPCall(invoke, codegen_, kQuickSin);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002312}
2313
2314void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002315 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002316}
2317
2318void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002319 GenFPToFPCall(invoke, codegen_, kQuickAcos);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002320}
2321
2322void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002323 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002324}
2325
2326void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002327 GenFPToFPCall(invoke, codegen_, kQuickAsin);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002328}
2329
2330void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002331 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002332}
2333
2334void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002335 GenFPToFPCall(invoke, codegen_, kQuickAtan);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002336}
2337
2338void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002339 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002340}
2341
2342void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002343 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002344}
2345
2346void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002347 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002348}
2349
2350void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002351 GenFPToFPCall(invoke, codegen_, kQuickCosh);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002352}
2353
2354void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002355 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002356}
2357
2358void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002359 GenFPToFPCall(invoke, codegen_, kQuickExp);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002360}
2361
2362void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002363 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002364}
2365
2366void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002367 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002368}
2369
2370void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002371 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002372}
2373
2374void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002375 GenFPToFPCall(invoke, codegen_, kQuickLog);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002376}
2377
2378void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002379 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002380}
2381
2382void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002383 GenFPToFPCall(invoke, codegen_, kQuickLog10);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002384}
2385
2386void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002387 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002388}
2389
2390void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002391 GenFPToFPCall(invoke, codegen_, kQuickSinh);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002392}
2393
2394void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002395 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002396}
2397
2398void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002399 GenFPToFPCall(invoke, codegen_, kQuickTan);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002400}
2401
2402void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002403 CreateFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002404}
2405
2406void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002407 GenFPToFPCall(invoke, codegen_, kQuickTanh);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002408}
2409
2410void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002411 CreateFPFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002412}
2413
2414void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002415 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002416}
2417
Vladimir Marko4d179872018-01-19 14:50:10 +00002418void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
2419 CreateFPFPToFPCallLocations(allocator_, invoke);
2420}
2421
2422void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
2423 GenFPToFPCall(invoke, codegen_, kQuickPow);
2424}
2425
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002426void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002427 CreateFPFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002428}
2429
2430void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002431 GenFPToFPCall(invoke, codegen_, kQuickHypot);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002432}
2433
2434void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002435 CreateFPFPToFPCallLocations(allocator_, invoke);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002436}
2437
2438void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
Serban Constantinescu22f81d32016-02-18 16:06:31 +00002439 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
Anton Kirilov02fc24e2016-01-20 16:48:19 +00002440}
2441
Tim Zhang25abd6c2016-01-19 23:39:24 +08002442void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01002443 LocationSummary* locations =
2444 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
Tim Zhang25abd6c2016-01-19 23:39:24 +08002445 locations->SetInAt(0, Location::RequiresRegister());
2446 locations->SetInAt(1, Location::RequiresRegister());
2447 locations->SetInAt(2, Location::RequiresRegister());
2448 locations->SetInAt(3, Location::RequiresRegister());
2449 locations->SetInAt(4, Location::RequiresRegister());
2450
2451 locations->AddTemp(Location::RequiresRegister());
2452 locations->AddTemp(Location::RequiresRegister());
Scott Wakelingdf109d92016-04-22 11:35:56 +01002453 locations->AddTemp(Location::RequiresRegister());
Tim Zhang25abd6c2016-01-19 23:39:24 +08002454}
2455
2456void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01002457 MacroAssembler* masm = GetVIXLAssembler();
Tim Zhang25abd6c2016-01-19 23:39:24 +08002458 LocationSummary* locations = invoke->GetLocations();
2459
2460 // Check assumption that sizeof(Char) is 2 (used in scaling below).
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002461 const size_t char_size = DataType::Size(DataType::Type::kUint16);
Tim Zhang25abd6c2016-01-19 23:39:24 +08002462 DCHECK_EQ(char_size, 2u);
2463
2464 // Location of data in char array buffer.
2465 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2466
2467 // Location of char array data in string.
2468 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2469
2470 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2471  // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2472 Register srcObj = XRegisterFrom(locations->InAt(0));
2473 Register srcBegin = XRegisterFrom(locations->InAt(1));
2474 Register srcEnd = XRegisterFrom(locations->InAt(2));
2475 Register dstObj = XRegisterFrom(locations->InAt(3));
2476 Register dstBegin = XRegisterFrom(locations->InAt(4));
2477
2478 Register src_ptr = XRegisterFrom(locations->GetTemp(0));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002479 Register num_chr = XRegisterFrom(locations->GetTemp(1));
2480 Register tmp1 = XRegisterFrom(locations->GetTemp(2));
Tim Zhang25abd6c2016-01-19 23:39:24 +08002481
2482 UseScratchRegisterScope temps(masm);
2483 Register dst_ptr = temps.AcquireX();
Scott Wakelingdf109d92016-04-22 11:35:56 +01002484 Register tmp2 = temps.AcquireX();
Tim Zhang25abd6c2016-01-19 23:39:24 +08002485
jessicahandojo05765752016-09-09 19:01:32 -07002486 vixl::aarch64::Label done;
David Horstmann53d220e2019-07-16 16:00:10 +01002487 vixl::aarch64::Label compressed_string_vector_loop;
2488 vixl::aarch64::Label compressed_string_remainder;
jessicahandojo05765752016-09-09 19:01:32 -07002489 __ Sub(num_chr, srcEnd, srcBegin);
2490 // Early out for valid zero-length retrievals.
2491 __ Cbz(num_chr, &done);
Tim Zhang25abd6c2016-01-19 23:39:24 +08002492
Scott Wakelingdf109d92016-04-22 11:35:56 +01002493  // dst address to start copying to.
Tim Zhang25abd6c2016-01-19 23:39:24 +08002494 __ Add(dst_ptr, dstObj, Operand(data_offset));
2495 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
2496
jessicahandojo05765752016-09-09 19:01:32 -07002497 // src address to copy from.
2498 __ Add(src_ptr, srcObj, Operand(value_offset));
2499 vixl::aarch64::Label compressed_string_preloop;
2500 if (mirror::kUseStringCompression) {
2501 // Location of count in string.
2502 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2503    // String's `count` field: the length with the compression flag in bit 0.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002504 __ Ldr(tmp2, MemOperand(srcObj, count_offset));
2505 __ Tbz(tmp2, 0, &compressed_string_preloop);
jessicahandojo05765752016-09-09 19:01:32 -07002506 }
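  // Fall-through: the string is uncompressed (bit 0 of `count` is set) or compression is
  // disabled, so scale srcBegin by the 16-bit char size.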
2507 __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002508
Tim Zhang25abd6c2016-01-19 23:39:24 +08002509 // Do the copy.
Scott Wakeling97c72b72016-06-24 16:19:36 +01002510 vixl::aarch64::Label loop;
Scott Wakeling97c72b72016-06-24 16:19:36 +01002511 vixl::aarch64::Label remainder;
Scott Wakelingdf109d92016-04-22 11:35:56 +01002512
Scott Wakelingdf109d92016-04-22 11:35:56 +01002513 // Save repairing the value of num_chr on the < 8 character path.
2514 __ Subs(tmp1, num_chr, 8);
2515 __ B(lt, &remainder);
2516
2517 // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2518 __ Mov(num_chr, tmp1);
2519
2520  // Main loop for longer fetches: loads and stores 8 x 16-bit characters at a time.
2521 // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
Tim Zhang25abd6c2016-01-19 23:39:24 +08002522 __ Bind(&loop);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002523 __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002524 __ Subs(num_chr, num_chr, 8);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002525 __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002526 __ B(ge, &loop);
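  // Example with 20 chars: the loop above copies 16 (num_chr goes 12 -> 4 -> -4); the Adds
  // below restores num_chr to 4 for the remainder loop.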
2527
2528 __ Adds(num_chr, num_chr, 8);
2529 __ B(eq, &done);
2530
2531 // Main loop for < 8 character case and remainder handling. Loads and stores one
2532 // 16-bit Java character at a time.
2533 __ Bind(&remainder);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002534 __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002535 __ Subs(num_chr, num_chr, 1);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002536 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
Scott Wakelingdf109d92016-04-22 11:35:56 +01002537 __ B(gt, &remainder);
jessicahandojo05765752016-09-09 19:01:32 -07002538 __ B(&done);
2539
2540 if (mirror::kUseStringCompression) {
David Horstmann53d220e2019-07-16 16:00:10 +01002541 // For compressed strings, acquire a SIMD temporary register.
Evgeny Astigeevich7d48dcd2019-10-16 12:46:28 +01002542 VRegister vtmp1 = temps.AcquireVRegisterOfSize(kQRegSize);
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002543 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
jessicahandojo05765752016-09-09 19:01:32 -07002544 DCHECK_EQ(c_char_size, 1u);
2545 __ Bind(&compressed_string_preloop);
2546 __ Add(src_ptr, src_ptr, Operand(srcBegin));
David Horstmann53d220e2019-07-16 16:00:10 +01002547
2548 // Save repairing the value of num_chr on the < 8 character path.
2549 __ Subs(tmp1, num_chr, 8);
2550 __ B(lt, &compressed_string_remainder);
2551
2552 // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2553 __ Mov(num_chr, tmp1);
2554
2555    // Main loop for a compressed src, widening 8 characters from 8-bit to 16-bit at a time.
2556 // Uses SIMD instructions.
2557 __ Bind(&compressed_string_vector_loop);
2558 __ Ld1(vtmp1.V8B(), MemOperand(src_ptr, c_char_size * 8, PostIndex));
2559 __ Subs(num_chr, num_chr, 8);
2560 __ Uxtl(vtmp1.V8H(), vtmp1.V8B());
2561 __ St1(vtmp1.V8H(), MemOperand(dst_ptr, char_size * 8, PostIndex));
2562 __ B(ge, &compressed_string_vector_loop);
2563
2564 __ Adds(num_chr, num_chr, 8);
2565 __ B(eq, &done);
2566
2567 // Loop for < 8 character case and remainder handling with a compressed src.
2568    // Widens and copies one character from 8-bit to 16-bit at a time.
2569 __ Bind(&compressed_string_remainder);
jessicahandojo05765752016-09-09 19:01:32 -07002570 __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
2571 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2572 __ Subs(num_chr, num_chr, Operand(1));
David Horstmann53d220e2019-07-16 16:00:10 +01002573 __ B(gt, &compressed_string_remainder);
jessicahandojo05765752016-09-09 19:01:32 -07002574 }
Scott Wakelingdf109d92016-04-22 11:35:56 +01002575
Tim Zhang25abd6c2016-01-19 23:39:24 +08002576 __ Bind(&done);
2577}
2578
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002579// Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
2580// implementation there for longer copy lengths.
donghui.baic2ec9ad2016-03-10 14:02:55 +08002581static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002582
2583static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
2584 uint32_t at,
2585 HInstruction* input) {
2586 HIntConstant* const_input = input->AsIntConstant();
Scott Wakeling97c72b72016-06-24 16:19:36 +01002587 if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002588 locations->SetInAt(at, Location::RequiresRegister());
2589 } else {
2590 locations->SetInAt(at, Location::RegisterOrConstant(input));
2591 }
2592}
2593
2594void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2595 // Check to see if we have known failures that will cause us to have to bail out
2596 // to the runtime, and just generate the runtime call directly.
2597 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2598 HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
2599
2600 // The positions must be non-negative.
2601 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2602 (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2603 // We will have to fail anyways.
2604 return;
2605 }
2606
2607 // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2608 // native implementation.
2609 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2610 if (length != nullptr) {
2611 int32_t len = length->GetValue();
donghui.baic2ec9ad2016-03-10 14:02:55 +08002612 if (len < 0 || len > kSystemArrayCopyCharThreshold) {
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002613 // Just call as normal.
2614 return;
2615 }
2616 }
2617
Vladimir Markoca6fff82017-10-03 14:49:14 +01002618 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2619 LocationSummary* locations =
2620 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002621 // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2622 locations->SetInAt(0, Location::RequiresRegister());
2623 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2624 locations->SetInAt(2, Location::RequiresRegister());
2625 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2626 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2627
2628 locations->AddTemp(Location::RequiresRegister());
2629 locations->AddTemp(Location::RequiresRegister());
2630 locations->AddTemp(Location::RequiresRegister());
2631}
2632
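// Checks that the copy region [pos, pos + length) fits within `input`: pos >= 0,
// pos <= length(input) and length(input) - pos >= length, branching to `slow_path` otherwise.
// When `length_is_input_length` is true, the copy can only succeed if `pos` is zero.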
Scott Wakeling97c72b72016-06-24 16:19:36 +01002633static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002634 const Location& pos,
2635 const Register& input,
2636 const Location& length,
2637 SlowPathCodeARM64* slow_path,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002638 const Register& temp,
2639 bool length_is_input_length = false) {
2640 const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2641 if (pos.IsConstant()) {
2642 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2643 if (pos_const == 0) {
2644 if (!length_is_input_length) {
2645 // Check that length(input) >= length.
2646 __ Ldr(temp, MemOperand(input, length_offset));
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002647 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002648 __ B(slow_path->GetEntryLabel(), lt);
2649 }
2650 } else {
2651 // Check that length(input) >= pos.
Nicolas Geoffrayfea1abd2016-07-06 12:09:12 +01002652 __ Ldr(temp, MemOperand(input, length_offset));
2653 __ Subs(temp, temp, pos_const);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002654 __ B(slow_path->GetEntryLabel(), lt);
2655
2656 // Check that (length(input) - pos) >= length.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002657 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002658 __ B(slow_path->GetEntryLabel(), lt);
2659 }
2660 } else if (length_is_input_length) {
2661 // The only way the copy can succeed is if pos is zero.
2662 __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
2663 } else {
2664 // Check that pos >= 0.
2665 Register pos_reg = WRegisterFrom(pos);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002666 __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002667
2668 // Check that pos <= length(input) && (length(input) - pos) >= length.
2669 __ Ldr(temp, MemOperand(input, length_offset));
2670 __ Subs(temp, temp, pos_reg);
2671 // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt).
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002672 __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002673 __ B(slow_path->GetEntryLabel(), lt);
2674 }
2675}
2676
Roland Levillain9cc0ea82017-03-16 11:25:59 +00002677// Compute base source address, base destination address, and end
2678// source address for System.arraycopy* intrinsics in `src_base`,
2679// `dst_base` and `src_end` respectively.
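// Each base address is computed as array + data_offset + pos * element_size (using an LSL by
// the element size shift when the position is not a constant), and `src_end` is `src_base`
// plus `copy_length` scaled the same way.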
Scott Wakeling97c72b72016-06-24 16:19:36 +01002680static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002681 DataType::Type type,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002682 const Register& src,
2683 const Location& src_pos,
2684 const Register& dst,
2685 const Location& dst_pos,
2686 const Location& copy_length,
2687 const Register& src_base,
2688 const Register& dst_base,
2689 const Register& src_end) {
Roland Levillain9cc0ea82017-03-16 11:25:59 +00002690 // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002691 DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
Roland Levillainebea3d22016-04-12 15:42:57 +01002692 << "Unexpected element type: " << type;
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002693 const int32_t element_size = DataType::Size(type);
2694 const int32_t element_size_shift = DataType::SizeShift(type);
Roland Levillain9cc0ea82017-03-16 11:25:59 +00002695 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002696
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002697 if (src_pos.IsConstant()) {
2698 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
Roland Levillainebea3d22016-04-12 15:42:57 +01002699 __ Add(src_base, src, element_size * constant + data_offset);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002700 } else {
Roland Levillainebea3d22016-04-12 15:42:57 +01002701 __ Add(src_base, src, data_offset);
2702 __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002703 }
2704
2705 if (dst_pos.IsConstant()) {
2706 int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
Roland Levillainebea3d22016-04-12 15:42:57 +01002707 __ Add(dst_base, dst, element_size * constant + data_offset);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002708 } else {
Roland Levillainebea3d22016-04-12 15:42:57 +01002709 __ Add(dst_base, dst, data_offset);
2710 __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002711 }
2712
2713 if (copy_length.IsConstant()) {
2714 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
Roland Levillainebea3d22016-04-12 15:42:57 +01002715 __ Add(src_end, src_base, element_size * constant);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002716 } else {
Roland Levillainebea3d22016-04-12 15:42:57 +01002717 __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002718 }
2719}
2720
2721void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
Scott Wakeling97c72b72016-06-24 16:19:36 +01002722 MacroAssembler* masm = GetVIXLAssembler();
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002723 LocationSummary* locations = invoke->GetLocations();
2724 Register src = XRegisterFrom(locations->InAt(0));
2725 Location src_pos = locations->InAt(1);
2726 Register dst = XRegisterFrom(locations->InAt(2));
2727 Location dst_pos = locations->InAt(3);
2728 Location length = locations->InAt(4);
2729
Vladimir Marko174b2e22017-10-12 13:34:49 +01002730 SlowPathCodeARM64* slow_path =
2731 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002732 codegen_->AddSlowPath(slow_path);
2733
2734 // If source and destination are the same, take the slow path. Overlapping copy regions must be
2735 // copied in reverse and we can't know in all cases if it's needed.
2736 __ Cmp(src, dst);
2737 __ B(slow_path->GetEntryLabel(), eq);
2738
2739 // Bail out if the source is null.
2740 __ Cbz(src, slow_path->GetEntryLabel());
2741
2742 // Bail out if the destination is null.
2743 __ Cbz(dst, slow_path->GetEntryLabel());
2744
2745 if (!length.IsConstant()) {
Vladimir Markoc5646202016-11-28 16:03:15 +00002746 // Merge the following two comparisons into one:
2747 // If the length is negative, bail out (delegate to libcore's native implementation).
2748 // If the length > 32 then (currently) prefer libcore's native implementation.
donghui.baic2ec9ad2016-03-10 14:02:55 +08002749 __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
Vladimir Markoc5646202016-11-28 16:03:15 +00002750 __ B(slow_path->GetEntryLabel(), hi);
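    // The unsigned `hi` test handles both cases at once: a negative length becomes a large
    // unsigned value and also branches to the slow path.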
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002751 } else {
2752 // We have already checked in the LocationsBuilder for the constant case.
2753 DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2754 DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
2755 }
2756
2757 Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2758 Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2759 Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2760
2761 CheckSystemArrayCopyPosition(masm,
2762 src_pos,
2763 src,
2764 length,
2765 slow_path,
2766 src_curr_addr,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002767 false);
2768
2769 CheckSystemArrayCopyPosition(masm,
2770 dst_pos,
2771 dst,
2772 length,
2773 slow_path,
2774 src_curr_addr,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002775 false);
2776
2777 src_curr_addr = src_curr_addr.X();
2778 dst_curr_addr = dst_curr_addr.X();
2779 src_stop_addr = src_stop_addr.X();
2780
2781 GenSystemArrayCopyAddresses(masm,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002782 DataType::Type::kUint16,
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002783 src,
2784 src_pos,
2785 dst,
2786 dst_pos,
2787 length,
2788 src_curr_addr,
2789 dst_curr_addr,
2790 src_stop_addr);
2791
2792 // Iterate over the arrays and do a raw copy of the chars.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01002793 const int32_t char_size = DataType::Size(DataType::Type::kUint16);
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002794 UseScratchRegisterScope temps(masm);
2795 Register tmp = temps.AcquireW();
Scott Wakeling97c72b72016-06-24 16:19:36 +01002796 vixl::aarch64::Label loop, done;
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002797 __ Bind(&loop);
2798 __ Cmp(src_curr_addr, src_stop_addr);
2799 __ B(&done, eq);
Scott Wakeling97c72b72016-06-24 16:19:36 +01002800 __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2801 __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
Scott Wakelingd3d0da52016-02-29 15:17:20 +00002802 __ B(&loop);
2803 __ Bind(&done);
2804
2805 __ Bind(slow_path->GetExitLabel());
2806}
2807
donghui.baic2ec9ad2016-03-10 14:02:55 +08002808// We can choose to use libcore's native implementation for longer copy lengths.
2809static constexpr int32_t kSystemArrayCopyThreshold = 128;
2810
2811// CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
2812// We want to use only two temporary registers in order to reduce the register pressure on arm64.
2813// So we don't use CodeGenerator::CreateSystemArrayCopyLocationSummary.
2814void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
Roland Levillain0b671c02016-08-19 12:02:34 +01002815 // The only read barrier implementation supporting the
2816 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2817 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
Roland Levillain3d312422016-06-23 13:53:42 +01002818 return;
2819 }
2820
donghui.baic2ec9ad2016-03-10 14:02:55 +08002821 // Check to see if we have known failures that will cause us to have to bail out
2822 // to the runtime, and just generate the runtime call directly.
2823 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2824 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2825
2826 // The positions must be non-negative.
2827 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2828 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
2829 // We will have to fail anyways.
2830 return;
2831 }
2832
2833 // The length must be >= 0.
2834 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2835 if (length != nullptr) {
2836 int32_t len = length->GetValue();
2837 if (len < 0 || len >= kSystemArrayCopyThreshold) {
2838 // Just call as normal.
2839 return;
2840 }
2841 }
2842
2843 SystemArrayCopyOptimizations optimizations(invoke);
2844
2845 if (optimizations.GetDestinationIsSource()) {
2846 if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
2847 // We only support backward copying if source and destination are the same.
2848 return;
2849 }
2850 }
2851
2852 if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
2853 // We currently don't intrinsify primitive copying.
2854 return;
2855 }
2856
Vladimir Markoca6fff82017-10-03 14:49:14 +01002857 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2858 LocationSummary* locations =
2859 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002860 // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
2861 locations->SetInAt(0, Location::RequiresRegister());
2862 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2863 locations->SetInAt(2, Location::RequiresRegister());
2864 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2865 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2866
2867 locations->AddTemp(Location::RequiresRegister());
2868 locations->AddTemp(Location::RequiresRegister());
Roland Levillain0b671c02016-08-19 12:02:34 +01002869 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2870 // Temporary register IP0, obtained from the VIXL scratch register
2871 // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
2872 // (because that register is clobbered by ReadBarrierMarkRegX
Roland Levillain54f869e2017-03-06 13:54:11 +00002873 // entry points). It cannot be used in calls to
2874 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
2875 // either. For these reasons, get a third extra temporary register
2876 // from the register allocator.
Roland Levillain0b671c02016-08-19 12:02:34 +01002877 locations->AddTemp(Location::RequiresRegister());
Roland Levillain54f869e2017-03-06 13:54:11 +00002878 } else {
2879 // Cases other than Baker read barriers: the third temporary will
2880 // be acquired from the VIXL scratch register pool.
Roland Levillain0b671c02016-08-19 12:02:34 +01002881 }
donghui.baic2ec9ad2016-03-10 14:02:55 +08002882}
2883
2884void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
Roland Levillain0b671c02016-08-19 12:02:34 +01002885 // The only read barrier implementation supporting the
2886 // SystemArrayCopy intrinsic is the Baker-style read barriers.
Santiago Aboy Solanes872ec722022-02-18 14:10:25 +00002887 DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
Roland Levillain3d312422016-06-23 13:53:42 +01002888
Scott Wakeling97c72b72016-06-24 16:19:36 +01002889 MacroAssembler* masm = GetVIXLAssembler();
donghui.baic2ec9ad2016-03-10 14:02:55 +08002890 LocationSummary* locations = invoke->GetLocations();
2891
2892 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2893 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2894 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2895 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
Roland Levillain0b671c02016-08-19 12:02:34 +01002896 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
donghui.baic2ec9ad2016-03-10 14:02:55 +08002897
2898 Register src = XRegisterFrom(locations->InAt(0));
2899 Location src_pos = locations->InAt(1);
2900 Register dest = XRegisterFrom(locations->InAt(2));
2901 Location dest_pos = locations->InAt(3);
2902 Location length = locations->InAt(4);
2903 Register temp1 = WRegisterFrom(locations->GetTemp(0));
Roland Levillain0b671c02016-08-19 12:02:34 +01002904 Location temp1_loc = LocationFrom(temp1);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002905 Register temp2 = WRegisterFrom(locations->GetTemp(1));
Roland Levillain0b671c02016-08-19 12:02:34 +01002906 Location temp2_loc = LocationFrom(temp2);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002907
Vladimir Marko174b2e22017-10-12 13:34:49 +01002908 SlowPathCodeARM64* intrinsic_slow_path =
2909 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Roland Levillain0b671c02016-08-19 12:02:34 +01002910 codegen_->AddSlowPath(intrinsic_slow_path);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002911
Scott Wakeling97c72b72016-06-24 16:19:36 +01002912 vixl::aarch64::Label conditions_on_positions_validated;
donghui.baic2ec9ad2016-03-10 14:02:55 +08002913 SystemArrayCopyOptimizations optimizations(invoke);
2914
donghui.baic2ec9ad2016-03-10 14:02:55 +08002915 // If source and destination are the same, we go to slow path if we need to do
2916 // forward copying.
2917 if (src_pos.IsConstant()) {
2918 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2919 if (dest_pos.IsConstant()) {
Nicolas Geoffray9f65db82016-07-07 12:07:42 +01002920 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2921 if (optimizations.GetDestinationIsSource()) {
2922 // Checked when building locations.
2923 DCHECK_GE(src_pos_constant, dest_pos_constant);
2924 } else if (src_pos_constant < dest_pos_constant) {
2925 __ Cmp(src, dest);
Roland Levillain0b671c02016-08-19 12:02:34 +01002926 __ B(intrinsic_slow_path->GetEntryLabel(), eq);
Nicolas Geoffray9f65db82016-07-07 12:07:42 +01002927 }
donghui.baic2ec9ad2016-03-10 14:02:55 +08002928 // Checked when building locations.
2929 DCHECK(!optimizations.GetDestinationIsSource()
2930 || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
2931 } else {
2932 if (!optimizations.GetDestinationIsSource()) {
Nicolas Geoffray9f65db82016-07-07 12:07:42 +01002933 __ Cmp(src, dest);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002934 __ B(&conditions_on_positions_validated, ne);
2935 }
2936 __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
Roland Levillain0b671c02016-08-19 12:02:34 +01002937 __ B(intrinsic_slow_path->GetEntryLabel(), gt);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002938 }
2939 } else {
2940 if (!optimizations.GetDestinationIsSource()) {
Nicolas Geoffray9f65db82016-07-07 12:07:42 +01002941 __ Cmp(src, dest);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002942 __ B(&conditions_on_positions_validated, ne);
2943 }
2944 __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
2945 OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
Roland Levillain0b671c02016-08-19 12:02:34 +01002946 __ B(intrinsic_slow_path->GetEntryLabel(), lt);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002947 }
2948
2949 __ Bind(&conditions_on_positions_validated);
2950
2951 if (!optimizations.GetSourceIsNotNull()) {
2952 // Bail out if the source is null.
Roland Levillain0b671c02016-08-19 12:02:34 +01002953 __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
donghui.baic2ec9ad2016-03-10 14:02:55 +08002954 }
2955
2956 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2957 // Bail out if the destination is null.
Roland Levillain0b671c02016-08-19 12:02:34 +01002958 __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
donghui.baic2ec9ad2016-03-10 14:02:55 +08002959 }
2960
2961 // We have already checked in the LocationsBuilder for the constant case.
2962 if (!length.IsConstant() &&
2963 !optimizations.GetCountIsSourceLength() &&
2964 !optimizations.GetCountIsDestinationLength()) {
Vladimir Markoc5646202016-11-28 16:03:15 +00002965 // Merge the following two comparisons into one:
2966 // If the length is negative, bail out (delegate to libcore's native implementation).
2967    // If the length >= 128 then (currently) prefer libcore's native implementation.
donghui.baic2ec9ad2016-03-10 14:02:55 +08002968 __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
Vladimir Markoc5646202016-11-28 16:03:15 +00002969 __ B(intrinsic_slow_path->GetEntryLabel(), hs);
donghui.baic2ec9ad2016-03-10 14:02:55 +08002970 }
2971 // Validity checks: source.
2972 CheckSystemArrayCopyPosition(masm,
2973 src_pos,
2974 src,
2975 length,
Roland Levillain0b671c02016-08-19 12:02:34 +01002976 intrinsic_slow_path,
donghui.baic2ec9ad2016-03-10 14:02:55 +08002977 temp1,
donghui.baic2ec9ad2016-03-10 14:02:55 +08002978 optimizations.GetCountIsSourceLength());
2979
2980 // Validity checks: dest.
2981 CheckSystemArrayCopyPosition(masm,
2982 dest_pos,
2983 dest,
2984 length,
Roland Levillain0b671c02016-08-19 12:02:34 +01002985 intrinsic_slow_path,
donghui.baic2ec9ad2016-03-10 14:02:55 +08002986 temp1,
donghui.baic2ec9ad2016-03-10 14:02:55 +08002987 optimizations.GetCountIsDestinationLength());
2988 {
2989 // We use a block to end the scratch scope before the write barrier, thus
2990 // freeing the temporary registers so they can be used in `MarkGCCard`.
2991 UseScratchRegisterScope temps(masm);
Vladimir Markof4f2daa2017-03-20 18:26:59 +00002992 Location temp3_loc; // Used only for Baker read barrier.
Roland Levillain54f869e2017-03-06 13:54:11 +00002993 Register temp3;
2994 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
Vladimir Markof4f2daa2017-03-20 18:26:59 +00002995 temp3_loc = locations->GetTemp(2);
2996 temp3 = WRegisterFrom(temp3_loc);
Roland Levillain54f869e2017-03-06 13:54:11 +00002997 } else {
2998 temp3 = temps.AcquireW();
2999 }
Roland Levillain0b671c02016-08-19 12:02:34 +01003000
donghui.baic2ec9ad2016-03-10 14:02:55 +08003001 if (!optimizations.GetDoesNotNeedTypeCheck()) {
3002 // Check whether all elements of the source array are assignable to the component
3003 // type of the destination array. We do two checks: the classes are the same,
3004 // or the destination is Object[]. If none of these checks succeed, we go to the
3005 // slow path.
donghui.baic2ec9ad2016-03-10 14:02:55 +08003006
Roland Levillain0b671c02016-08-19 12:02:34 +01003007 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3008 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3009 // /* HeapReference<Class> */ temp1 = src->klass_
3010 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3011 temp1_loc,
3012 src.W(),
3013 class_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003014 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003015 /* needs_null_check= */ false,
3016 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003017 // Bail out if the source is not a non-primitive array.
3018 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3019 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3020 temp1_loc,
3021 temp1,
3022 component_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003023 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003024 /* needs_null_check= */ false,
3025 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003026 __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
3027 // If heap poisoning is enabled, `temp1` has been unpoisoned
3028 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3029 // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
3030 __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
3031 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3032 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
donghui.baic2ec9ad2016-03-10 14:02:55 +08003033 }
Roland Levillain0b671c02016-08-19 12:02:34 +01003034
3035 // /* HeapReference<Class> */ temp1 = dest->klass_
3036 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3037 temp1_loc,
3038 dest.W(),
3039 class_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003040 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003041 /* needs_null_check= */ false,
3042 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003043
3044 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3045 // Bail out if the destination is not a non-primitive array.
3046 //
3047 // Register `temp1` is not trashed by the read barrier emitted
3048 // by GenerateFieldLoadWithBakerReadBarrier below, as that
3049 // method produces a call to a ReadBarrierMarkRegX entry point,
3050 // which saves all potentially live registers, including
3051 // temporaries such as `temp1`.
3052 // /* HeapReference<Class> */ temp2 = temp1->component_type_
3053 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3054 temp2_loc,
3055 temp1,
3056 component_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003057 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003058 /* needs_null_check= */ false,
3059 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003060 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
3061 // If heap poisoning is enabled, `temp2` has been unpoisoned
3062 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3063 // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
3064 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
3065 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3066 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
3067 }
3068
3069 // For the same reason given earlier, `temp1` is not trashed by the
3070 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
3071 // /* HeapReference<Class> */ temp2 = src->klass_
3072 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3073 temp2_loc,
3074 src.W(),
3075 class_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003076 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003077 /* needs_null_check= */ false,
3078 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003079 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
3080 __ Cmp(temp1, temp2);
3081
3082 if (optimizations.GetDestinationIsTypedObjectArray()) {
3083 vixl::aarch64::Label do_copy;
3084 __ B(&do_copy, eq);
3085 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3086 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3087 temp1_loc,
3088 temp1,
3089 component_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003090 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003091 /* needs_null_check= */ false,
3092 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003093 // /* HeapReference<Class> */ temp1 = temp1->super_class_
3094 // We do not need to emit a read barrier for the following
3095 // heap reference load, as `temp1` is only used in a
3096 // comparison with null below, and this reference is not
3097 // kept afterwards.
3098 __ Ldr(temp1, HeapOperand(temp1, super_offset));
3099 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
3100 __ Bind(&do_copy);
3101 } else {
3102 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
3103 }
donghui.baic2ec9ad2016-03-10 14:02:55 +08003104 } else {
Roland Levillain0b671c02016-08-19 12:02:34 +01003105 // Non read barrier code.
3106
3107 // /* HeapReference<Class> */ temp1 = dest->klass_
3108 __ Ldr(temp1, MemOperand(dest, class_offset));
3109 // /* HeapReference<Class> */ temp2 = src->klass_
3110 __ Ldr(temp2, MemOperand(src, class_offset));
3111 bool did_unpoison = false;
3112 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
3113 !optimizations.GetSourceIsNonPrimitiveArray()) {
3114 // One or two of the references need to be unpoisoned. Unpoison them
3115 // both to make the identity check valid.
3116 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3117 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3118 did_unpoison = true;
3119 }
3120
3121 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3122 // Bail out if the destination is not a non-primitive array.
3123 // /* HeapReference<Class> */ temp3 = temp1->component_type_
3124 __ Ldr(temp3, HeapOperand(temp1, component_offset));
3125 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
3126 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
3127 // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
3128 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
3129 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3130 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
3131 }
3132
3133 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3134 // Bail out if the source is not a non-primitive array.
3135 // /* HeapReference<Class> */ temp3 = temp2->component_type_
3136 __ Ldr(temp3, HeapOperand(temp2, component_offset));
3137 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
3138 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
3139 // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
3140 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
3141 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3142 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
3143 }
3144
3145 __ Cmp(temp1, temp2);
3146
3147 if (optimizations.GetDestinationIsTypedObjectArray()) {
3148 vixl::aarch64::Label do_copy;
3149 __ B(&do_copy, eq);
3150 if (!did_unpoison) {
3151 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3152 }
3153 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3154 __ Ldr(temp1, HeapOperand(temp1, component_offset));
3155 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3156 // /* HeapReference<Class> */ temp1 = temp1->super_class_
3157 __ Ldr(temp1, HeapOperand(temp1, super_offset));
3158 // No need to unpoison the result, we're comparing against null.
3159 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
3160 __ Bind(&do_copy);
3161 } else {
3162 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
3163 }
donghui.baic2ec9ad2016-03-10 14:02:55 +08003164 }
3165 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3166 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3167 // Bail out if the source is not a non-primitive array.
Roland Levillain0b671c02016-08-19 12:02:34 +01003168 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3169 // /* HeapReference<Class> */ temp1 = src->klass_
3170 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3171 temp1_loc,
3172 src.W(),
3173 class_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003174 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003175 /* needs_null_check= */ false,
3176 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003177 // /* HeapReference<Class> */ temp2 = temp1->component_type_
3178 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3179 temp2_loc,
3180 temp1,
3181 component_offset,
Vladimir Markof4f2daa2017-03-20 18:26:59 +00003182 temp3_loc,
Andreas Gampe3db70682018-12-26 15:12:03 -08003183 /* needs_null_check= */ false,
3184 /* use_load_acquire= */ false);
Roland Levillain0b671c02016-08-19 12:02:34 +01003185 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
3186 // If heap poisoning is enabled, `temp2` has been unpoisoned
3187 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3188 } else {
3189 // /* HeapReference<Class> */ temp1 = src->klass_
3190 __ Ldr(temp1, HeapOperand(src.W(), class_offset));
3191 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
3192 // /* HeapReference<Class> */ temp2 = temp1->component_type_
3193 __ Ldr(temp2, HeapOperand(temp1, component_offset));
3194 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
3195 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
3196 }
3197 // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
3198 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
donghui.baic2ec9ad2016-03-10 14:02:55 +08003199 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
Roland Levillain0b671c02016-08-19 12:02:34 +01003200 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
donghui.baic2ec9ad2016-03-10 14:02:55 +08003201 }
3202
Roland Levillain1663d162017-03-17 15:15:21 +00003203 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
3204 // Constant zero length: no need to emit the loop code at all.
Roland Levillain0b671c02016-08-19 12:02:34 +01003205 } else {
Roland Levillain1663d162017-03-17 15:15:21 +00003206 Register src_curr_addr = temp1.X();
3207 Register dst_curr_addr = temp2.X();
3208 Register src_stop_addr = temp3.X();
3209 vixl::aarch64::Label done;
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003210 const DataType::Type type = DataType::Type::kReference;
3211 const int32_t element_size = DataType::Size(type);
Roland Levillain1663d162017-03-17 15:15:21 +00003212
3213 if (length.IsRegister()) {
3214 // Don't enter the copy loop if the length is zero.
3215 __ Cbz(WRegisterFrom(length), &done);
3216 }
3217
3218 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3219 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
3220
3221 // SystemArrayCopy implementation for Baker read barriers (see
Roland Levillain9983e302017-07-14 14:34:22 +01003222 // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
Roland Levillain1663d162017-03-17 15:15:21 +00003223 //
3224 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
3225 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
3226 // bool is_gray = (rb_state == ReadBarrier::GrayState());
3227 // if (is_gray) {
3228 // // Slow-path copy.
3229 // do {
3230 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
3231 // } while (src_ptr != end_ptr)
3232 // } else {
3233 // // Fast-path copy.
3234 // do {
3235 // *dest_ptr++ = *src_ptr++;
3236 // } while (src_ptr != end_ptr)
3237 // }
3238
3239 // Make sure `tmp` is not IP0, as it is clobbered by
3240 // ReadBarrierMarkRegX entry points in
3241 // ReadBarrierSystemArrayCopySlowPathARM64.
Roland Levillain1ca955d2017-04-13 19:34:30 +01003242 DCHECK(temps.IsAvailable(ip0));
Roland Levillain1663d162017-03-17 15:15:21 +00003243 temps.Exclude(ip0);
Roland Levillain0b671c02016-08-19 12:02:34 +01003244 Register tmp = temps.AcquireW();
Roland Levillain1663d162017-03-17 15:15:21 +00003245 DCHECK_NE(LocationFrom(tmp).reg(), IP0);
Roland Levillain1ca955d2017-04-13 19:34:30 +01003246 // Put IP0 back in the pool so that VIXL has at least one
3247 // scratch register available to emit macro-instructions (note
3248 // that IP1 is already used for `tmp`). Indeed some
3249 // macro-instructions used in GenSystemArrayCopyAddresses
3250 // (invoked hereunder) may require a scratch register (for
3251 // instance to emit a load with a large constant offset).
3252 temps.Include(ip0);
Roland Levillain1663d162017-03-17 15:15:21 +00003253
3254 // /* int32_t */ monitor = src->monitor_
3255 __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
3256 // /* LockWord */ lock_word = LockWord(monitor)
3257 static_assert(sizeof(LockWord) == sizeof(int32_t),
3258 "art::LockWord and int32_t have different sizes.");
3259
3260 // Introduce a dependency on the lock_word including rb_state,
3261 // to prevent load-load reordering, and without using
3262 // a memory barrier (which would be more expensive).
3263 // `src` is unchanged by this operation, but its value now depends
3264 // on `tmp`.
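 // (The 32-bit LDR above zero-extends into the X register, so bits [63:32] of
 // `tmp.X()` are zero and `Operand(tmp.X(), LSR, 32)` contributes nothing to the
 // sum; the add only forges the register dependency.)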
3265 __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
3266
3267 // Compute base source address, base destination address, and end
3268 // source address for System.arraycopy* intrinsics in `src_base`,
3269 // `dst_base` and `src_end` respectively.
3270 // Note that `src_curr_addr` is computed from `src` (and
3271 // `src_pos`) here, and thus honors the artificial dependency
3272 // of `src` on `tmp`.
3273 GenSystemArrayCopyAddresses(masm,
3274 type,
3275 src,
3276 src_pos,
3277 dest,
3278 dest_pos,
3279 length,
3280 src_curr_addr,
3281 dst_curr_addr,
3282 src_stop_addr);
3283
3284 // Slow path used to copy array when `src` is gray.
3285 SlowPathCodeARM64* read_barrier_slow_path =
Vladimir Marko174b2e22017-10-12 13:34:49 +01003286 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
3287 invoke, LocationFrom(tmp));
Roland Levillain1663d162017-03-17 15:15:21 +00003288 codegen_->AddSlowPath(read_barrier_slow_path);
3289
3290 // Given the numeric representation, it's enough to check the low bit of the rb_state.
Roland Levillain14e5a292018-06-28 12:00:56 +01003291 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
Roland Levillain1663d162017-03-17 15:15:21 +00003292 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3293 __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
3294
3295 // Fast-path copy.
3296 // Iterate over the arrays and do a raw copy of the objects. We don't need to
3297 // poison/unpoison.
3298 vixl::aarch64::Label loop;
3299 __ Bind(&loop);
Roland Levillain0b671c02016-08-19 12:02:34 +01003300 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
3301 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
Roland Levillain1663d162017-03-17 15:15:21 +00003302 __ Cmp(src_curr_addr, src_stop_addr);
3303 __ B(&loop, ne);
3304
3305 __ Bind(read_barrier_slow_path->GetExitLabel());
3306 } else {
3307 // Non read barrier code.
3308 // Compute base source address, base destination address, and end
3309 // source address for System.arraycopy* intrinsics in `src_base`,
3310 // `dst_base` and `src_end` respectively.
3311 GenSystemArrayCopyAddresses(masm,
3312 type,
3313 src,
3314 src_pos,
3315 dest,
3316 dest_pos,
3317 length,
3318 src_curr_addr,
3319 dst_curr_addr,
3320 src_stop_addr);
3321 // Iterate over the arrays and do a raw copy of the objects. We don't need to
3322 // poison/unpoison.
3323 vixl::aarch64::Label loop;
3324 __ Bind(&loop);
3325 {
3326 Register tmp = temps.AcquireW();
3327 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
3328 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
3329 }
3330 __ Cmp(src_curr_addr, src_stop_addr);
3331 __ B(&loop, ne);
Roland Levillain0b671c02016-08-19 12:02:34 +01003332 }
Roland Levillain0b671c02016-08-19 12:02:34 +01003333 __ Bind(&done);
donghui.baic2ec9ad2016-03-10 14:02:55 +08003334 }
donghui.baic2ec9ad2016-03-10 14:02:55 +08003335 }
Roland Levillain9cc0ea82017-03-16 11:25:59 +00003336
donghui.baic2ec9ad2016-03-10 14:02:55 +08003337 // We only need one card marking on the destination array.
Andreas Gampe3db70682018-12-26 15:12:03 -08003338 codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false);
donghui.baic2ec9ad2016-03-10 14:02:55 +08003339
Roland Levillain0b671c02016-08-19 12:02:34 +01003340 __ Bind(intrinsic_slow_path->GetExitLabel());
donghui.baic2ec9ad2016-03-10 14:02:55 +08003341}
3342
Anton Kirilova3ffea22016-04-07 17:02:37 +01003343static void GenIsInfinite(LocationSummary* locations,
3344 bool is64bit,
Scott Wakeling97c72b72016-06-24 16:19:36 +01003345 MacroAssembler* masm) {
Artem Serova07de552020-11-01 22:42:43 +00003346 Operand infinity(0);
3347 Operand tst_mask(0);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003348 Register out;
3349
3350 if (is64bit) {
Artem Serova07de552020-11-01 22:42:43 +00003351 infinity = Operand(kPositiveInfinityDouble);
Artem Serov67e4a4d2020-02-06 15:11:36 +00003352 tst_mask = MaskLeastSignificant<uint64_t>(63);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003353 out = XRegisterFrom(locations->Out());
3354 } else {
Artem Serova07de552020-11-01 22:42:43 +00003355 infinity = Operand(kPositiveInfinityFloat);
Artem Serov67e4a4d2020-02-06 15:11:36 +00003356 tst_mask = MaskLeastSignificant<uint32_t>(31);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003357 out = WRegisterFrom(locations->Out());
3358 }
3359
Anton Kirilova3ffea22016-04-07 17:02:37 +01003360 MoveFPToInt(locations, is64bit, masm);
Artem Serov67e4a4d2020-02-06 15:11:36 +00003361 // Checks whether exponent bits are all 1 and fraction bits are all 0.
Anton Kirilova3ffea22016-04-07 17:02:37 +01003362 __ Eor(out, out, infinity);
Artem Serov67e4a4d2020-02-06 15:11:36 +00003363 // TST bitmask is used to mask out the sign bit: either 0x7fffffff or 0x7fffffffffffffff
3364 // depending on is64bit.
3365 __ Tst(out, tst_mask);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003366 __ Cset(out, eq);
3367}
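// Worked example for GenIsInfinite above (illustrative values): for a float input,
// kPositiveInfinityFloat is 0x7f800000 and tst_mask is 0x7fffffff. +Inf gives
// 0x7f800000 ^ 0x7f800000 = 0; -Inf gives 0xff800000 ^ 0x7f800000 = 0x80000000,
// which the mask clears. Both set Z, so Cset produces 1, while any finite value
// or NaN leaves a nonzero masked result and produces 0.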
3368
3369void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01003370 CreateFPToIntLocations(allocator_, invoke);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003371}
3372
3373void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
Andreas Gampe3db70682018-12-26 15:12:03 -08003374 GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
Anton Kirilova3ffea22016-04-07 17:02:37 +01003375}
3376
3377void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01003378 CreateFPToIntLocations(allocator_, invoke);
Anton Kirilova3ffea22016-04-07 17:02:37 +01003379}
3380
3381void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
Andreas Gampe3db70682018-12-26 15:12:03 -08003382 GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
Anton Kirilova3ffea22016-04-07 17:02:37 +01003383}
3384
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003385void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
3386 InvokeRuntimeCallingConvention calling_convention;
3387 IntrinsicVisitor::ComputeIntegerValueOfLocations(
3388 invoke,
3389 codegen_,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003390 calling_convention.GetReturnLocation(DataType::Type::kReference),
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003391 Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3392}
3393
3394void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
Vladimir Marko6fd16062018-06-26 11:02:04 +01003395 IntrinsicVisitor::IntegerValueOfInfo info =
3396 IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003397 LocationSummary* locations = invoke->GetLocations();
3398 MacroAssembler* masm = GetVIXLAssembler();
3399
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003400 Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003401 UseScratchRegisterScope temps(masm);
3402 Register temp = temps.AcquireW();
Vladimir Markode91ca92020-10-27 13:41:40 +00003403 auto allocate_instance = [&]() {
3404 DCHECK(out.X().Is(InvokeRuntimeCallingConvention().GetRegisterAt(0)));
3405 codegen_->LoadIntrinsicDeclaringClass(out, invoke);
3406 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3407 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3408 };
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003409 if (invoke->InputAt(0)->IsConstant()) {
3410 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
Vladimir Marko6fd16062018-06-26 11:02:04 +01003411 if (static_cast<uint32_t>(value - info.low) < info.length) {
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003412 // Just embed the j.l.Integer in the code.
Vladimir Marko6fd16062018-06-26 11:02:04 +01003413 DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
3414 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003415 } else {
Vladimir Markoeebb8212018-06-05 14:57:24 +01003416 DCHECK(locations->CanCall());
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003417 // Allocate and initialize a new j.l.Integer.
3418 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3419 // JIT object table.
Vladimir Markode91ca92020-10-27 13:41:40 +00003420 allocate_instance();
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003421 __ Mov(temp.W(), value);
3422 __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
Hans Boehmcc5629c2020-10-30 16:12:01 -07003423 // Class pointer and `value` final field stores require a barrier before publication.
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003424 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3425 }
3426 } else {
Vladimir Markoeebb8212018-06-05 14:57:24 +01003427 DCHECK(locations->CanCall());
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003428 Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003429 // Check bounds of our cache.
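 // Subtracting `info.low` and branching on the unsigned condition `hs` below folds
 // the `in < low` and `in >= low + length` checks into a single comparison.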
3430 __ Add(out.W(), in.W(), -info.low);
Vladimir Markoeebb8212018-06-05 14:57:24 +01003431 __ Cmp(out.W(), info.length);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003432 vixl::aarch64::Label allocate, done;
3433 __ B(&allocate, hs);
3434 // If the value is within the bounds, load the j.l.Integer directly from the array.
Vladimir Marko6fd16062018-06-26 11:02:04 +01003435 codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003436 MemOperand source = HeapOperand(
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003437 temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
3438 codegen_->Load(DataType::Type::kReference, out, source);
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003439 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
3440 __ B(&done);
3441 __ Bind(&allocate);
3442 // Otherwise allocate and initialize a new j.l.Integer.
Vladimir Markode91ca92020-10-27 13:41:40 +00003443 allocate_instance();
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003444 __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
Hans Boehmcc5629c2020-10-30 16:12:01 -07003445 // Class pointer and `value` final field stores require a barrier before publication.
Nicolas Geoffray331605a2017-03-01 11:01:41 +00003446 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3447 __ Bind(&done);
3448 }
3449}
3450
Vladimir Marko01b65522020-10-28 15:43:54 +00003451void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3452 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3453
3454 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) {
3455 invoke->GetLocations()->AddTemp(Location::RequiresRegister());
3456 }
3457}
3458
3459void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
3460 MacroAssembler* masm = GetVIXLAssembler();
3461 LocationSummary* locations = invoke->GetLocations();
3462
3463 Location obj = locations->InAt(0);
3464 Location out = locations->Out();
3465
3466 SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
3467 codegen_->AddSlowPath(slow_path);
3468
3469 if (kEmitCompilerReadBarrier) {
3470 // Check self->GetWeakRefAccessEnabled().
3471 UseScratchRegisterScope temps(masm);
3472 Register temp = temps.AcquireW();
3473 __ Ldr(temp,
3474 MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArm64PointerSize>().Uint32Value()));
Hans Boehm1b3ec0f2022-01-26 16:53:07 +00003475 static_assert(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled) == 0);
3476 __ Cbnz(temp, slow_path->GetEntryLabel());
Vladimir Marko01b65522020-10-28 15:43:54 +00003477 }
3478
3479 {
3480 // Load the java.lang.ref.Reference class.
3481 UseScratchRegisterScope temps(masm);
3482 Register temp = temps.AcquireW();
3483 codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
3484
3485 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3486 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3487 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3488 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3489 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3490 __ Ldrh(temp, HeapOperand(temp, disable_intrinsic_offset.Uint32Value()));
3491 __ Cbnz(temp, slow_path->GetEntryLabel());
3492 }
3493
3494 // Load the value from the field.
3495 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3496 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3497 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3498 out,
3499 WRegisterFrom(obj),
3500 referent_offset,
3501 /*maybe_temp=*/ locations->GetTemp(0),
3502 /*needs_null_check=*/ true,
3503 /*use_load_acquire=*/ true);
3504 } else {
3505 MemOperand field = HeapOperand(WRegisterFrom(obj), referent_offset);
Vladimir Marko98873af2020-12-16 12:10:03 +00003506 codegen_->LoadAcquire(
3507 invoke, DataType::Type::kReference, WRegisterFrom(out), field, /*needs_null_check=*/ true);
Vladimir Marko01b65522020-10-28 15:43:54 +00003508 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3509 }
3510 __ Bind(slow_path->GetExitLabel());
3511}
3512
Vladimir Markoac27ac02021-02-01 09:31:02 +00003513void IntrinsicLocationsBuilderARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3514 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
3515}
3516
3517void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) {
3518 LocationSummary* locations = invoke->GetLocations();
3519 MacroAssembler* masm = codegen_->GetVIXLAssembler();
3520 UseScratchRegisterScope temps(masm);
3521
3522 Register obj = WRegisterFrom(locations->InAt(0));
3523 Register other = WRegisterFrom(locations->InAt(1));
3524 Register out = WRegisterFrom(locations->Out());
3525 Register tmp = temps.AcquireW();
3526
3527 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3528 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3529
3530 MemOperand field = HeapOperand(obj, referent_offset);
3531 codegen_->LoadAcquire(invoke, DataType::Type::kReference, tmp, field, /*needs_null_check=*/ true);
Vladimir Markoa0a20cd2021-02-05 15:55:47 +00003532 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(tmp);
Vladimir Markoac27ac02021-02-01 09:31:02 +00003533
3534 __ Cmp(tmp, other);
3535
3536 if (kEmitCompilerReadBarrier) {
3537 DCHECK(kUseBakerReadBarrier);
3538
3539 vixl::aarch64::Label calculate_result;
3540
3541 // If the GC is not marking, the comparison result is final.
3542 __ Cbz(mr, &calculate_result);
3543
3544 __ B(&calculate_result, eq); // ZF set if taken.
3545
3546 // Check if the loaded reference is null.
3547 __ Cbz(tmp, &calculate_result); // ZF clear if taken.
3548
3549 // For correct memory visibility, we need a barrier before loading the lock word.
3550 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3551
3552 // Load the lockword and check if it is a forwarding address.
3553 static_assert(LockWord::kStateShift == 30u);
3554 static_assert(LockWord::kStateForwardingAddress == 3u);
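 // A lock word of 0xc0000000 or above (unsigned) therefore has both state bits set,
 // i.e. the state is a forwarding address: the GC has moved the object and the rest
 // of the lock word encodes its new location.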
3555 __ Ldr(tmp, HeapOperand(tmp, monitor_offset));
3556 __ Cmp(tmp, Operand(0xc0000000));
3557 __ B(&calculate_result, lo); // ZF clear if taken.
3558
3559 // Extract the forwarding address and compare with `other`.
3560 __ Cmp(other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
3561
3562 __ Bind(&calculate_result);
3563 }
3564
3565 // Convert ZF into the Boolean result.
3566 __ Cset(out, eq);
3567}
3568
Nicolas Geoffray365719c2017-03-08 13:11:50 +00003569void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
Vladimir Markoca6fff82017-10-03 14:49:14 +01003570 LocationSummary* locations =
3571 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
Nicolas Geoffray365719c2017-03-08 13:11:50 +00003572 locations->SetOut(Location::RequiresRegister());
3573}
3574
3575void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
3576 MacroAssembler* masm = GetVIXLAssembler();
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01003577 Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
Nicolas Geoffray365719c2017-03-08 13:11:50 +00003578 UseScratchRegisterScope temps(masm);
3579 Register temp = temps.AcquireX();
3580
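 // Thread.interrupted() has test-and-clear semantics: load-acquire the interrupted
 // flag and, only if it is set, clear it with a store-release.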
3581 __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
3582 __ Ldar(out.W(), MemOperand(temp));
3583
3584 vixl::aarch64::Label done;
3585 __ Cbz(out.W(), &done);
3586 __ Stlr(wzr, MemOperand(temp));
3587 __ Bind(&done);
3588}
3589
Hans Boehmc7b28de2018-03-09 17:05:28 -08003590void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
3591 LocationSummary* locations =
3592 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3593 locations->SetInAt(0, Location::Any());
3594}
3595
3596void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3597
xueliang.zhongcb58b072017-10-13 12:06:56 +01003598void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
3599 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3600 return;
3601 }
3602
3603 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3604 LocationSummary::kNoCall,
3605 kIntrinsified);
3606
3607 locations->SetInAt(0, Location::RequiresRegister());
3608 locations->SetInAt(1, Location::RequiresRegister());
Evgeny Astigeevichc01dc292018-12-12 15:32:57 +00003609 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
xueliang.zhongcb58b072017-10-13 12:06:56 +01003610}
3611
3612// Lower the invoke of CRC32.update(int crc, int b).
3613void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
3614 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3615
3616 MacroAssembler* masm = GetVIXLAssembler();
3617
3618 Register crc = InputRegisterAt(invoke, 0);
3619 Register val = InputRegisterAt(invoke, 1);
3620 Register out = OutputRegister(invoke);
3621
3622 // The general algorithm of the CRC32 calculation is:
3623 // crc = ~crc
3624 // result = crc32_for_byte(crc, b)
3625 // crc = ~result
3626 // It is directly lowered to three instructions.
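 // A rough C equivalent, assuming the ACLE intrinsic from <arm_acle.h> is available
 // (illustrative sketch only): out = ~__crc32b(~crc, (uint8_t)val);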
Evgeny Astigeevichc01dc292018-12-12 15:32:57 +00003627
3628 UseScratchRegisterScope temps(masm);
3629 Register tmp = temps.AcquireSameSizeAs(out);
3630
3631 __ Mvn(tmp, crc);
3632 __ Crc32b(tmp, tmp, val);
3633 __ Mvn(out, tmp);
xueliang.zhongcb58b072017-10-13 12:06:56 +01003634}
3635
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003636// Generate code using CRC32 instructions which calculates
3637// a CRC32 value of a sequence of bytes.
Evgeny Astigeevich15c5b972018-11-20 13:41:40 +00003638//
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003639// Parameters:
3640// masm - VIXL macro assembler
3641// crc - a register holding an initial CRC value
3642// ptr - a register holding a memory address of bytes
3643// length - a register holding a number of bytes to process
3644// out - a register to put a result of calculation
3645static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
3646 const Register& crc,
3647 const Register& ptr,
3648 const Register& length,
3649 const Register& out) {
Evgeny Astigeevich15c5b972018-11-20 13:41:40 +00003650 // The algorithm of CRC32 of bytes is:
3651 // crc = ~crc
3652 // process the first few bytes to make the data pointer 8-byte aligned
3653 // while array has 8 bytes do:
3654 // crc = crc32_of_8bytes(crc, 8_bytes(array))
3655 // if array has 4 bytes:
3656 // crc = crc32_of_4bytes(crc, 4_bytes(array))
3657 // if array has 2 bytes:
3658 // crc = crc32_of_2bytes(crc, 2_bytes(array))
3659 // if array has a byte:
3660 // crc = crc32_of_byte(crc, 1_byte(array))
3661 // crc = ~crc
3662
3663 vixl::aarch64::Label loop, done;
3664 vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
3665 vixl::aarch64::Label aligned2, aligned4, aligned8;
3666
3667 // Use VIXL scratch registers as the VIXL macro assembler won't use them in
3668 // instructions below.
3669 UseScratchRegisterScope temps(masm);
3670 Register len = temps.AcquireW();
3671 Register array_elem = temps.AcquireW();
3672
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003673 __ Mvn(out, crc);
Evgeny Astigeevich15c5b972018-11-20 13:41:40 +00003674 __ Mov(len, length);
3675
3676 __ Tbz(ptr, 0, &aligned2);
3677 __ Subs(len, len, 1);
3678 __ B(&done, lo);
3679 __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
3680 __ Crc32b(out, out, array_elem);
3681
3682 __ Bind(&aligned2);
3683 __ Tbz(ptr, 1, &aligned4);
3684 __ Subs(len, len, 2);
3685 __ B(&process_1byte, lo);
3686 __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3687 __ Crc32h(out, out, array_elem);
3688
3689 __ Bind(&aligned4);
3690 __ Tbz(ptr, 2, &aligned8);
3691 __ Subs(len, len, 4);
3692 __ B(&process_2bytes, lo);
3693 __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3694 __ Crc32w(out, out, array_elem);
3695
3696 __ Bind(&aligned8);
3697 __ Subs(len, len, 8);
3698 // If len < 8 go to process data by 4 bytes, 2 bytes and a byte.
3699 __ B(&process_4bytes, lo);
3700
3701 // The main loop processing data by 8 bytes.
3702 __ Bind(&loop);
3703 __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
3704 __ Subs(len, len, 8);
3705 __ Crc32x(out, out, array_elem.X());
3706 // if len >= 8, process the next 8 bytes.
3707 __ B(&loop, hs);
3708
3709 // Process the data which is less than 8 bytes.
3710 // The code generated below works with values of len
3711 // which come in the range [-8, 0].
3712 // The first three bits are used to detect whether 4 bytes or 2 bytes or
3713 // a byte can be processed.
3714 // The checking order is from bit 2 to bit 0:
3715 // bit 2 is set: at least 4 bytes available
3716 // bit 1 is set: at least 2 bytes available
3717 // bit 0 is set: at least a byte available
3718 __ Bind(&process_4bytes);
3719 // Goto process_2bytes if less than four bytes available
3720 __ Tbz(len, 2, &process_2bytes);
3721 __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3722 __ Crc32w(out, out, array_elem);
3723
3724 __ Bind(&process_2bytes);
3725 // Goto process_1byte if less than two bytes available
3726 __ Tbz(len, 1, &process_1byte);
3727 __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3728 __ Crc32h(out, out, array_elem);
3729
3730 __ Bind(&process_1byte);
3731 // Goto done if no bytes available
3732 __ Tbz(len, 0, &done);
3733 __ Ldrb(array_elem, MemOperand(ptr));
3734 __ Crc32b(out, out, array_elem);
3735
3736 __ Bind(&done);
3737 __ Mvn(out, out);
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003738}
3739
3740// The threshold for sizes of arrays to use the library provided implementation
3741// of CRC32.updateBytes instead of the intrinsic.
3742static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
3743
3744void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3745 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3746 return;
3747 }
3748
3749 LocationSummary* locations =
3750 new (allocator_) LocationSummary(invoke,
3751 LocationSummary::kCallOnSlowPath,
3752 kIntrinsified);
3753
3754 locations->SetInAt(0, Location::RequiresRegister());
3755 locations->SetInAt(1, Location::RequiresRegister());
3756 locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
3757 locations->SetInAt(3, Location::RequiresRegister());
3758 locations->AddTemp(Location::RequiresRegister());
3759 locations->SetOut(Location::RequiresRegister());
3760}
3761
3762// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
3763//
3764// Note: The intrinsic is not used if len exceeds a threshold.
3765void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3766 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3767
Evgeny Astigeeviche36f5f62019-01-08 17:01:31 +00003768 MacroAssembler* masm = GetVIXLAssembler();
3769 LocationSummary* locations = invoke->GetLocations();
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003770
Evgeny Astigeeviche36f5f62019-01-08 17:01:31 +00003771 SlowPathCodeARM64* slow_path =
Vladimir Marko79db6462020-07-31 14:57:32 +01003772 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003773 codegen_->AddSlowPath(slow_path);
3774
3775 Register length = WRegisterFrom(locations->InAt(3));
3776 __ Cmp(length, kCRC32UpdateBytesThreshold);
3777 __ B(slow_path->GetEntryLabel(), hi);
3778
3779 const uint32_t array_data_offset =
3780 mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
3781 Register ptr = XRegisterFrom(locations->GetTemp(0));
3782 Register array = XRegisterFrom(locations->InAt(1));
Evgeny Astigeeviche36f5f62019-01-08 17:01:31 +00003783 Location offset = locations->InAt(2);
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003784 if (offset.IsConstant()) {
3785 int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
3786 __ Add(ptr, array, array_data_offset + offset_value);
3787 } else {
3788 __ Add(ptr, array, array_data_offset);
3789 __ Add(ptr, ptr, XRegisterFrom(offset));
3790 }
3791
3792 Register crc = WRegisterFrom(locations->InAt(0));
3793 Register out = WRegisterFrom(locations->Out());
3794
3795 GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
Evgeny Astigeevich15c5b972018-11-20 13:41:40 +00003796
3797 __ Bind(slow_path->GetExitLabel());
3798}
3799
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003800void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3801 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3802 return;
3803 }
3804
3805 LocationSummary* locations =
3806 new (allocator_) LocationSummary(invoke,
3807 LocationSummary::kNoCall,
3808 kIntrinsified);
3809
3810 locations->SetInAt(0, Location::RequiresRegister());
3811 locations->SetInAt(1, Location::RequiresRegister());
3812 locations->SetInAt(2, Location::RequiresRegister());
3813 locations->SetInAt(3, Location::RequiresRegister());
3814 locations->AddTemp(Location::RequiresRegister());
3815 locations->SetOut(Location::RequiresRegister());
3816}
3817
3818// Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len)
3819//
3820// There is no need to generate code checking if addr is 0.
3821// The method updateByteBuffer is a private method of java.util.zip.CRC32.
3822// This guarantees no calls outside of the CRC32 class.
3823// The address of a DirectBuffer is always passed to updateByteBuffer.
3824// An empty DirectBuffer implementation may use a zero address, but then its
3825// length must also be zero. The generated code handles a zero length
3826// correctly.
3827void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3828 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3829
Evgeny Astigeeviche36f5f62019-01-08 17:01:31 +00003830 MacroAssembler* masm = GetVIXLAssembler();
3831 LocationSummary* locations = invoke->GetLocations();
Evgeny Astigeevich776a7c22018-12-17 11:40:34 +00003832
3833 Register addr = XRegisterFrom(locations->InAt(1));
3834 Register ptr = XRegisterFrom(locations->GetTemp(0));
3835 __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));
3836
3837 Register crc = WRegisterFrom(locations->InAt(0));
3838 Register length = WRegisterFrom(locations->InAt(3));
3839 Register out = WRegisterFrom(locations->Out());
3840 GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3841}
3842
xueliang.zhong9ce340f2019-01-22 17:46:09 +00003843void IntrinsicLocationsBuilderARM64::VisitFP16ToFloat(HInvoke* invoke) {
3844 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3845 return;
3846 }
3847
3848 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3849 LocationSummary::kNoCall,
3850 kIntrinsified);
3851 locations->SetInAt(0, Location::RequiresRegister());
3852 locations->SetOut(Location::RequiresFpuRegister());
3853}
3854
3855void IntrinsicCodeGeneratorARM64::VisitFP16ToFloat(HInvoke* invoke) {
3856 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3857 MacroAssembler* masm = GetVIXLAssembler();
3858 UseScratchRegisterScope scratch_scope(masm);
3859 Register bits = InputRegisterAt(invoke, 0);
Evgeny Astigeevich7d48dcd2019-10-16 12:46:28 +01003860 VRegister out = SRegisterFrom(invoke->GetLocations()->Out());
3861 VRegister half = scratch_scope.AcquireH();
xueliang.zhong9ce340f2019-01-22 17:46:09 +00003862 __ Fmov(half, bits); // ARMv8.2
3863 __ Fcvt(out, half);
3864}
3865
Vladimir Marko7f958e32019-10-24 09:03:58 +00003866void IntrinsicLocationsBuilderARM64::VisitFP16ToHalf(HInvoke* invoke) {
3867 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3868 return;
3869 }
3870
3871 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3872 LocationSummary::kNoCall,
3873 kIntrinsified);
3874 locations->SetInAt(0, Location::RequiresFpuRegister());
3875 locations->SetOut(Location::RequiresRegister());
3876}
3877
3878void IntrinsicCodeGeneratorARM64::VisitFP16ToHalf(HInvoke* invoke) {
3879 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3880 MacroAssembler* masm = GetVIXLAssembler();
3881 UseScratchRegisterScope scratch_scope(masm);
Evgeny Astigeevich7d48dcd2019-10-16 12:46:28 +01003882 VRegister in = SRegisterFrom(invoke->GetLocations()->InAt(0));
3883 VRegister half = scratch_scope.AcquireH();
Vladimir Marko7f958e32019-10-24 09:03:58 +00003884 Register out = WRegisterFrom(invoke->GetLocations()->Out());
3885 __ Fcvt(half, in);
3886 __ Fmov(out, half);
3887 __ Sxth(out, out); // sign extend due to returning a short type.
3888}
3889
Usama Arifb9f02c22019-10-25 17:37:33 +01003890template<typename OP>
3891void GenerateFP16Round(HInvoke* invoke,
3892 CodeGeneratorARM64* const codegen_,
3893 MacroAssembler* masm,
3894 const OP roundOp) {
3895 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3896 LocationSummary* locations = invoke->GetLocations();
3897 UseScratchRegisterScope scratch_scope(masm);
3898 Register out = WRegisterFrom(locations->Out());
3899 VRegister half = scratch_scope.AcquireH();
3900 __ Fmov(half, WRegisterFrom(locations->InAt(0)));
3901 roundOp(half, half);
3902 __ Fmov(out, half);
3903 __ Sxth(out, out);
3904}
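// The FP16 rounding intrinsics below differ only in the FRINT variant passed as
// `roundOp`: FRINTM rounds toward minus infinity (floor), FRINTP toward plus
// infinity (ceil), and FRINTN rounds to nearest with ties to even (rint).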
3905
3906void IntrinsicLocationsBuilderARM64::VisitFP16Floor(HInvoke* invoke) {
3907 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3908 return;
3909 }
3910
3911 CreateIntToIntLocations(allocator_, invoke);
3912}
3913
3914void IntrinsicCodeGeneratorARM64::VisitFP16Floor(HInvoke* invoke) {
3915 MacroAssembler* masm = GetVIXLAssembler();
3916 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3917 __ Frintm(out, in); // Round towards Minus infinity
3918 };
3919 GenerateFP16Round(invoke, codegen_, masm, roundOp);
3920}
3921
Usama Arif665aac42019-10-29 11:13:18 +00003922void IntrinsicLocationsBuilderARM64::VisitFP16Ceil(HInvoke* invoke) {
3923 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3924 return;
3925 }
3926
3927 CreateIntToIntLocations(allocator_, invoke);
3928}
3929
3930void IntrinsicCodeGeneratorARM64::VisitFP16Ceil(HInvoke* invoke) {
3931 MacroAssembler* masm = GetVIXLAssembler();
Roland Levillain52f8e5c2019-11-13 17:30:27 +00003932 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
Usama Arif665aac42019-10-29 11:13:18 +00003933 __ Frintp(out, in); // Round towards Plus infinity
3934 };
3935 GenerateFP16Round(invoke, codegen_, masm, roundOp);
3936}
3937
Usama Arif681692b2019-10-30 16:23:26 +00003938void IntrinsicLocationsBuilderARM64::VisitFP16Rint(HInvoke* invoke) {
3939 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3940 return;
3941 }
3942
3943 CreateIntToIntLocations(allocator_, invoke);
3944}
3945
3946void IntrinsicCodeGeneratorARM64::VisitFP16Rint(HInvoke* invoke) {
3947 MacroAssembler* masm = GetVIXLAssembler();
Roland Levillain52f8e5c2019-11-13 17:30:27 +00003948 auto roundOp = [masm](const VRegister& out, const VRegister& in) {
Usama Arif681692b2019-10-30 16:23:26 +00003949 __ Frintn(out, in); // Round to nearest, with ties to even
3950 };
3951 GenerateFP16Round(invoke, codegen_, masm, roundOp);
3952}
3953
Usama Arif39e29792019-11-15 10:53:29 +00003954void FP16ComparisonLocations(HInvoke* invoke,
3955 ArenaAllocator* allocator_,
3956 CodeGeneratorARM64* codegen_,
3957 int requiredTemps) {
3958 if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3959 return;
3960 }
3961
3962 CreateIntIntToIntLocations(allocator_, invoke);
3963 for (int i = 0; i < requiredTemps; i++) {
3964 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3965 }
3966}
3967
Usama Arif457e9fa2019-11-11 15:29:59 +00003968template<typename OP>
3969void GenerateFP16Compare(HInvoke* invoke,
3970 CodeGeneratorARM64* codegen,
3971 MacroAssembler* masm,
3972 const OP compareOp) {
3973 DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
3974 LocationSummary* locations = invoke->GetLocations();
3975 Register out = WRegisterFrom(locations->Out());
3976 VRegister half0 = HRegisterFrom(locations->GetTemp(0));
3977 VRegister half1 = HRegisterFrom(locations->GetTemp(1));
3978 __ Fmov(half0, WRegisterFrom(locations->InAt(0)));
3979 __ Fmov(half1, WRegisterFrom(locations->InAt(1)));
3980 compareOp(out, half0, half1);
3981}
3982
3983static inline void GenerateFP16Compare(HInvoke* invoke,
3984 CodeGeneratorARM64* codegen,
3985 MacroAssembler* masm,
3986 vixl::aarch64::Condition cond) {
3987 auto compareOp = [masm, cond](const Register out, const VRegister& in0, const VRegister& in1) {
3988 __ Fcmp(in0, in1);
3989 __ Cset(out, cond);
3990 };
3991 GenerateFP16Compare(invoke, codegen, masm, compareOp);
3992}
3993
3994void IntrinsicLocationsBuilderARM64::VisitFP16Greater(HInvoke* invoke) {
Usama Arif39e29792019-11-15 10:53:29 +00003995 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
Usama Arif457e9fa2019-11-11 15:29:59 +00003996}
3997
3998void IntrinsicCodeGeneratorARM64::VisitFP16Greater(HInvoke* invoke) {
3999 MacroAssembler* masm = GetVIXLAssembler();
4000 GenerateFP16Compare(invoke, codegen_, masm, gt);
4001}
4002
4003void IntrinsicLocationsBuilderARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
Usama Arif39e29792019-11-15 10:53:29 +00004004 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
Usama Arif457e9fa2019-11-11 15:29:59 +00004005}
4006
4007void IntrinsicCodeGeneratorARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
4008 MacroAssembler* masm = GetVIXLAssembler();
4009 GenerateFP16Compare(invoke, codegen_, masm, ge);
4010}
4011
4012void IntrinsicLocationsBuilderARM64::VisitFP16Less(HInvoke* invoke) {
Usama Arif39e29792019-11-15 10:53:29 +00004013 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
Usama Arif457e9fa2019-11-11 15:29:59 +00004014}
4015
4016void IntrinsicCodeGeneratorARM64::VisitFP16Less(HInvoke* invoke) {
4017 MacroAssembler* masm = GetVIXLAssembler();
4018 GenerateFP16Compare(invoke, codegen_, masm, mi);
4019}
4020
4021void IntrinsicLocationsBuilderARM64::VisitFP16LessEquals(HInvoke* invoke) {
Usama Arif39e29792019-11-15 10:53:29 +00004022 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
Usama Arif457e9fa2019-11-11 15:29:59 +00004023}
4024
4025void IntrinsicCodeGeneratorARM64::VisitFP16LessEquals(HInvoke* invoke) {
4026 MacroAssembler* masm = GetVIXLAssembler();
4027 GenerateFP16Compare(invoke, codegen_, masm, ls);
4028}
4029
Usama Arifecbdc072019-11-13 13:32:54 +00004030void IntrinsicLocationsBuilderARM64::VisitFP16Compare(HInvoke* invoke) {
Usama Arif39e29792019-11-15 10:53:29 +00004031 FP16ComparisonLocations(invoke, allocator_, codegen_, 2);
Usama Arifecbdc072019-11-13 13:32:54 +00004032}
4033
4034void IntrinsicCodeGeneratorARM64::VisitFP16Compare(HInvoke* invoke) {
4035 MacroAssembler* masm = GetVIXLAssembler();
4036 auto compareOp = [masm](const Register out,
4037 const VRegister& in0,
4038 const VRegister& in1) {
4039 vixl::aarch64::Label end;
4040 vixl::aarch64::Label equal;
4041 vixl::aarch64::Label normal;
4042
4043 // The normal cases for this method are:
4044 // - in0 > in1 => out = 1
4045 // - in0 < in1 => out = -1
4046 // - in0 == in1 => out = 0
4047 // +/-Infinity are ordered by default so are handled by the normal case.
4048 // There are two special cases that Fcmp is insufficient for distinguishing:
4049 // - in0 and in1 are +0 and -0 => +0 > -0 so compare encoding instead of value
4050 // - in0 or in1 is NaN => manually compare with in0 and in1 separately
4051 __ Fcmp(in0, in1);
4052 __ B(eq, &equal); // in0==in1 or +0 -0 case.
4053 __ B(vc, &normal); // in0 and in1 are ordered (not NaN).
4054
4055 // Either of the inputs is NaN.
4056 // NaN is equal to itself and greater than any other number so:
4057 // - if only in0 is NaN => return 1
4058 // - if only in1 is NaN => return -1
4059 // - if both in0 and in1 are NaN => return 0
4060 __ Fcmp(in0, 0.0);
4061 __ Mov(out, -1);
4062 __ B(vc, &end); // in0 != NaN => out = -1.
4063 __ Fcmp(in1, 0.0);
4064 __ Cset(out, vc); // if in1 != NaN => out = 1, otherwise both are NaNs => out = 0.
4065 __ B(&end);
4066
4067 // in0 == in1 or if one of the inputs is +0 and the other is -0.
4068 __ Bind(&equal);
4069 // Compare encoding of in0 and in1 as the denormal fraction of single precision float.
4070 // Reverse operand order because -0 > +0 when compared as S registers.
4071 // The instruction Fmov(Hregister, Wregister) zero extends the Hregister.
4072 // Therefore the value of bits[127:16] will not matter when doing the
4073 // below Fcmp as they are set to 0.
4074 __ Fcmp(in1.S(), in0.S());
4075
4076 __ Bind(&normal);
4077 __ Cset(out, gt); // if in0 > in1 => out = 1, otherwise out = 0.
4078 // Note: could be from equals path or original comparison
4079 __ Csinv(out, out, wzr, pl); // if in0 >= in1 out=out, otherwise out=-1.
4080
4081 __ Bind(&end);
4082 };
4083
4084 GenerateFP16Compare(invoke, codegen_, masm, compareOp);
4085}
4086
Usama Arif39e29792019-11-15 10:53:29 +00004087const int kFP16NaN = 0x7e00;
4088
4089static inline void GenerateFP16MinMax(HInvoke* invoke,
4090 CodeGeneratorARM64* codegen,
4091 MacroAssembler* masm,
4092 vixl::aarch64::Condition cond) {
4093 DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
4094 LocationSummary* locations = invoke->GetLocations();
4095
4096 vixl::aarch64::Label equal;
4097 vixl::aarch64::Label end;
4098
4099 UseScratchRegisterScope temps(masm);
4100
4101 Register out = WRegisterFrom(locations->Out());
4102 Register in0 = WRegisterFrom(locations->InAt(0));
4103 Register in1 = WRegisterFrom(locations->InAt(1));
4104 VRegister half0 = HRegisterFrom(locations->GetTemp(0));
4105 VRegister half1 = temps.AcquireH();
4106
4107 // The normal cases for this method are:
4108 // - in0.h == in1.h => out = in0 or in1
4109 // - in0.h <cond> in1.h => out = in0
4110 // - in0.h <!cond> in1.h => out = in1
4111 // +/-Infinity are ordered by default so are handled by the normal case.
4112 // There are two special cases that Fcmp is insufficient for distinguishing:
4113 // - in0 and in1 are +0 and -0 => +0 > -0 so compare encoding instead of value
4114 // - in0 or in1 is NaN => out = NaN
4115 __ Fmov(half0, in0);
4116 __ Fmov(half1, in1);
4117 __ Fcmp(half0, half1);
4118 __ B(eq, &equal); // half0 = half1 or +0/-0 case.
4119 __ Csel(out, in0, in1, cond); // if half0 <cond> half1 => out = in0, otherwise out = in1.
4120 __ B(vc, &end); // None of the inputs were NaN.
4121
4122 // At least one input was NaN.
4123 __ Mov(out, kFP16NaN); // out=NaN.
4124 __ B(&end);
4125
4126 // in0 == in1 or if one of the inputs is +0 and the other is -0.
4127 __ Bind(&equal);
4128 // Fcmp cannot normally distinguish +0 and -0 so compare encoding.
4129 // Encoding is compared as the denormal fraction of a Single.
4130 // Note: encoding of -0 > encoding of +0 despite +0 > -0 so in0 and in1 are swapped.
4131 // Note: The instruction Fmov(Hregister, Wregister) zero extends the Hregister.
4132 __ Fcmp(half1.S(), half0.S());
4133
4134 __ Csel(out, in0, in1, cond); // if half0 <cond> half1 => out = in0, otherwise out = in1.
4135
4136 __ Bind(&end);
4137}
4138
4139void IntrinsicLocationsBuilderARM64::VisitFP16Min(HInvoke* invoke) {
4140 FP16ComparisonLocations(invoke, allocator_, codegen_, 1);
4141}
4142
4143void IntrinsicCodeGeneratorARM64::VisitFP16Min(HInvoke* invoke) {
4144 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
4145 MacroAssembler* masm = GetVIXLAssembler();
4146 GenerateFP16MinMax(invoke, codegen_, masm, mi);
4147}
4148
4149void IntrinsicLocationsBuilderARM64::VisitFP16Max(HInvoke* invoke) {
4150 FP16ComparisonLocations(invoke, allocator_, codegen_, 1);
4151}
4152
4153void IntrinsicCodeGeneratorARM64::VisitFP16Max(HInvoke* invoke) {
4154 DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
4155 MacroAssembler* masm = GetVIXLAssembler();
4156 GenerateFP16MinMax(invoke, codegen_, masm, gt);
4157}
4158
Artem Serova3bd4ec2020-08-27 16:26:17 +01004159static void GenerateDivideUnsigned(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4160 LocationSummary* locations = invoke->GetLocations();
4161 MacroAssembler* masm = codegen->GetVIXLAssembler();
4162 DataType::Type type = invoke->GetType();
4163 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4164
4165 Register dividend = RegisterFrom(locations->InAt(0), type);
4166 Register divisor = RegisterFrom(locations->InAt(1), type);
4167 Register out = RegisterFrom(locations->Out(), type);
4168
4169 // Check if divisor is zero, bail to managed implementation to handle.
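 // (AArch64 UDIV does not trap on a zero divisor, it simply produces 0, so the zero
 // case must be dispatched explicitly for ArithmeticException to be thrown.)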
4170 SlowPathCodeARM64* slow_path =
4171 new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
4172 codegen->AddSlowPath(slow_path);
4173 __ Cbz(divisor, slow_path->GetEntryLabel());
4174
4175 __ Udiv(out, dividend, divisor);
4176
4177 __ Bind(slow_path->GetExitLabel());
4178}
4179
4180void IntrinsicLocationsBuilderARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
4181 CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
4182}
4183
4184void IntrinsicCodeGeneratorARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
4185 GenerateDivideUnsigned(invoke, codegen_);
4186}
4187
4188void IntrinsicLocationsBuilderARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
4189 CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
4190}
4191
4192void IntrinsicCodeGeneratorARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
4193 GenerateDivideUnsigned(invoke, codegen_);
4194}
4195
Nikita Iashchenko745da802021-01-20 21:52:54 +00004196void IntrinsicLocationsBuilderARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
4197 CreateIntIntToIntLocations(allocator_, invoke);
4198}
4199
4200void IntrinsicCodeGeneratorARM64::VisitMathMultiplyHigh(HInvoke* invoke) {
4201 LocationSummary* locations = invoke->GetLocations();
4202 MacroAssembler* masm = codegen_->GetVIXLAssembler();
4203 DataType::Type type = invoke->GetType();
4204 DCHECK(type == DataType::Type::kInt64);
4205
4206 Register x = RegisterFrom(locations->InAt(0), type);
4207 Register y = RegisterFrom(locations->InAt(1), type);
4208 Register out = RegisterFrom(locations->Out(), type);
4209
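 // SMULH yields bits [127:64] of the signed 128-bit product x * y, which is exactly
 // the value Math.multiplyHigh(long, long) is specified to return.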
4210 __ Smulh(out, x, y);
4211}
4212
Nikita Iashchenko3fa6e462021-09-10 17:30:04 +01004213static void GenerateMathFma(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4214 MacroAssembler* masm = codegen->GetVIXLAssembler();
4215
4216 VRegister n = helpers::InputFPRegisterAt(invoke, 0);
4217 VRegister m = helpers::InputFPRegisterAt(invoke, 1);
4218 VRegister a = helpers::InputFPRegisterAt(invoke, 2);
4219 VRegister out = helpers::OutputFPRegister(invoke);
4220
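  // Fmadd computes (n * m) + a as a fused multiply-add with a single rounding step, matching
  // the Math.fma() contract.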
4221 __ Fmadd(out, n, m, a);
4222}
4223
4224void IntrinsicLocationsBuilderARM64::VisitMathFmaDouble(HInvoke* invoke) {
4225 CreateFPFPFPToFPCallLocations(allocator_, invoke);
4226}
4227
4228void IntrinsicCodeGeneratorARM64::VisitMathFmaDouble(HInvoke* invoke) {
4229 GenerateMathFma(invoke, codegen_);
4230}
4231
4232void IntrinsicLocationsBuilderARM64::VisitMathFmaFloat(HInvoke* invoke) {
4233 CreateFPFPFPToFPCallLocations(allocator_, invoke);
4234}
4235
4236void IntrinsicCodeGeneratorARM64::VisitMathFmaFloat(HInvoke* invoke) {
4237 GenerateMathFma(invoke, codegen_);
4238}
4239
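// Slow path shared by the VarHandle intrinsics below. Besides the generic fallback to the
// runtime, it carries two extra labels used by the fast path (a sketch of the intent): the
// byte array view check emitted by EmitByteArrayViewCode(), and the label at which accesses
// with native byte order re-enter the main code path.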
Vladimir Marko98873af2020-12-16 12:10:03 +00004240class VarHandleSlowPathARM64 : public IntrinsicSlowPathARM64 {
4241 public:
4242 VarHandleSlowPathARM64(HInvoke* invoke, std::memory_order order)
4243 : IntrinsicSlowPathARM64(invoke),
4244 order_(order),
4245 return_success_(false),
4246 strong_(false),
4247 get_and_update_op_(GetAndUpdateOp::kAdd) {
4248 }
4249
4250 vixl::aarch64::Label* GetByteArrayViewCheckLabel() {
4251 return &byte_array_view_check_label_;
4252 }
4253
4254 vixl::aarch64::Label* GetNativeByteOrderLabel() {
4255 return &native_byte_order_label_;
4256 }
4257
4258 void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
4259 if (return_success) {
4260 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
4261 } else {
4262 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
4263 }
4264 return_success_ = return_success;
4265 strong_ = strong;
4266 }
4267
4268 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
4269 DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
4270 get_and_update_op_ = get_and_update_op;
4271 }
4272
4273 void EmitNativeCode(CodeGenerator* codegen_in) override {
4274 if (GetByteArrayViewCheckLabel()->IsLinked()) {
4275 EmitByteArrayViewCode(codegen_in);
4276 }
4277 IntrinsicSlowPathARM64::EmitNativeCode(codegen_in);
4278 }
4279
4280 private:
4281 HInvoke* GetInvoke() const {
4282 return GetInstruction()->AsInvoke();
4283 }
4284
4285 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
4286 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
4287 }
4288
4289 void EmitByteArrayViewCode(CodeGenerator* codegen_in);
4290
4291 vixl::aarch64::Label byte_array_view_check_label_;
4292 vixl::aarch64::Label native_byte_order_label_;
4293 // Shared parameter for all VarHandle intrinsics.
4294 std::memory_order order_;
4295 // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
4296 bool return_success_;
4297 bool strong_;
4298 // Extra argument for GenerateVarHandleGetAndUpdate().
4299 GetAndUpdateOp get_and_update_op_;
4300};
4301
Vladimir Markoa41ea272020-09-07 15:24:36 +00004302// Generate subtype check without read barriers.
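// A rough pseudocode equivalent of the emitted check:
//   if (object == null) goto success;                 // only when object_can_be_null
//   for (klass = object->klass_; klass != type; klass = klass->super_class_) {
//     if (klass == null) goto slow_path;              // not a subtype, or a read barrier false negative
//   }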
4303static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARM64* codegen,
4304 SlowPathCodeARM64* slow_path,
4305 Register object,
4306 Register type,
4307 bool object_can_be_null = true) {
4308 MacroAssembler* masm = codegen->GetVIXLAssembler();
4309
4310 const MemberOffset class_offset = mirror::Object::ClassOffset();
4311 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
4312
4313 vixl::aarch64::Label success;
4314 if (object_can_be_null) {
4315 __ Cbz(object, &success);
4316 }
4317
4318 UseScratchRegisterScope temps(masm);
4319 Register temp = temps.AcquireW();
4320
4321 __ Ldr(temp, HeapOperand(object, class_offset.Int32Value()));
4322 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4323 vixl::aarch64::Label loop;
4324 __ Bind(&loop);
Vladimir Markoe00e7d22020-09-11 14:10:40 +00004325 __ Cmp(type, temp);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004326 __ B(&success, eq);
4327 __ Ldr(temp, HeapOperand(temp, super_class_offset.Int32Value()));
4328 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4329 __ Cbz(temp, slow_path->GetEntryLabel());
4330 __ B(&loop);
4331 __ Bind(&success);
4332}
4333
Vladimir Markoe17530a2020-11-11 17:02:26 +00004334// Check access mode and the primitive type from VarHandle.varType.
Vladimir Marko479cbad2020-12-10 16:10:09 +00004335// Check reference arguments against the VarHandle.varType; for references this is a subclass
4336// check without read barrier, so it can have false negatives which we handle in the slow path.
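// A rough sketch of the checks emitted below:
//   if (((accessModesBitMask >> access_mode) & 1) == 0) goto slow_path;
//   if (varType->primitiveType != expected_primitive_type) goto slow_path;
//   // and, for reference-typed value arguments, the no-read-barrier subtype check against varType.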
Vladimir Markoe17530a2020-11-11 17:02:26 +00004337static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
4338 CodeGeneratorARM64* codegen,
4339 SlowPathCodeARM64* slow_path,
4340 DataType::Type type) {
4341 mirror::VarHandle::AccessMode access_mode =
4342 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
4343 Primitive::Type primitive_type = DataTypeToPrimitive(type);
4344
4345 MacroAssembler* masm = codegen->GetVIXLAssembler();
4346 Register varhandle = InputRegisterAt(invoke, 0);
4347
4348 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
4349 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
4350 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4351
4352 UseScratchRegisterScope temps(masm);
4353 Register var_type_no_rb = temps.AcquireW();
4354 Register temp2 = temps.AcquireW();
4355
4356 // Check that the access mode is permitted and check the primitive type of varhandle.varType.
4357 // We do not need a read barrier when loading a reference that is used only to load a
4358 // constant primitive field through that reference. Use LDP to load the two fields together.
4359 DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
4360 __ Ldp(var_type_no_rb, temp2, HeapOperand(varhandle, var_type_offset.Int32Value()));
4361 codegen->GetAssembler()->MaybeUnpoisonHeapReference(var_type_no_rb);
4362 __ Tbz(temp2, static_cast<uint32_t>(access_mode), slow_path->GetEntryLabel());
4363 __ Ldrh(temp2, HeapOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
Vladimir Marko436977d2020-11-12 12:41:06 +00004364 if (primitive_type == Primitive::kPrimNot) {
4365 static_assert(Primitive::kPrimNot == 0);
4366 __ Cbnz(temp2, slow_path->GetEntryLabel());
4367 } else {
4368 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4369 __ B(slow_path->GetEntryLabel(), ne);
4370 }
Vladimir Markoe17530a2020-11-11 17:02:26 +00004371
4372 temps.Release(temp2);
4373
4374 if (type == DataType::Type::kReference) {
4375 // Check reference arguments against the varType.
4376 // False negatives due to varType being an interface or array type
4377 // or due to the missing read barrier are handled by the slow path.
4378 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4379 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4380 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4381 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4382 HInstruction* arg = invoke->InputAt(arg_index);
4383 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
4384 if (!arg->IsNullConstant()) {
4385 Register arg_reg = WRegisterFrom(invoke->GetLocations()->InAt(arg_index));
4386 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
4387 }
4388 }
4389 }
4390}
4391
Vladimir Markoa41ea272020-09-07 15:24:36 +00004392static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
4393 CodeGeneratorARM64* codegen,
4394 SlowPathCodeARM64* slow_path) {
4395 MacroAssembler* masm = codegen->GetVIXLAssembler();
4396 Register varhandle = InputRegisterAt(invoke, 0);
4397
4398 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4399
4400 UseScratchRegisterScope temps(masm);
4401 Register temp = temps.AcquireW();
4402
4403 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
4404 // Do not emit read barrier (or unpoison the reference) for comparing to null.
4405 __ Ldr(temp, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4406 __ Cbnz(temp, slow_path->GetEntryLabel());
4407}
4408
Vladimir Marko479cbad2020-12-10 16:10:09 +00004409static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
4410 CodeGeneratorARM64* codegen,
4411 SlowPathCodeARM64* slow_path) {
Ulyana Trafimovich98f01d12021-07-28 14:33:34 +00004412 VarHandleOptimizations optimizations(invoke);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004413 MacroAssembler* masm = codegen->GetVIXLAssembler();
4414 Register varhandle = InputRegisterAt(invoke, 0);
4415 Register object = InputRegisterAt(invoke, 1);
4416
4417 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4418 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4419
Vladimir Marko479cbad2020-12-10 16:10:09 +00004420 // Null-check the object.
Ulyana Trafimovich98f01d12021-07-28 14:33:34 +00004421 if (!optimizations.GetSkipObjectNullCheck()) {
4422 __ Cbz(object, slow_path->GetEntryLabel());
4423 }
Vladimir Marko479cbad2020-12-10 16:10:09 +00004424
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004425 if (!optimizations.GetUseKnownBootImageVarHandle()) {
4426 UseScratchRegisterScope temps(masm);
4427 Register temp = temps.AcquireW();
4428 Register temp2 = temps.AcquireW();
Vladimir Markoa41ea272020-09-07 15:24:36 +00004429
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004430 // Check that the VarHandle references an instance field by checking that
4431 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
4432 // type compatibility check with the source object's type, which will fail for null.
4433 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4434 __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4435 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4436 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4437 __ Cbnz(temp2, slow_path->GetEntryLabel());
Vladimir Markoa41ea272020-09-07 15:24:36 +00004438
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004439 // Check that the object has the correct type.
4440 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
4441 temps.Release(temp2); // Needed by GenerateSubTypeObjectCheckNoReadBarrier().
4442 GenerateSubTypeObjectCheckNoReadBarrier(
4443 codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
4444 }
Vladimir Markoa41ea272020-09-07 15:24:36 +00004445}
4446
Vladimir Marko479cbad2020-12-10 16:10:09 +00004447static void GenerateVarHandleArrayChecks(HInvoke* invoke,
4448 CodeGeneratorARM64* codegen,
Vladimir Marko98873af2020-12-16 12:10:03 +00004449 VarHandleSlowPathARM64* slow_path) {
Ulyana Trafimovich98f01d12021-07-28 14:33:34 +00004450 VarHandleOptimizations optimizations(invoke);
Vladimir Marko479cbad2020-12-10 16:10:09 +00004451 MacroAssembler* masm = codegen->GetVIXLAssembler();
4452 Register varhandle = InputRegisterAt(invoke, 0);
4453 Register object = InputRegisterAt(invoke, 1);
4454 Register index = InputRegisterAt(invoke, 2);
4455 DataType::Type value_type =
4456 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4457 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
4458
4459 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4460 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4461 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
4462 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4463 const MemberOffset class_offset = mirror::Object::ClassOffset();
4464 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
4465
4466 // Null-check the object.
Ulyana Trafimovich98f01d12021-07-28 14:33:34 +00004467 if (!optimizations.GetSkipObjectNullCheck()) {
4468 __ Cbz(object, slow_path->GetEntryLabel());
4469 }
Vladimir Marko479cbad2020-12-10 16:10:09 +00004470
4471 UseScratchRegisterScope temps(masm);
4472 Register temp = temps.AcquireW();
4473 Register temp2 = temps.AcquireW();
4474
4475 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
4476 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
4477 // coordinateType0 shall not be null but we do not explicitly verify that.
4478 DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4479 __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
4480 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4481 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4482 __ Cbz(temp2, slow_path->GetEntryLabel());
4483
Vladimir Marko479cbad2020-12-10 16:10:09 +00004484 // Check the object's class against coordinateType0.
4485 //
4486 // This is an exact check and we defer other cases to the runtime. This includes
4487 // conversion to array of superclass references, which is valid but subsequently
4488 // requires all update operations to check that the value can indeed be stored.
4489 // We do not want to perform such extra checks in the intrinsified code.
4490 //
4491 // We do this check without read barrier, so there can be false negatives which we
4492 // defer to the slow path. There shall be no false negatives for array classes in the
4493 // boot image (including Object[] and primitive arrays) because they are non-movable.
4494 __ Ldr(temp2, HeapOperand(object, class_offset.Int32Value()));
4495 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4496 __ Cmp(temp, temp2);
4497 __ B(slow_path->GetEntryLabel(), ne);
4498
Vladimir Marko98873af2020-12-16 12:10:03 +00004499 // Check that the coordinateType0 is an array type. We do not need a read barrier
4500 // for loading constant reference fields (or chains of them) for comparison with null,
Vladimir Marko7968cae2021-01-19 12:02:35 +00004501 // nor for finally loading a constant primitive field (primitive type) below.
Vladimir Marko98873af2020-12-16 12:10:03 +00004502 __ Ldr(temp2, HeapOperand(temp, component_type_offset.Int32Value()));
4503 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4504 __ Cbz(temp2, slow_path->GetEntryLabel());
4505
4506 // Check that the array component type matches the primitive type.
4507 __ Ldrh(temp2, HeapOperand(temp2, primitive_type_offset.Int32Value()));
4508 if (primitive_type == Primitive::kPrimNot) {
4509 static_assert(Primitive::kPrimNot == 0);
4510 __ Cbnz(temp2, slow_path->GetEntryLabel());
4511 } else {
4512 // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
4513 // we shall check for a byte array view in the slow path.
4514 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4515 // so we cannot emit that check if we're JITting without a boot image.
4516 bool boot_image_available =
4517 codegen->GetCompilerOptions().IsBootImage() ||
4518 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
Ulyana Trafimovich3693b2a2021-10-29 10:43:18 +00004519 bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
Vladimir Marko98873af2020-12-16 12:10:03 +00004520 vixl::aarch64::Label* slow_path_label =
4521 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4522 __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4523 __ B(slow_path_label, ne);
4524 }
4525
Vladimir Marko479cbad2020-12-10 16:10:09 +00004526 // Check for array index out of bounds.
4527 __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
4528 __ Cmp(index, temp);
4529 __ B(slow_path->GetEntryLabel(), hs);
4530}
4531
4532static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4533 CodeGeneratorARM64* codegen,
Vladimir Marko98873af2020-12-16 12:10:03 +00004534 VarHandleSlowPathARM64* slow_path) {
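  // The coordinate count determines the kind of VarHandle being checked: 0 -> static field,
  // 1 -> instance field (object), 2 -> array or byte-array/ByteBuffer view (object, int index).
  // E.g. an instance-field handle such as MethodHandles.lookup().findVarHandle(Point.class, "x",
  // int.class) takes one coordinate, the Point instance.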
Vladimir Markoa41ea272020-09-07 15:24:36 +00004535 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004536 if (expected_coordinates_count == 0u) {
4537 GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
Vladimir Marko479cbad2020-12-10 16:10:09 +00004538 } else if (expected_coordinates_count == 1u) {
4539 GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004540 } else {
Vladimir Marko479cbad2020-12-10 16:10:09 +00004541 DCHECK_EQ(expected_coordinates_count, 2u);
4542 GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004543 }
4544}
4545
Vladimir Marko98873af2020-12-16 12:10:03 +00004546static VarHandleSlowPathARM64* GenerateVarHandleChecks(HInvoke* invoke,
4547 CodeGeneratorARM64* codegen,
4548 std::memory_order order,
4549 DataType::Type type) {
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004550 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4551 VarHandleOptimizations optimizations(invoke);
4552 if (optimizations.GetUseKnownBootImageVarHandle()) {
4553 DCHECK_NE(expected_coordinates_count, 2u);
4554 if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4555 return nullptr;
4556 }
4557 }
4558
Vladimir Marko98873af2020-12-16 12:10:03 +00004559 VarHandleSlowPathARM64* slow_path =
4560 new (codegen->GetScopedAllocator()) VarHandleSlowPathARM64(invoke, order);
4561 codegen->AddSlowPath(slow_path);
4562
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004563 if (!optimizations.GetUseKnownBootImageVarHandle()) {
4564 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4565 }
Vladimir Marko98873af2020-12-16 12:10:03 +00004566 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4567
4568 return slow_path;
4569}
4570
Vladimir Marko436977d2020-11-12 12:41:06 +00004571struct VarHandleTarget {
4572 Register object; // The object holding the value to operate on.
4573 Register offset; // The offset of the value to operate on.
4574};
4575
Vladimir Marko98873af2020-12-16 12:10:03 +00004576static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
Vladimir Markoa41ea272020-09-07 15:24:36 +00004577 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
Vladimir Marko436977d2020-11-12 12:41:06 +00004578 LocationSummary* locations = invoke->GetLocations();
Vladimir Markoa41ea272020-09-07 15:24:36 +00004579
Vladimir Marko436977d2020-11-12 12:41:06 +00004580 VarHandleTarget target;
4581 // The temporary allocated for loading the offset.
Vladimir Marko98873af2020-12-16 12:10:03 +00004582 target.offset = WRegisterFrom(locations->GetTemp(0u));
Vladimir Marko479cbad2020-12-10 16:10:09 +00004583 // The reference to the object that holds the value to operate on.
Vladimir Marko436977d2020-11-12 12:41:06 +00004584 target.object = (expected_coordinates_count == 0u)
Vladimir Marko98873af2020-12-16 12:10:03 +00004585 ? WRegisterFrom(locations->GetTemp(1u))
Vladimir Marko436977d2020-11-12 12:41:06 +00004586 : InputRegisterAt(invoke, 1);
Vladimir Marko98873af2020-12-16 12:10:03 +00004587 return target;
4588}
4589
4590static void GenerateVarHandleTarget(HInvoke* invoke,
4591 const VarHandleTarget& target,
4592 CodeGeneratorARM64* codegen) {
4593 MacroAssembler* masm = codegen->GetVIXLAssembler();
4594 Register varhandle = InputRegisterAt(invoke, 0);
4595 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
Vladimir Marko436977d2020-11-12 12:41:06 +00004596
Vladimir Marko479cbad2020-12-10 16:10:09 +00004597 if (expected_coordinates_count <= 1u) {
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004598 if (VarHandleOptimizations(invoke).GetUseKnownBootImageVarHandle()) {
4599 ScopedObjectAccess soa(Thread::Current());
4600 ArtField* target_field = GetBootImageVarHandleField(invoke);
4601 if (expected_coordinates_count == 0u) {
4602 ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4603 if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4604 uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
4605 codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
4606 } else {
4607 codegen->LoadTypeForBootImageIntrinsic(
4608 target.object,
4609 TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
4610 }
4611 }
4612 __ Mov(target.offset, target_field->GetOffset().Uint32Value());
4613 } else {
4614 // For static fields, we need to fill the `target.object` with the declaring class,
4615 // so we can use `target.object` as a temporary for the `ArtField*`. For instance fields,
4616 // we do not need the declaring class, so we can forget the `ArtField*` once we have
4617 // loaded the `target.offset`; therefore we use `target.offset` itself to hold the `ArtField*`.
4618 Register field = (expected_coordinates_count == 0) ? target.object : target.offset;
Vladimir Markoa41ea272020-09-07 15:24:36 +00004619
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004620 const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4621 const MemberOffset offset_offset = ArtField::OffsetOffset();
Vladimir Markoa41ea272020-09-07 15:24:36 +00004622
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004623 // Load the ArtField, the offset and, if needed, declaring class.
4624 __ Ldr(field.X(), HeapOperand(varhandle, art_field_offset.Int32Value()));
4625 __ Ldr(target.offset, MemOperand(field.X(), offset_offset.Int32Value()));
4626 if (expected_coordinates_count == 0u) {
4627 codegen->GenerateGcRootFieldLoad(invoke,
4628 LocationFrom(target.object),
4629 field.X(),
4630 ArtField::DeclaringClassOffset().Int32Value(),
4631 /*fixup_label=*/ nullptr,
4632 kCompilerReadBarrierOption);
4633 }
Vladimir Marko479cbad2020-12-10 16:10:09 +00004634 }
4635 } else {
4636 DCHECK_EQ(expected_coordinates_count, 2u);
4637 DataType::Type value_type =
4638 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4639 size_t size_shift = DataType::SizeShift(value_type);
4640 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4641
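    // The element offset is computed as data_offset + (index << size_shift); the shift is
    // skipped for single-byte element types where size_shift == 0.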
4642 Register index = InputRegisterAt(invoke, 2);
4643 Register shifted_index = index;
4644 if (size_shift != 0u) {
4645 shifted_index = target.offset;
4646 __ Lsl(shifted_index, index, size_shift);
4647 }
4648 __ Add(target.offset, shifted_index, data_offset.Int32Value());
Vladimir Markoa41ea272020-09-07 15:24:36 +00004649 }
4650}
4651
Vladimir Marko479cbad2020-12-10 16:10:09 +00004652static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004653 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4654 DataType::Type return_type = invoke->GetType();
4655
Vladimir Markoa41ea272020-09-07 15:24:36 +00004656 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4657 LocationSummary* locations =
4658 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4659 locations->SetInAt(0, Location::RequiresRegister());
Vladimir Marko479cbad2020-12-10 16:10:09 +00004660 // Require coordinates in registers. These are the object holding the value
4661 // to operate on (except for static fields) and index (for arrays and views).
4662 for (size_t i = 0; i != expected_coordinates_count; ++i) {
4663 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4664 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004665 if (return_type != DataType::Type::kVoid) {
4666 if (DataType::IsFloatingPointType(return_type)) {
4667 locations->SetOut(Location::RequiresFpuRegister());
4668 } else {
4669 locations->SetOut(Location::RequiresRegister());
4670 }
Vladimir Markoa41ea272020-09-07 15:24:36 +00004671 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004672 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4673 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4674 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4675 HInstruction* arg = invoke->InputAt(arg_index);
4676 if (IsConstantZeroBitPattern(arg)) {
4677 locations->SetInAt(arg_index, Location::ConstantLocation(arg->AsConstant()));
4678 } else if (DataType::IsFloatingPointType(arg->GetType())) {
4679 locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4680 } else {
4681 locations->SetInAt(arg_index, Location::RequiresRegister());
4682 }
4683 }
Vladimir Marko436977d2020-11-12 12:41:06 +00004684
4685 // Add a temporary for offset.
4686 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4687 GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4688 // To preserve the offset value across the non-Baker read barrier slow path
4689 // for loading the declaring class, use a fixed callee-save register.
Vladimir Markoc8178f52020-11-24 10:38:16 +00004690 constexpr int first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
4691 locations->AddTemp(Location::RegisterLocation(first_callee_save));
Vladimir Marko436977d2020-11-12 12:41:06 +00004692 } else {
4693 locations->AddTemp(Location::RequiresRegister());
4694 }
Vladimir Marko98873af2020-12-16 12:10:03 +00004695 if (expected_coordinates_count == 0u) {
4696 // Add a temporary to hold the declaring class.
4697 locations->AddTemp(Location::RequiresRegister());
4698 }
Vladimir Marko436977d2020-11-12 12:41:06 +00004699
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004700 return locations;
4701}
4702
4703static void CreateVarHandleGetLocations(HInvoke* invoke) {
Ulyana Trafimovich98f01d12021-07-28 14:33:34 +00004704 VarHandleOptimizations optimizations(invoke);
4705 if (optimizations.GetDoNotIntrinsify()) {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004706 return;
4707 }
4708
Vladimir Markoe17530a2020-11-11 17:02:26 +00004709 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4710 invoke->GetType() == DataType::Type::kReference &&
4711 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4712 invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4713 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4714 // the passed reference and reloads it from the field. This gets the memory visibility
Vladimir Marko436977d2020-11-12 12:41:06 +00004715 // wrong for Acquire/Volatile operations. b/173104084
Vladimir Markoe17530a2020-11-11 17:02:26 +00004716 return;
4717 }
4718
Vladimir Marko479cbad2020-12-10 16:10:09 +00004719 CreateVarHandleCommonLocations(invoke);
Vladimir Marko79db6462020-07-31 14:57:32 +01004720}
4721
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004722static void GenerateVarHandleGet(HInvoke* invoke,
4723 CodeGeneratorARM64* codegen,
Vladimir Marko98873af2020-12-16 12:10:03 +00004724 std::memory_order order,
4725 bool byte_swap = false) {
Vladimir Marko79db6462020-07-31 14:57:32 +01004726 DataType::Type type = invoke->GetType();
4727 DCHECK_NE(type, DataType::Type::kVoid);
Vladimir Marko79db6462020-07-31 14:57:32 +01004728
Vladimir Markoa41ea272020-09-07 15:24:36 +00004729 LocationSummary* locations = invoke->GetLocations();
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004730 MacroAssembler* masm = codegen->GetVIXLAssembler();
Vladimir Marko79db6462020-07-31 14:57:32 +01004731 CPURegister out = helpers::OutputCPURegister(invoke);
Vladimir Marko79db6462020-07-31 14:57:32 +01004732
Vladimir Marko98873af2020-12-16 12:10:03 +00004733 VarHandleTarget target = GetVarHandleTarget(invoke);
4734 VarHandleSlowPathARM64* slow_path = nullptr;
4735 if (!byte_swap) {
4736 slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4737 GenerateVarHandleTarget(invoke, target, codegen);
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004738 if (slow_path != nullptr) {
4739 __ Bind(slow_path->GetNativeByteOrderLabel());
4740 }
Vladimir Marko98873af2020-12-16 12:10:03 +00004741 }
Vladimir Marko79db6462020-07-31 14:57:32 +01004742
Vladimir Marko98873af2020-12-16 12:10:03 +00004743 // ARM64 load-acquire instructions are implicitly sequentially consistent.
4744 bool use_load_acquire =
4745 (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
4746 DCHECK(use_load_acquire || order == std::memory_order_relaxed);
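  // Mapping used below: relaxed -> plain Load(), acquire and seq_cst -> LoadAcquire().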
Vladimir Marko79db6462020-07-31 14:57:32 +01004747
Vladimir Marko479cbad2020-12-10 16:10:09 +00004748 // Load the value from the target location.
Vladimir Markoa41ea272020-09-07 15:24:36 +00004749 if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4750 // Piggy-back on the field load path using introspection for the Baker read barrier.
Vladimir Marko436977d2020-11-12 12:41:06 +00004751 // The `target.offset` is a temporary, use it for field address.
4752 Register tmp_ptr = target.offset.X();
4753 __ Add(tmp_ptr, target.object.X(), target.offset.X());
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004754 codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
4755 locations->Out(),
Vladimir Marko436977d2020-11-12 12:41:06 +00004756 target.object,
4757 MemOperand(tmp_ptr),
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004758 /*needs_null_check=*/ false,
4759 use_load_acquire);
Vladimir Marko98873af2020-12-16 12:10:03 +00004760 DCHECK(!byte_swap);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004761 } else {
Vladimir Marko436977d2020-11-12 12:41:06 +00004762 MemOperand address(target.object.X(), target.offset.X());
Vladimir Marko98873af2020-12-16 12:10:03 +00004763 CPURegister load_reg = out;
4764 DataType::Type load_type = type;
4765 UseScratchRegisterScope temps(masm);
4766 if (byte_swap) {
4767 if (type == DataType::Type::kInt16) {
4768 // Avoid unnecessary sign extension before REV16.
4769 load_type = DataType::Type::kUint16;
4770 } else if (type == DataType::Type::kFloat32) {
4771 load_type = DataType::Type::kInt32;
4772 load_reg = target.offset.W();
4773 } else if (type == DataType::Type::kFloat64) {
4774 load_type = DataType::Type::kInt64;
4775 load_reg = target.offset.X();
4776 }
4777 }
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004778 if (use_load_acquire) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004779 codegen->LoadAcquire(invoke, load_type, load_reg, address, /*needs_null_check=*/ false);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004780 } else {
Vladimir Marko98873af2020-12-16 12:10:03 +00004781 codegen->Load(load_type, load_reg, address);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004782 }
Vladimir Markoa41ea272020-09-07 15:24:36 +00004783 if (type == DataType::Type::kReference) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004784 DCHECK(!byte_swap);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004785 DCHECK(out.IsW());
4786 Location out_loc = locations->Out();
Vladimir Marko436977d2020-11-12 12:41:06 +00004787 Location object_loc = LocationFrom(target.object);
4788 Location offset_loc = LocationFrom(target.offset);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004789 codegen->MaybeGenerateReadBarrierSlow(invoke, out_loc, out_loc, object_loc, 0u, offset_loc);
Vladimir Marko98873af2020-12-16 12:10:03 +00004790 } else if (byte_swap) {
4791 GenerateReverseBytes(masm, type, load_reg, out);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004792 }
4793 }
4794
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004795 if (slow_path != nullptr) {
4796 DCHECK(!byte_swap);
Vladimir Marko98873af2020-12-16 12:10:03 +00004797 __ Bind(slow_path->GetExitLabel());
4798 }
Vladimir Markoa41ea272020-09-07 15:24:36 +00004799}
4800
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004801void IntrinsicLocationsBuilderARM64::VisitVarHandleGet(HInvoke* invoke) {
4802 CreateVarHandleGetLocations(invoke);
4803}
4804
4805void IntrinsicCodeGeneratorARM64::VisitVarHandleGet(HInvoke* invoke) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004806 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004807}
4808
4809void IntrinsicLocationsBuilderARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4810 CreateVarHandleGetLocations(invoke);
4811}
4812
4813void IntrinsicCodeGeneratorARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004814 GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004815}
4816
4817void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4818 CreateVarHandleGetLocations(invoke);
4819}
4820
4821void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004822 GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004823}
4824
4825void IntrinsicLocationsBuilderARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4826 CreateVarHandleGetLocations(invoke);
4827}
4828
4829void IntrinsicCodeGeneratorARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004830 GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004831}
4832
4833static void CreateVarHandleSetLocations(HInvoke* invoke) {
Ulyana Trafimovich98f01d12021-07-28 14:33:34 +00004834 VarHandleOptimizations optimizations(invoke);
4835 if (optimizations.GetDoNotIntrinsify()) {
Vladimir Markoa41ea272020-09-07 15:24:36 +00004836 return;
4837 }
4838
Vladimir Marko479cbad2020-12-10 16:10:09 +00004839 CreateVarHandleCommonLocations(invoke);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004840}
4841
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004842static void GenerateVarHandleSet(HInvoke* invoke,
4843 CodeGeneratorARM64* codegen,
Vladimir Marko98873af2020-12-16 12:10:03 +00004844 std::memory_order order,
4845 bool byte_swap = false) {
Vladimir Markoa41ea272020-09-07 15:24:36 +00004846 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4847 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4848
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004849 MacroAssembler* masm = codegen->GetVIXLAssembler();
Vladimir Markoa41ea272020-09-07 15:24:36 +00004850 CPURegister value = InputCPURegisterOrZeroRegAt(invoke, value_index);
4851
Vladimir Marko98873af2020-12-16 12:10:03 +00004852 VarHandleTarget target = GetVarHandleTarget(invoke);
4853 VarHandleSlowPathARM64* slow_path = nullptr;
4854 if (!byte_swap) {
4855 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4856 GenerateVarHandleTarget(invoke, target, codegen);
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004857 if (slow_path != nullptr) {
4858 __ Bind(slow_path->GetNativeByteOrderLabel());
4859 }
Vladimir Marko98873af2020-12-16 12:10:03 +00004860 }
Vladimir Markoa41ea272020-09-07 15:24:36 +00004861
Vladimir Marko98873af2020-12-16 12:10:03 +00004862 // ARM64 store-release instructions are implicitly sequentially consistent.
4863 bool use_store_release =
4864 (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
4865 DCHECK(use_store_release || order == std::memory_order_relaxed);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004866
Vladimir Marko479cbad2020-12-10 16:10:09 +00004867 // Store the value to the target location.
Vladimir Markoa41ea272020-09-07 15:24:36 +00004868 {
4869 CPURegister source = value;
4870 UseScratchRegisterScope temps(masm);
4871 if (kPoisonHeapReferences && value_type == DataType::Type::kReference) {
4872 DCHECK(value.IsW());
4873 Register temp = temps.AcquireW();
4874 __ Mov(temp, value.W());
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004875 codegen->GetAssembler()->PoisonHeapReference(temp);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004876 source = temp;
4877 }
Vladimir Marko98873af2020-12-16 12:10:03 +00004878 if (byte_swap) {
4879 DCHECK(!source.IsZero()); // We use the main path for zero as it does not need a byte swap.
4880 Register temp = source.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
4881 if (value_type == DataType::Type::kInt16) {
4882 // Avoid unnecessary sign extension before storing.
4883 value_type = DataType::Type::kUint16;
4884 } else if (DataType::IsFloatingPointType(value_type)) {
4885 __ Fmov(temp, source.Is64Bits() ? source.D() : source.S());
4886 value_type = source.Is64Bits() ? DataType::Type::kInt64 : DataType::Type::kInt32;
4887 source = temp; // Source for the `GenerateReverseBytes()` below.
4888 }
4889 GenerateReverseBytes(masm, value_type, source, temp);
4890 source = temp;
4891 }
Vladimir Marko436977d2020-11-12 12:41:06 +00004892 MemOperand address(target.object.X(), target.offset.X());
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004893 if (use_store_release) {
4894 codegen->StoreRelease(invoke, value_type, source, address, /*needs_null_check=*/ false);
4895 } else {
4896 codegen->Store(value_type, source, address);
4897 }
Vladimir Markoa41ea272020-09-07 15:24:36 +00004898 }
4899
4900 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
Vladimir Marko436977d2020-11-12 12:41:06 +00004901 codegen->MarkGCCard(target.object, Register(value), /*value_can_be_null=*/ true);
Vladimir Markoa41ea272020-09-07 15:24:36 +00004902 }
Vladimir Marko79db6462020-07-31 14:57:32 +01004903
Vladimir Marko9d31daa2022-04-14 10:48:44 +01004904 if (slow_path != nullptr) {
4905 DCHECK(!byte_swap);
Vladimir Marko98873af2020-12-16 12:10:03 +00004906 __ Bind(slow_path->GetExitLabel());
4907 }
Vladimir Marko79db6462020-07-31 14:57:32 +01004908}
4909
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004910void IntrinsicLocationsBuilderARM64::VisitVarHandleSet(HInvoke* invoke) {
4911 CreateVarHandleSetLocations(invoke);
4912}
4913
4914void IntrinsicCodeGeneratorARM64::VisitVarHandleSet(HInvoke* invoke) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004915 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004916}
4917
4918void IntrinsicLocationsBuilderARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4919 CreateVarHandleSetLocations(invoke);
4920}
4921
4922void IntrinsicCodeGeneratorARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004923 GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004924}
4925
4926void IntrinsicLocationsBuilderARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
4927 CreateVarHandleSetLocations(invoke);
4928}
4929
4930void IntrinsicCodeGeneratorARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004931 GenerateVarHandleSet(invoke, codegen_, std::memory_order_release);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004932}
4933
4934void IntrinsicLocationsBuilderARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4935 CreateVarHandleSetLocations(invoke);
4936}
4937
4938void IntrinsicCodeGeneratorARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
Vladimir Marko98873af2020-12-16 12:10:03 +00004939 GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst);
Vladimir Marko2d98dc22020-10-01 11:21:37 +00004940}
4941
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004942static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) {
Ulyana Trafimovich98f01d12021-07-28 14:33:34 +00004943 VarHandleOptimizations optimizations(invoke);
4944 if (optimizations.GetDoNotIntrinsify()) {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004945 return;
4946 }
4947
Vladimir Markoe17530a2020-11-11 17:02:26 +00004948 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
Vladimir Marko98873af2020-12-16 12:10:03 +00004949 DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
Vladimir Markoe17530a2020-11-11 17:02:26 +00004950 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
4951 value_type == DataType::Type::kReference) {
4952 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4953 // the passed reference and reloads it from the field. This breaks the read barriers
4954 // in slow path in different ways. The marked old value may not actually be a to-space
4955 // reference to the same object as `old_value`, breaking slow path assumptions. And
4956 // for CompareAndExchange, marking the old value after comparison failure may actually
4957 // return the reference to `expected`, erroneously indicating success even though we
Vladimir Marko436977d2020-11-12 12:41:06 +00004958 // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
Vladimir Markoe17530a2020-11-11 17:02:26 +00004959 return;
4960 }
4961
Vladimir Marko479cbad2020-12-10 16:10:09 +00004962 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004963
Vladimir Markoc8178f52020-11-24 10:38:16 +00004964 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
4965 // We need callee-save registers for both the class object and offset instead of
Vladimir Marko479cbad2020-12-10 16:10:09 +00004966 // the temporaries reserved in CreateVarHandleCommonLocations().
Vladimir Markoc8178f52020-11-24 10:38:16 +00004967 static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u);
4968 uint32_t first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
4969 uint32_t second_callee_save = CTZ(kArm64CalleeSaveRefSpills ^ (1u << first_callee_save));
4970 if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
4971 DCHECK_EQ(locations->GetTempCount(), 2u);
4972 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4973 DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
4974 locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
4975 } else {
4976 DCHECK_EQ(locations->GetTempCount(), 1u);
4977 DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4978 locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
4979 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00004980 }
Vladimir Marko98873af2020-12-16 12:10:03 +00004981 size_t old_temp_count = locations->GetTempCount();
4982 DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4983 if (!return_success) {
4984 if (DataType::IsFloatingPointType(value_type)) {
4985 // Add a temporary for old value and exclusive store result if floating point
4986 // `expected` and/or `new_value` take scratch registers.
4987 size_t available_scratch_registers =
4988 (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) +
4989 (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u);
4990 size_t temps_needed = /* pointer, old value, store result */ 3u - available_scratch_registers;
4991 // We can reuse the declaring class (if present) and offset temporary.
4992 if (temps_needed > old_temp_count) {
4993 locations->AddRegisterTemps(temps_needed - old_temp_count);
4994 }
4995 } else if ((value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) &&
4996 !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) &&
4997 !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) &&
4998 GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
4999 // Allocate a normal temporary for store result in the non-native byte order path
5000 // because scratch registers are used by the byte-swapped `expected` and `new_value`.
5001 DCHECK_EQ(old_temp_count, 1u);
5002 locations->AddTemp(Location::RequiresRegister());
5003 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005004 }
5005 if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
5006 // Add a temporary for the `old_value_temp` in slow path.
5007 locations->AddTemp(Location::RequiresRegister());
5008 }
5009}
5010
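// Moves a floating-point value to a core scratch register, since the exclusive load/store
// sequences used for CAS operate on general-purpose registers; integer values are returned
// as the appropriately sized W/X view of the input register.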
5011static Register MoveToTempIfFpRegister(const CPURegister& cpu_reg,
5012 DataType::Type type,
5013 MacroAssembler* masm,
5014 UseScratchRegisterScope* temps) {
5015 if (cpu_reg.IsS()) {
5016 DCHECK_EQ(type, DataType::Type::kFloat32);
5017 Register reg = temps->AcquireW();
5018 __ Fmov(reg, cpu_reg.S());
5019 return reg;
5020 } else if (cpu_reg.IsD()) {
5021 DCHECK_EQ(type, DataType::Type::kFloat64);
5022 Register reg = temps->AcquireX();
5023 __ Fmov(reg, cpu_reg.D());
5024 return reg;
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005025 } else {
Vladimir Marko98873af2020-12-16 12:10:03 +00005026 return DataType::Is64BitType(type) ? cpu_reg.X() : cpu_reg.W();
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005027 }
5028}
5029
5030static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
5031 CodeGeneratorARM64* codegen,
5032 std::memory_order order,
5033 bool return_success,
Vladimir Marko98873af2020-12-16 12:10:03 +00005034 bool strong,
5035 bool byte_swap = false) {
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005036 DCHECK(return_success || strong);
5037
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005038 uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
5039 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
5040 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
5041 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
5042
5043 MacroAssembler* masm = codegen->GetVIXLAssembler();
5044 LocationSummary* locations = invoke->GetLocations();
5045 CPURegister expected = InputCPURegisterOrZeroRegAt(invoke, expected_index);
5046 CPURegister new_value = InputCPURegisterOrZeroRegAt(invoke, new_value_index);
5047 CPURegister out = helpers::OutputCPURegister(invoke);
5048
Vladimir Marko98873af2020-12-16 12:10:03 +00005049 VarHandleTarget target = GetVarHandleTarget(invoke);
5050 VarHandleSlowPathARM64* slow_path = nullptr;
5051 if (!byte_swap) {
5052 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
Vladimir Marko98873af2020-12-16 12:10:03 +00005053 GenerateVarHandleTarget(invoke, target, codegen);
Vladimir Marko9d31daa2022-04-14 10:48:44 +01005054 if (slow_path != nullptr) {
5055 slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
5056 __ Bind(slow_path->GetNativeByteOrderLabel());
5057 }
Vladimir Marko98873af2020-12-16 12:10:03 +00005058 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005059
5060 // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
5061 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
5062 // Mark card for object assuming new value is stored.
5063 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
Vladimir Marko436977d2020-11-12 12:41:06 +00005064 codegen->MarkGCCard(target.object, new_value.W(), new_value_can_be_null);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005065 }
5066
Vladimir Marko479cbad2020-12-10 16:10:09 +00005067 // Reuse the `offset` temporary for the pointer to the target location,
5068 // except for references that need the offset for the read barrier.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005069 UseScratchRegisterScope temps(masm);
Vladimir Marko436977d2020-11-12 12:41:06 +00005070 Register tmp_ptr = target.offset.X();
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005071 if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
5072 tmp_ptr = temps.AcquireX();
5073 }
Vladimir Marko436977d2020-11-12 12:41:06 +00005074 __ Add(tmp_ptr, target.object.X(), target.offset.X());
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005075
Vladimir Marko98873af2020-12-16 12:10:03 +00005076 // Move floating point values to scratch registers.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005077 // Note that float/double CAS uses bitwise comparison, rather than the operator==.
5078 Register expected_reg = MoveToTempIfFpRegister(expected, value_type, masm, &temps);
5079 Register new_value_reg = MoveToTempIfFpRegister(new_value, value_type, masm, &temps);
Vladimir Marko98873af2020-12-16 12:10:03 +00005080 bool is_fp = DataType::IsFloatingPointType(value_type);
5081 DataType::Type cas_type = is_fp
5082 ? ((value_type == DataType::Type::kFloat64) ? DataType::Type::kInt64 : DataType::Type::kInt32)
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005083 : value_type;
Vladimir Marko98873af2020-12-16 12:10:03 +00005084 // Avoid sign extension in the CAS loop by zero-extending `expected` before the loop. This adds
5085 // one instruction for CompareAndExchange as we shall need to sign-extend the returned value.
5086 if (value_type == DataType::Type::kInt16 && !expected.IsZero()) {
5087 Register temp = temps.AcquireW();
5088 __ Uxth(temp, expected_reg);
5089 expected_reg = temp;
5090 cas_type = DataType::Type::kUint16;
5091 } else if (value_type == DataType::Type::kInt8 && !expected.IsZero()) {
5092 Register temp = temps.AcquireW();
5093 __ Uxtb(temp, expected_reg);
5094 expected_reg = temp;
5095 cas_type = DataType::Type::kUint8;
5096 }
5097
5098 if (byte_swap) {
5099 // Do the byte swap and move values to scratch registers if needed.
5100 // Non-zero FP values and non-zero `expected` for `kInt16` are already in scratch registers.
5101 DCHECK_NE(value_type, DataType::Type::kInt8);
5102 if (!expected.IsZero()) {
5103 bool is_scratch = is_fp || (value_type == DataType::Type::kInt16);
5104 Register temp = is_scratch ? expected_reg : temps.AcquireSameSizeAs(expected_reg);
5105 GenerateReverseBytes(masm, cas_type, expected_reg, temp);
5106 expected_reg = temp;
5107 }
5108 if (!new_value.IsZero()) {
5109 Register temp = is_fp ? new_value_reg : temps.AcquireSameSizeAs(new_value_reg);
5110 GenerateReverseBytes(masm, cas_type, new_value_reg, temp);
5111 new_value_reg = temp;
5112 }
5113 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005114
5115 // Prepare registers for old value and the result of the exclusive store.
5116 Register old_value;
5117 Register store_result;
5118 if (return_success) {
5119 // Use the output register for both old value and exclusive store result.
5120 old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
5121 store_result = out.W();
5122 } else if (DataType::IsFloatingPointType(value_type)) {
5123 // We need two temporary registers but we have already used scratch registers for
5124 // holding the expected and new value unless they are a zero bit pattern (+0.0f or
5125 // +0.0). We have allocated sufficient normal temporaries to handle that.
Vladimir Marko98873af2020-12-16 12:10:03 +00005126 size_t next_temp = 1u;
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005127 if (expected.IsZero()) {
5128 old_value = (cas_type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
5129 } else {
5130 Location temp = locations->GetTemp(next_temp);
5131 ++next_temp;
5132 old_value = (cas_type == DataType::Type::kInt64) ? XRegisterFrom(temp) : WRegisterFrom(temp);
5133 }
5134 store_result =
5135 new_value.IsZero() ? temps.AcquireW() : WRegisterFrom(locations->GetTemp(next_temp));
Vladimir Marko98873af2020-12-16 12:10:03 +00005136 DCHECK(!old_value.Is(tmp_ptr));
5137 DCHECK(!store_result.Is(tmp_ptr));
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005138 } else {
Vladimir Marko98873af2020-12-16 12:10:03 +00005139 // Use the output register for the old value.
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005140 old_value = (cas_type == DataType::Type::kInt64) ? out.X() : out.W();
Vladimir Marko98873af2020-12-16 12:10:03 +00005141 // Use scratch register for the store result, except when we have used up
5142 // scratch registers for byte-swapped `expected` and `new_value`.
5143 // In that case, we have allocated a normal temporary.
5144 store_result = (byte_swap && !expected.IsZero() && !new_value.IsZero())
5145 ? WRegisterFrom(locations->GetTemp(1))
5146 : temps.AcquireW();
5147 DCHECK(!store_result.Is(tmp_ptr));
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005148 }
5149
5150 vixl::aarch64::Label exit_loop_label;
5151 vixl::aarch64::Label* exit_loop = &exit_loop_label;
5152 vixl::aarch64::Label* cmp_failure = &exit_loop_label;
5153
5154 if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
5155 // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
5156 // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
Vladimir Marko98873af2020-12-16 12:10:03 +00005157 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005158 Register old_value_temp =
5159 WRegisterFrom(locations->GetTemp((expected_coordinates_count == 0u) ? 2u : 1u));
5160 // For strong CAS, use a scratch register for the store result in slow path.
5161 // For weak CAS, we need to check the store result, so store it in `store_result`.
5162 Register slow_path_store_result = strong ? Register() : store_result;
5163 ReadBarrierCasSlowPathARM64* rb_slow_path =
5164 new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARM64(
5165 invoke,
5166 order,
5167 strong,
Vladimir Marko436977d2020-11-12 12:41:06 +00005168 target.object,
5169 target.offset.X(),
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005170 expected_reg,
5171 new_value_reg,
5172 old_value,
5173 old_value_temp,
5174 slow_path_store_result,
5175 /*update_old_value=*/ !return_success,
5176 codegen);
5177 codegen->AddSlowPath(rb_slow_path);
5178 exit_loop = rb_slow_path->GetExitLabel();
5179 cmp_failure = rb_slow_path->GetEntryLabel();
5180 }
5181
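  // GenerateCompareAndSet() emits a load-exclusive/compare/store-exclusive sequence; a rough
  // sketch for the strong case, ignoring memory-order variants and how the store result is
  // normalized:
  //   retry: ldxr old_value, [tmp_ptr]
  //          cmp  old_value, expected_reg; b.ne cmp_failure
  //          stxr store_result, new_value_reg, [tmp_ptr]; cbnz store_result, retry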
Vladimir Markoe17530a2020-11-11 17:02:26 +00005182 GenerateCompareAndSet(codegen,
5183 cas_type,
5184 order,
5185 strong,
5186 cmp_failure,
5187 tmp_ptr,
5188 new_value_reg,
5189 old_value,
5190 store_result,
5191 expected_reg);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005192 __ Bind(exit_loop);
5193
5194 if (return_success) {
5195 if (strong) {
5196 __ Cset(out.W(), eq);
5197 } else {
Vladimir Markoe17530a2020-11-11 17:02:26 +00005198 // On success, the Z flag is set and the store result is 1, see GenerateCompareAndSet().
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005199 // On failure, either the Z flag is clear or the store result is 0.
5200 // Determine the final success value with a CSEL.
5201 __ Csel(out.W(), store_result, wzr, eq);
5202 }
Vladimir Marko98873af2020-12-16 12:10:03 +00005203 } else if (byte_swap) {
5204 // Also handles moving to FP registers.
5205 GenerateReverseBytes(masm, value_type, old_value, out);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005206 } else if (DataType::IsFloatingPointType(value_type)) {
5207 __ Fmov((value_type == DataType::Type::kFloat64) ? out.D() : out.S(), old_value);
Vladimir Marko98873af2020-12-16 12:10:03 +00005208 } else if (value_type == DataType::Type::kInt8) {
5209 __ Sxtb(out.W(), old_value);
5210 } else if (value_type == DataType::Type::kInt16) {
5211 __ Sxth(out.W(), old_value);
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005212 }
Vladimir Marko98873af2020-12-16 12:10:03 +00005213
Vladimir Marko9d31daa2022-04-14 10:48:44 +01005214 if (slow_path != nullptr) {
5215 DCHECK(!byte_swap);
Vladimir Marko98873af2020-12-16 12:10:03 +00005216 __ Bind(slow_path->GetExitLabel());
5217 }
Vladimir Marko1bff99f2020-11-02 15:07:33 +00005218}
5219
5220void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5221 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
5222}
5223
5224void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5225 GenerateVarHandleCompareAndSetOrExchange(
5226 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
5227}
5228
5229void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5230 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
5231}
5232
5233void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5234 GenerateVarHandleCompareAndSetOrExchange(
5235 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
5236}
5237
5238void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5239 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
5240}
5241
5242void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5243 GenerateVarHandleCompareAndSetOrExchange(
5244 invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
5245}
5246
5247void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5248 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5249}
5250
5251void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5252 GenerateVarHandleCompareAndSetOrExchange(
5253 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
5254}
5255
5256void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5257 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5258}
5259
5260void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5261 GenerateVarHandleCompareAndSetOrExchange(
5262 invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
5263}
5264
5265void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5266 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5267}
5268
5269void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5270 GenerateVarHandleCompareAndSetOrExchange(
5271 invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
5272}
5273
5274void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5275 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5276}
5277
5278void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5279 GenerateVarHandleCompareAndSetOrExchange(
5280 invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
5281}
5282
5283void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5284 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5285}
5286
5287void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5288 GenerateVarHandleCompareAndSetOrExchange(
5289 invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
5290}
5291
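// Shared locations builder for the VarHandle getAndSet, getAndAdd and getAndBitwise* intrinsics.
// As a rough illustration (not code from this file), the Java-level operation implemented for,
// e.g., getAndAdd on an int field is:
//   int old = (int) vh.getAndAdd(obj, 1);  // atomically: old = field; field = old + 1;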
Vladimir Markoe1510d42020-11-13 11:07:13 +00005292static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
5293 GetAndUpdateOp get_and_update_op) {
Ulyana Trafimovich98f01d12021-07-28 14:33:34 +00005294 VarHandleOptimizations optimizations(invoke);
5295 if (optimizations.GetDoNotIntrinsify()) {
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005296 return;
5297 }
5298
5299 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
5300 invoke->GetType() == DataType::Type::kReference) {
5301 // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
5302 // the passed reference and reloads it from the field, thus seeing the new value
Vladimir Marko436977d2020-11-12 12:41:06 +00005303 // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005304 return;
5305 }
5306
Vladimir Marko479cbad2020-12-10 16:10:09 +00005307 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005308
Vladimir Marko98873af2020-12-16 12:10:03 +00005309 size_t old_temp_count = locations->GetTempCount();
5310 DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
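  // The common locations always reserve a temporary for the target offset; static field
  // VarHandles (no coordinates) additionally have a temporary for the declaring class.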
Vladimir Markoe1510d42020-11-13 11:07:13 +00005311 if (DataType::IsFloatingPointType(invoke->GetType())) {
5312 if (get_and_update_op == GetAndUpdateOp::kAdd) {
5313 // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
5314 locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister());
5315 } else {
5316 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
Vladimir Marko98873af2020-12-16 12:10:03 +00005317 // We can reuse the declaring class temporary if present.
5318 if (old_temp_count == 1u &&
5319 !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
Vladimir Markoe1510d42020-11-13 11:07:13 +00005320 // Add a temporary for `old_value` if floating point `new_value` takes a scratch register.
5321 locations->AddTemp(Location::RequiresRegister());
5322 }
5323 }
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005324 }
Vladimir Marko98873af2020-12-16 12:10:03 +00005325 // For bitwise operations, the byte-swap path needs a temporary unless the argument is zero,
5326 // which does not need a byte swap. We can reuse the declaring class temporary if present.
5327 if (old_temp_count == 1u &&
5328 (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) &&
5329 GetExpectedVarHandleCoordinatesCount(invoke) == 2u &&
5330 !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5331 DataType::Type value_type =
5332 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5333 if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) {
5334 locations->AddTemp(Location::RequiresRegister());
5335 }
5336 }
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005337}
5338
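// Code generator shared by all get-and-update intrinsics. The emitted sequence is:
//   1. VarHandle checks and target (object + offset) computation, skipped when called from the
//      byte array view slow path with `byte_swap == true`,
//   2. GC card marking when a reference is stored by getAndSet,
//   3. the atomic update emitted by GenerateGetAndUpdate() on an integral load/store type,
//   4. conversion of the loaded old value to the result type (sign-extension, byte swap,
//      move to an FP register, or read barrier for references).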
Vladimir Markoe1510d42020-11-13 11:07:13 +00005339static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5340 CodeGeneratorARM64* codegen,
5341 GetAndUpdateOp get_and_update_op,
Vladimir Marko98873af2020-12-16 12:10:03 +00005342 std::memory_order order,
5343 bool byte_swap = false) {
Vladimir Markoe1510d42020-11-13 11:07:13 +00005344 uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
5345 DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005346
5347 MacroAssembler* masm = codegen->GetVIXLAssembler();
5348 LocationSummary* locations = invoke->GetLocations();
Vladimir Markoe1510d42020-11-13 11:07:13 +00005349 CPURegister arg = InputCPURegisterOrZeroRegAt(invoke, arg_index);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005350 CPURegister out = helpers::OutputCPURegister(invoke);
5351
Vladimir Marko98873af2020-12-16 12:10:03 +00005352 VarHandleTarget target = GetVarHandleTarget(invoke);
5353 VarHandleSlowPathARM64* slow_path = nullptr;
5354 if (!byte_swap) {
5355 slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
Vladimir Marko98873af2020-12-16 12:10:03 +00005356 GenerateVarHandleTarget(invoke, target, codegen);
Vladimir Marko9d31daa2022-04-14 10:48:44 +01005357 if (slow_path != nullptr) {
5358 slow_path->SetGetAndUpdateOp(get_and_update_op);
5359 __ Bind(slow_path->GetNativeByteOrderLabel());
5360 }
Vladimir Marko98873af2020-12-16 12:10:03 +00005361 }
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005362
5363 // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
Vladimir Markoe1510d42020-11-13 11:07:13 +00005364 if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
5365 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005366 // Mark the card for the object; the new reference value is about to be stored.
5367 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
Vladimir Markoe1510d42020-11-13 11:07:13 +00005368 codegen->MarkGCCard(target.object, arg.W(), new_value_can_be_null);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005369 }
5370
Vladimir Marko479cbad2020-12-10 16:10:09 +00005371 // Reuse the `target.offset` temporary for the pointer to the target location,
5372 // except for references that need the offset for the non-Baker read barrier.
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005373 UseScratchRegisterScope temps(masm);
Vladimir Marko436977d2020-11-12 12:41:06 +00005374 Register tmp_ptr = target.offset.X();
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005375 if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
5376 value_type == DataType::Type::kReference) {
5377 tmp_ptr = temps.AcquireX();
5378 }
Vladimir Marko436977d2020-11-12 12:41:06 +00005379 __ Add(tmp_ptr, target.object.X(), target.offset.X());
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005380
Vladimir Markoe1510d42020-11-13 11:07:13 +00005381 // The load/store type is never floating point.
Vladimir Marko98873af2020-12-16 12:10:03 +00005382 bool is_fp = DataType::IsFloatingPointType(value_type);
5383 DataType::Type load_store_type = is_fp
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005384 ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
5385 : value_type;
Vladimir Marko98873af2020-12-16 12:10:03 +00005386 // Avoid sign extension in the CAS loop. Sign-extend after the loop.
5387 // Note: Using unsigned values yields the same value to store (we do not store higher bits).
5388 if (value_type == DataType::Type::kInt8) {
5389 load_store_type = DataType::Type::kUint8;
5390 } else if (value_type == DataType::Type::kInt16) {
5391 load_store_type = DataType::Type::kUint16;
5392 }
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005393
5394 // Prepare register for old value.
Vladimir Markoe1510d42020-11-13 11:07:13 +00005395 CPURegister old_value = out;
5396 if (get_and_update_op == GetAndUpdateOp::kSet) {
5397 // For floating point GetAndSet, perform GenerateGetAndUpdate() with core registers,
5398 // rather than moving between core and FP registers in the loop.
5399 arg = MoveToTempIfFpRegister(arg, value_type, masm, &temps);
5400 if (DataType::IsFloatingPointType(value_type) && !arg.IsZero()) {
5401 // We need a temporary register for the old value: a scratch register already holds the
Vladimir Markoc8178f52020-11-24 10:38:16 +00005402 // new value (it is not the zero bit pattern +0.0f or +0.0, so it could not use ZR) and
Vladimir Markoe1510d42020-11-13 11:07:13 +00005403 // GenerateGetAndUpdate() needs another scratch register; use the allocated normal temporary.
Vladimir Marko98873af2020-12-16 12:10:03 +00005404 old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type);
Vladimir Markoe1510d42020-11-13 11:07:13 +00005405 } else if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) &&
5406 value_type == DataType::Type::kReference) {
5407 // Load the old value initially to a scratch register.
5408 // We shall move it to `out` later with a read barrier.
5409 old_value = temps.AcquireW();
5410 }
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005411 }
5412
Vladimir Marko98873af2020-12-16 12:10:03 +00005413 if (byte_swap) {
5414 DCHECK_NE(value_type, DataType::Type::kReference);
5415 DCHECK_NE(DataType::Size(value_type), 1u);
5416 if (get_and_update_op == GetAndUpdateOp::kAdd) {
5417 // We need to do the byte swapping in the CAS loop for GetAndAdd.
5418 get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
5419 } else if (!arg.IsZero()) {
5420 // For other operations, avoid byte swap inside the CAS loop by providing an adjusted `arg`.
5421 // For GetAndSet use a scratch register; FP argument is already in a scratch register.
5422 // For bitwise operations GenerateGetAndUpdate() needs both scratch registers;
5423 // we have allocated a normal temporary to handle that.
5424 CPURegister temp = (get_and_update_op == GetAndUpdateOp::kSet)
5425 ? (is_fp ? arg : (arg.Is64Bits() ? temps.AcquireX() : temps.AcquireW()))
5426 : CPURegisterFrom(locations->GetTemp(1u), load_store_type);
5427 GenerateReverseBytes(masm, load_store_type, arg, temp);
5428 arg = temp;
5429 }
5430 }
5431
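  // Illustrative example (hypothetical values): for a getAndSet of 0x11223344 through a
  // byte-swapping view, REV(0x11223344) = 0x44332211 is computed once above, the atomic update
  // then works purely on storage-order data, and the loaded old value is byte-swapped below
  // when it is moved to `out`.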
Vladimir Markoe1510d42020-11-13 11:07:13 +00005432 GenerateGetAndUpdate(codegen, get_and_update_op, load_store_type, order, tmp_ptr, arg, old_value);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005433
Vladimir Marko98873af2020-12-16 12:10:03 +00005434 if (get_and_update_op == GetAndUpdateOp::kAddWithByteSwap) {
5435 // The only adjustment needed is sign-extension for `kInt16`.
5436 // Everything else has been done by `GenerateGetAndUpdate()`.
5437 DCHECK(byte_swap);
5438 if (value_type == DataType::Type::kInt16) {
5439 DCHECK_EQ(load_store_type, DataType::Type::kUint16);
5440 __ Sxth(out.W(), old_value.W());
Vladimir Markoe1510d42020-11-13 11:07:13 +00005441 }
Vladimir Marko98873af2020-12-16 12:10:03 +00005442 } else if (byte_swap) {
5443 // Also handles moving to FP registers.
5444 GenerateReverseBytes(masm, value_type, old_value, out);
5445 } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat64) {
5446 __ Fmov(out.D(), old_value.X());
5447 } else if (get_and_update_op == GetAndUpdateOp::kSet && value_type == DataType::Type::kFloat32) {
5448 __ Fmov(out.S(), old_value.W());
5449 } else if (value_type == DataType::Type::kInt8) {
5450 __ Sxtb(out.W(), old_value.W());
5451 } else if (value_type == DataType::Type::kInt16) {
5452 __ Sxth(out.W(), old_value.W());
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005453 } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
5454 if (kUseBakerReadBarrier) {
Vladimir Markoc8178f52020-11-24 10:38:16 +00005455 codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(out.W(), old_value.W());
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005456 } else {
5457 codegen->GenerateReadBarrierSlow(
5458 invoke,
Vladimir Markoe1510d42020-11-13 11:07:13 +00005459 Location::RegisterLocation(out.GetCode()),
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005460 Location::RegisterLocation(old_value.GetCode()),
Vladimir Marko436977d2020-11-12 12:41:06 +00005461 Location::RegisterLocation(target.object.GetCode()),
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005462 /*offset=*/ 0u,
Vladimir Marko436977d2020-11-12 12:41:06 +00005463 /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005464 }
5465 }
Vladimir Marko98873af2020-12-16 12:10:03 +00005466
Vladimir Marko9d31daa2022-04-14 10:48:44 +01005467 if (slow_path != nullptr) {
5468 DCHECK(!byte_swap);
Vladimir Marko98873af2020-12-16 12:10:03 +00005469 __ Bind(slow_path->GetExitLabel());
5470 }
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005471}
5472
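// The visitors below map each get-and-update access mode onto the shared generator: the method
// family selects the GetAndUpdateOp (kSet, kAdd, kAnd, kOr, kXor) and the Acquire/Release suffix
// selects the std::memory_order.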
5473void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
Vladimir Markoe1510d42020-11-13 11:07:13 +00005474 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005475}
5476
5477void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
Vladimir Markoe1510d42020-11-13 11:07:13 +00005478 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005479}
5480
5481void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
Vladimir Markoe1510d42020-11-13 11:07:13 +00005482 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005483}
5484
5485void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
Vladimir Markoe1510d42020-11-13 11:07:13 +00005486 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005487}
5488
5489void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
Vladimir Markoe1510d42020-11-13 11:07:13 +00005490 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005491}
5492
5493void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
Vladimir Markoe1510d42020-11-13 11:07:13 +00005494 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
Vladimir Marko32c2eb82020-11-10 16:58:47 +00005495}
5496
Vladimir Markoe1510d42020-11-13 11:07:13 +00005497void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5498 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
5499}
5500
5501void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5502 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
5503}
5504
5505void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5506 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
5507}
5508
5509void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5510 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
5511}
5512
5513void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5514 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
5515}
5516
5517void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5518 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
5519}
5520
Vladimir Markoc2d5c702020-11-13 15:28:33 +00005521void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5522 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
5523}
5524
5525void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5526 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
5527}
5528
5529void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5530 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
5531}
5532
5533void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5534 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
5535}
5536
5537void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5538 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
5539}
5540
5541void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5542 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
5543}
5544
5545void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5546 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
5547}
5548
5549void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5550 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
5551}
5552
5553void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5554 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
5555}
5556
5557void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5558 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
5559}
5560
5561void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5562 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
5563}
5564
5565void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5566 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
5567}
5568
5569void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5570 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
5571}
5572
5573void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5574 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
5575}
5576
5577void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5578 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
5579}
5580
5581void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5582 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
5583}
5584
5585void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5586 CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
5587}
5588
5589void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5590 GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
5591}
Vladimir Markoe1510d42020-11-13 11:07:13 +00005592
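// Slow path taken when a primitive access goes through a byte array view VarHandle: verify that
// the VarHandle is a ByteArrayViewVarHandle, check bounds and alignment, compute the data offset,
// and then either return to the native-byte-order fast path or emit the byte-swapping variant
// of the access.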
Vladimir Marko98873af2020-12-16 12:10:03 +00005593void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
5594 DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5595 CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
5596 MacroAssembler* masm = codegen->GetVIXLAssembler();
5597 HInvoke* invoke = GetInvoke();
5598 mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5599 DataType::Type value_type =
5600 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5601 DCHECK_NE(value_type, DataType::Type::kReference);
5602 size_t size = DataType::Size(value_type);
5603 DCHECK_GT(size, 1u);
5604 Register varhandle = InputRegisterAt(invoke, 0);
5605 Register object = InputRegisterAt(invoke, 1);
5606 Register index = InputRegisterAt(invoke, 2);
5607
5608 MemberOffset class_offset = mirror::Object::ClassOffset();
5609 MemberOffset array_length_offset = mirror::Array::LengthOffset();
5610 MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5611 MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5612
5613 __ Bind(GetByteArrayViewCheckLabel());
5614
5615 VarHandleTarget target = GetVarHandleTarget(invoke);
5616 {
5617 UseScratchRegisterScope temps(masm);
5618 Register temp = temps.AcquireW();
5619 Register temp2 = temps.AcquireW();
5620
5621 // The main path checked that coordinateType0 is an array class matching the class of the
5622 // actual coordinate argument, but its component type does not match the value type.
5623 // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5624 __ Ldr(temp, HeapOperand(varhandle, class_offset.Int32Value()));
Ulya Trafimovich740e1f92021-10-15 12:11:37 +01005625 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
Vladimir Marko98873af2020-12-16 12:10:03 +00005626 codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5627 __ Cmp(temp, temp2);
5628 __ B(GetEntryLabel(), ne);
5629
5630 // Check for array index out of bounds.
5631 __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
5632 __ Subs(temp, temp, index);
5633 __ Ccmp(temp, size, NoFlag, hs); // If SUBS yields LO (C=false), keep the C flag clear.
5634 __ B(GetEntryLabel(), lo);
5635
5636 // Construct the target.
5637 __ Add(target.offset, index, data_offset.Int32Value());
5638
5639 // Alignment check. For unaligned access, go to the runtime.
5640 DCHECK(IsPowerOfTwo(size));
5641 if (size == 2u) {
5642 __ Tbnz(target.offset, 0, GetEntryLabel());
5643 } else {
5644 __ Tst(target.offset, size - 1u);
5645 __ B(GetEntryLabel(), ne);
5646 }
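    // (Exclusive and atomic accesses on ARMv8 generally require naturally aligned addresses,
    // so unaligned byte array view accesses are left to the runtime.)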
5647
5648 // Byte order check. For native byte order return to the main path.
5649 if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5650 IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5651 // There is no reason to differentiate between native byte order and byte-swap
5652 // for setting a zero bit pattern. Just return to the main path.
5653 __ B(GetNativeByteOrderLabel());
5654 return;
5655 }
5656 __ Ldr(temp, HeapOperand(varhandle, native_byte_order_offset.Int32Value()));
5657 __ Cbnz(temp, GetNativeByteOrderLabel());
5658 }
5659
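  // Non-native byte order: emit the byte-swapping variant of the original access mode.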
5660 switch (access_mode_template) {
5661 case mirror::VarHandle::AccessModeTemplate::kGet:
5662 GenerateVarHandleGet(invoke, codegen, order_, /*byte_swap=*/ true);
5663 break;
5664 case mirror::VarHandle::AccessModeTemplate::kSet:
5665 GenerateVarHandleSet(invoke, codegen, order_, /*byte_swap=*/ true);
5666 break;
5667 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5668 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5669 GenerateVarHandleCompareAndSetOrExchange(
5670 invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
5671 break;
5672 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5673 GenerateVarHandleGetAndUpdate(
5674 invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
5675 break;
5676 }
5677 __ B(GetExitLabel());
5678}
5679
Aart Bikff7d89c2016-11-07 08:49:28 -08005680UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
5681UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
Aart Bik71bf7b42016-11-16 10:17:46 -08005682UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
5683UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
5684UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
Vladimir Markod4561172017-10-30 17:48:25 +00005685UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject);
5686UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString);
5687UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence);
5688UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray);
5689UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean);
5690UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar);
5691UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt);
5692UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong);
5693UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat);
5694UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble);
Aart Bik71bf7b42016-11-16 10:17:46 -08005695UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
5696UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
Shalini Salomi Bodapatib414a4c2022-02-10 18:03:34 +05305697UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyByte);
5698UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyInt);
Aart Bikff7d89c2016-11-07 08:49:28 -08005699
Aart Bik0e54c012016-03-04 12:08:31 -08005700// 1.8.
5701UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
5702UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
5703UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
5704UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
5705UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
Aart Bik0e54c012016-03-04 12:08:31 -08005706
Andra Danciua0130e82020-07-23 12:34:56 +00005707UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvokeExact)
5708UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvoke)
Andra Danciua0130e82020-07-23 12:34:56 +00005709
Sorin Basca2f01e8e2021-06-18 06:44:07 +00005710// OpenJDK 11
5711UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddInt)
5712UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddLong)
5713UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetInt)
5714UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetLong)
5715UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetObject)
5716
Aart Bik2f9fcc92016-03-01 15:16:54 -08005717UNREACHABLE_INTRINSICS(ARM64)
Roland Levillain4d027112015-07-01 15:41:14 +01005718
5719#undef __
5720
Andreas Gampe878d58c2015-01-15 23:24:00 -08005721} // namespace arm64
5722} // namespace art