Opt Compiler: ARM64: Add support for rem-float, rem-double and volatile.
Add support for the rem-float and rem-double instructions, and for volatile
memory accesses, implemented with load-acquire/store-release instructions or
explicit memory barriers.
Change-Id: I96a24dff66002c3b772c3d8e6ed792e3cb59048a
Signed-off-by: Serban Constantinescu <serban.constantinescu@arm.com>
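
A minimal sketch (illustrative only, not part of the patch) of the ordering
contract this change targets, expressed with standard C++ atomics: Java
volatile accesses need sequentially consistent semantics, and on ARMv8 a
seq_cst load/store is typically lowered to LDAR/STLR, which is the same
mapping the kUseAcquireRelease path emits; the DMB-based sequences remain as
the alternative.

    #include <atomic>

    std::atomic<int> field{0};

    int VolatileGet() {
      // On AArch64 this typically compiles down to a single LDAR.
      return field.load(std::memory_order_seq_cst);
    }

    void VolatilePut(int value) {
      // On AArch64 this typically compiles down to a single STLR.
      field.store(value, std::memory_order_seq_cst);
    }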
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index dde0dfe..f6ca6c7 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -671,13 +671,6 @@
return false;
}
-#if defined(__aarch64__)
- if (resolved_field->IsVolatile()) {
- MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile);
- return false;
- }
-#endif
-
Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
@@ -706,8 +699,6 @@
return true;
}
-
-
bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
uint32_t dex_pc,
bool is_put) {
@@ -728,13 +719,6 @@
return false;
}
-#if defined(__aarch64__)
- if (resolved_field->IsVolatile()) {
- MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile);
- return false;
- }
-#endif
-
Handle<mirror::Class> referrer_class(hs.NewHandle(compiler_driver_->ResolveCompilingMethodsClass(
soa, dex_cache, class_loader, outer_compilation_unit_)));
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c7517d3..e10b47c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -38,6 +38,9 @@
namespace arm64 {
+// TODO: Tune the use of Load-Acquire/Store-Release vs. explicit Data Memory Barriers.
+// For now we prefer load-acquire and store-release over explicit memory barriers.
+static constexpr bool kUseAcquireRelease = true;
static constexpr bool kExplicitStackOverflowCheck = false;
static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
static constexpr int kCurrentMethodStackOffset = 0;
@@ -233,8 +236,9 @@
static const Register kRuntimeParameterCoreRegisters[] = { x0, x1, x2, x3, x4, x5, x6, x7 };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
-static const FPRegister kRuntimeParameterFpuRegisters[] = { };
-static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
+static const FPRegister kRuntimeParameterFpuRegisters[] = { d0, d1, d2, d3, d4, d5, d6, d7 };
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+ arraysize(kRuntimeParameterFpuRegisters);
class InvokeRuntimeCallingConvention : public CallingConvention<Register, FPRegister> {
public:
@@ -949,8 +953,8 @@
}
void CodeGeneratorARM64::Load(Primitive::Type type,
- vixl::CPURegister dst,
- const vixl::MemOperand& src) {
+ CPURegister dst,
+ const MemOperand& src) {
switch (type) {
case Primitive::kPrimBoolean:
__ Ldrb(Register(dst), src);
@@ -969,7 +973,7 @@
case Primitive::kPrimLong:
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- DCHECK(dst.Is64Bits() == Is64BitType(type));
+ DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
__ Ldr(dst, src);
break;
case Primitive::kPrimVoid:
@@ -977,31 +981,123 @@
}
}
+void CodeGeneratorARM64::LoadAcquire(Primitive::Type type,
+ CPURegister dst,
+ const MemOperand& src) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp_base = temps.AcquireX();
+
+ DCHECK(!src.IsRegisterOffset());
+ DCHECK(!src.IsPreIndex());
+ DCHECK(!src.IsPostIndex());
+
+ // TODO(vixl): Let the MacroAssembler handle MemOperand.
+ __ Add(temp_base, src.base(), src.offset());
+ MemOperand base = MemOperand(temp_base);
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ __ Ldarb(Register(dst), base);
+ break;
+ case Primitive::kPrimByte:
+ __ Ldarb(Register(dst), base);
+ __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
+ break;
+ case Primitive::kPrimChar:
+ __ Ldarh(Register(dst), base);
+ break;
+ case Primitive::kPrimShort:
+ __ Ldarh(Register(dst), base);
+ __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot:
+ case Primitive::kPrimLong:
+ DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
+ __ Ldar(Register(dst), base);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ DCHECK(dst.IsFPRegister());
+ DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
+
+ Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
+ __ Ldar(temp, base);
+ __ Fmov(FPRegister(dst), temp);
+ break;
+ }
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unreachable type " << type;
+ }
+}
+
void CodeGeneratorARM64::Store(Primitive::Type type,
- vixl::CPURegister rt,
- const vixl::MemOperand& dst) {
+ CPURegister src,
+ const MemOperand& dst) {
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- __ Strb(Register(rt), dst);
+ __ Strb(Register(src), dst);
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- __ Strh(Register(rt), dst);
+ __ Strh(Register(src), dst);
break;
case Primitive::kPrimInt:
case Primitive::kPrimNot:
case Primitive::kPrimLong:
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- DCHECK(rt.Is64Bits() == Is64BitType(type));
- __ Str(rt, dst);
+ DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+ __ Str(src, dst);
break;
case Primitive::kPrimVoid:
LOG(FATAL) << "Unreachable type " << type;
}
}
+void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
+ CPURegister src,
+ const MemOperand& dst) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp_base = temps.AcquireX();
+
+ DCHECK(!dst.IsRegisterOffset());
+ DCHECK(!dst.IsPreIndex());
+ DCHECK(!dst.IsPostIndex());
+
+ // TODO(vixl): Let the MacroAssembler handle MemOperand.
+ __ Add(temp_base, dst.base(), dst.offset());
+ MemOperand base = MemOperand(temp_base);
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ __ Stlrb(Register(src), base);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ __ Stlrh(Register(src), base);
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot:
+ case Primitive::kPrimLong:
+ DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+ __ Stlr(Register(src), base);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ DCHECK(src.IsFPRegister());
+ DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+
+ Register temp = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
+ __ Fmov(temp, FPRegister(src));
+ __ Stlr(temp, base);
+ break;
+ }
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unreachable type " << type;
+ }
+}
+
void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) {
DCHECK(current_method.IsW());
__ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset));
@@ -1026,14 +1122,47 @@
vixl::Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireW();
- __ Ldr(temp, HeapOperand(class_reg, mirror::Class::StatusOffset()));
- __ Cmp(temp, mirror::Class::kStatusInitialized);
- __ B(lt, slow_path->GetEntryLabel());
+ size_t status_offset = mirror::Class::StatusOffset().SizeValue();
+
// Even if the initialized flag is set, we need to ensure consistent memory ordering.
- __ Dmb(InnerShareable, BarrierReads);
+ if (kUseAcquireRelease) {
+ // TODO(vixl): Let the MacroAssembler handle MemOperand.
+ __ Add(temp, class_reg, status_offset);
+ __ Ldar(temp, HeapOperand(temp));
+ __ Cmp(temp, mirror::Class::kStatusInitialized);
+ __ B(lt, slow_path->GetEntryLabel());
+ } else {
+ __ Ldr(temp, HeapOperand(class_reg, status_offset));
+ __ Cmp(temp, mirror::Class::kStatusInitialized);
+ __ B(lt, slow_path->GetEntryLabel());
+ __ Dmb(InnerShareable, BarrierReads);
+ }
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
+ BarrierType type = BarrierAll;
+
+ switch (kind) {
+ case MemBarrierKind::kAnyAny:
+ case MemBarrierKind::kAnyStore: {
+ type = BarrierAll;
+ break;
+ }
+ case MemBarrierKind::kLoadAny: {
+ type = BarrierReads;
+ break;
+ }
+ case MemBarrierKind::kStoreStore: {
+ type = BarrierWrites;
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected memory barrier " << kind;
+ }
+ __ Dmb(InnerShareable, type);
+}
+
void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
SuspendCheckSlowPathARM64* slow_path =
@@ -1660,28 +1789,54 @@
}
void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
- codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+
+ if (instruction->IsVolatile()) {
+ if (kUseAcquireRelease) {
+ codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field);
+ } else {
+ codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+ // For IRIW (independent reads of independent writes) sequential consistency,
+ // kLoadAny is not sufficient.
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+ }
}
void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
}
void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- Primitive::Type field_type = instruction->GetFieldType();
- CPURegister value = InputCPURegisterAt(instruction, 1);
Register obj = InputRegisterAt(instruction, 0);
- codegen_->Store(field_type, value, HeapOperand(obj, instruction->GetFieldOffset()));
- if (field_type == Primitive::kPrimNot) {
+ CPURegister value = InputCPURegisterAt(instruction, 1);
+ Offset offset = instruction->GetFieldOffset();
+ Primitive::Type field_type = instruction->GetFieldType();
+
+ if (instruction->IsVolatile()) {
+ if (kUseAcquireRelease) {
+ codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
+ } else {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ codegen_->Store(field_type, value, HeapOperand(obj, offset));
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ codegen_->Store(field_type, value, HeapOperand(obj, offset));
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
codegen_->MarkGCCard(obj, Register(value));
}
}
@@ -2175,9 +2330,12 @@
}
void LocationsBuilderARM64::VisitRem(HRem* rem) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
- switch (rem->GetResultType()) {
+ Primitive::Type type = rem->GetResultType();
+ LocationSummary::CallKind call_kind = IsFPType(type) ? LocationSummary::kCall
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+ switch (type) {
case Primitive::kPrimInt:
case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
@@ -2185,13 +2343,24 @@
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(calling_convention.GetReturnLocation(type));
+
+ break;
+ }
+
default:
- LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+ LOG(FATAL) << "Unexpected rem type " << type;
}
}
void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
+
switch (type) {
case Primitive::kPrimInt:
case Primitive::kPrimLong: {
@@ -2206,6 +2375,14 @@
break;
}
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
+ : QUICK_ENTRY_POINT(pFmod);
+ codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc());
+ break;
+ }
+
default:
LOG(FATAL) << "Unexpected rem type " << type;
}
@@ -2294,7 +2471,18 @@
void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
- codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+
+ if (instruction->IsVolatile()) {
+ if (kUseAcquireRelease) {
+ codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field);
+ } else {
+ codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+ // For IRIW (independent reads of independent writes) sequential consistency,
+ // kLoadAny is not sufficient.
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+ }
}
void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
@@ -2305,13 +2493,24 @@
}
void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- CPURegister value = InputCPURegisterAt(instruction, 1);
Register cls = InputRegisterAt(instruction, 0);
+ CPURegister value = InputCPURegisterAt(instruction, 1);
Offset offset = instruction->GetFieldOffset();
Primitive::Type field_type = instruction->GetFieldType();
- codegen_->Store(field_type, value, HeapOperand(cls, offset));
- if (field_type == Primitive::kPrimNot) {
+ if (instruction->IsVolatile()) {
+ if (kUseAcquireRelease) {
+ codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset));
+ } else {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ codegen_->Store(field_type, value, HeapOperand(cls, offset));
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ codegen_->Store(field_type, value, HeapOperand(cls, offset));
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
codegen_->MarkGCCard(cls, Register(value));
}
}
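
An illustrative sketch (not part of the patch): VisitRem above lowers
rem-float/rem-double to calls through the pFmodf/pFmod quick entry points,
which presumably wrap the C library fmodf/fmod; like fmod, the dex fp
remainder keeps the sign of the dividend.

    #include <cmath>
    #include <cstdio>

    int main() {
      // fmod keeps the sign of the dividend: -7.5 rem 2.0 == -1.5.
      std::printf("%f\n", std::fmod(-7.5, 2.0));
      // The float overload is the analogue of the pFmodf entry point.
      std::printf("%f\n", std::fmod(-7.5f, 2.0f));
      return 0;
    }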
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 1d5bfb7..19488a4 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
#include "code_generator.h"
+#include "dex/compiler_enums.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/arm64/assembler_arm64.h"
@@ -108,6 +109,7 @@
private:
void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
+ void GenerateMemoryBarrier(MemBarrierKind kind);
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* instr);
void HandleShift(HBinaryOperation* instr);
@@ -257,6 +259,8 @@
void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
void LoadCurrentMethod(vixl::Register current_method);
+ void LoadAcquire(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
+ void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
// Generate code to invoke a runtime entry point.
void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc);
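
For readers more used to C++11 atomics, a rough sketch (illustrative only, not
part of the patch) of the MemBarrierKind -> DMB mapping implemented by
GenerateMemoryBarrier(); the equivalences are approximate, since C++ has no
exact counterpart for a store-store-only barrier.

    #include <atomic>

    void AnyAnyOrAnyStore() {
      // kAnyAny / kAnyStore -> DMB ISH (BarrierAll): roughly a full fence.
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }

    void LoadAny() {
      // kLoadAny -> DMB ISHLD (BarrierReads): roughly an acquire fence.
      std::atomic_thread_fence(std::memory_order_acquire);
    }

    void StoreStore() {
      // kStoreStore -> DMB ISHST (BarrierWrites): a release fence is the
      // closest (stronger) C++ equivalent.
      std::atomic_thread_fence(std::memory_order_release);
    }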
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 7993b19..cc2723d 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,7 +38,6 @@
kNotCompiledUnresolvedMethod,
kNotCompiledUnresolvedField,
kNotCompiledNonSequentialRegPair,
- kNotCompiledVolatile,
kNotOptimizedTryCatch,
kNotOptimizedDisabled,
kNotCompiledCantAccesType,
@@ -92,7 +91,6 @@
case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair";
- case kNotCompiledVolatile : return "kNotCompiledVolatile";
case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
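
The "IRIW" comments in the volatile load paths refer to the
independent-reads-of-independent-writes litmus test sketched below
(illustrative only, not part of the patch): sequential consistency requires
the two reader threads to agree on the order of the two independent writes,
which is why the non-acquire path follows a volatile load with a full kAnyAny
barrier rather than kLoadAny.

    #include <atomic>
    #include <thread>

    std::atomic<int> x{0}, y{0};
    int r1, r2, r3, r4;

    int main() {
      std::thread w1([] { x.store(1, std::memory_order_seq_cst); });
      std::thread w2([] { y.store(1, std::memory_order_seq_cst); });
      std::thread rd1([] {
        r1 = x.load(std::memory_order_seq_cst);
        r2 = y.load(std::memory_order_seq_cst);
      });
      std::thread rd2([] {
        r3 = y.load(std::memory_order_seq_cst);
        r4 = x.load(std::memory_order_seq_cst);
      });
      w1.join(); w2.join(); rd1.join(); rd2.join();
      // With sequentially consistent accesses the outcome r1==1, r2==0, r3==1,
      // r4==0 is forbidden: both readers must observe the writes in the same order.
      return 0;
    }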